In [None]:
#import libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from itertools import combinations
from scipy.stats import fisher_exact
import statsmodels.stats.contingency_tables as ct

In [None]:
pip install python-igraph

In [None]:
import igraph as ig

In [None]:
pip install cairocffi

In [None]:
pip install pycairo

In [None]:
pip install plotnine

In [None]:
from plotnine import ggplot, aes, geom_tile, geom_text, scale_fill_gradient, ggtitle

## Hispanics network analysis

In [None]:
# Read icd_hispanics.txt into a DataFrame.
# Its 'patient' column is used later to count unique patients (the prevalence denominator).
icd_hispanics = pd.read_csv('icd_hispanics.txt') 

In [None]:
# Read results_filt.txt into `edges`, the edge table of the network.
# Later cells use its columns disAcode/disBcode (disease codes of a pair),
# disA/disB (treated as patient counts) and relativeRisk.
edges = pd.read_csv('results_filt.txt') 

In [None]:
# Preview the first rows of the edge table
edges.head()

In [None]:
# Node table: one row per disease code with its patient count and prevalence.
# Codes are collected from both sides of every pair in `edges`.
disPrev1 = edges[['disAcode', 'disA']].rename(
    columns={'disAcode': 'dis', 'disA': 'patients'}
)
disPrev2 = edges[['disBcode', 'disB']].rename(
    columns={'disBcode': 'dis', 'disB': 'patients'}
)

# Stack both sides and keep only the first row seen for each disease code
disPrev = pd.concat([disPrev1, disPrev2]).drop_duplicates(subset='dis')

# Prevalence (%) = patients with the disease / unique patients in icd_hispanics
n_patients = len(icd_hispanics['patient'].unique())
disPrev['prevalence'] = (disPrev['patients'].astype(float) / n_patients) * 100

disPrev

In [None]:
# Use the prevalence table as the node list (same object, not a copy)
nodes = disPrev

In [None]:
# Preview the first rows of the node table
nodes.head()

In [None]:
# Build a directed graph straight from the edge table.
# Each row is passed as a tuple; the columns from position 2 onwards are
# registered as edge attribute names.
# assumes the first two columns of `edges` are disAcode and disBcode -- TODO confirm
edge_rows = edges.values
edge_attr_names = edges.columns[2:]
g = ig.Graph.TupleList(edge_rows, directed=True, edge_attrs=edge_attr_names)

In [None]:
# Print the text representation of the freshly built graph
print(g)

In [None]:
# Index the node table by disease code so rows can be looked up by vertex name
nodes_df = disPrev.set_index(['dis'])

# Copy every node column (patients, prevalence) onto the graph vertices,
# aligned to the vertex order via the vertex names.
# The values are handed over as a plain list: igraph reads the sequence by
# integer position, and integer indexing of a label-indexed Series is a
# deprecated positional fallback in pandas (and a label lookup, i.e. wrong
# values or a KeyError, if the disease codes happen to be integers).
vertex_names = g.vs['name']
for column in nodes_df.columns:
    g.vs[column] = nodes_df.loc[vertex_names, column].tolist()

In [None]:
# Index the node table by disease code ('dis') and preview the first rows
nodes_df = disPrev.set_index(['dis'])
nodes_df.head()

In [None]:
# Print the graph again to inspect it after the vertex attributes were assigned
print(g)

In [None]:
# Alternative construction of the graph from record dictionaries:
# one dict per node row and one dict per edge row.
vertex_records = nodes.to_dict('records')
edge_records = edges.to_dict('records')

G = ig.Graph.DictList(
    vertices=vertex_records,
    edges=edge_records,
    directed=True,
    vertex_name_attr='dis',                      # node records are identified by their disease code
    edge_foreign_keys=('disAcode', 'disBcode'),  # edge endpoints refer to those codes
)

In [None]:
# Collapse repeated (disAcode, disBcode) pairs into one link each,
# summing relativeRisk over the rows of a pair.
pair_groups = edges.groupby(['disAcode', 'disBcode'])
links = pair_groups['relativeRisk'].sum().reset_index()

# Shapes before and after aggregation
print(edges.shape,'\n',links.shape)

In [None]:
# Rebuild the graph from the aggregated links: columns 0 and 1 of `links`
# (disAcode, disBcode) are the endpoints, the remaining column (relativeRisk)
# becomes an edge attribute.
g = ig.Graph.TupleList(links.values,directed=True,
                       edge_attrs=links.columns[2:])

# Copy every node column onto the vertices, aligned by vertex name.
# A plain list is passed because igraph reads the sequence by integer
# position; integer indexing of a label-indexed Series is a deprecated
# positional fallback in pandas (and a label lookup if codes are integers).
vertex_names = g.vs['name']
for column in nodes_df.columns:
    g.vs[column] = nodes_df.loc[vertex_names, column].tolist()

In [None]:
# Visual encoding of the network:
#   node size  <- prevalence, rescaled so the most prevalent disease gets node_scale_factor
#   edge width <- relativeRisk, rescaled so the strongest link gets edge_scale_factor
node_scale_factor = 60  # Adjust as needed
edge_scale_factor = 4  # Adjust as needed

# Maxima used as normalisation constants
max_prevalence = max(g.vs['prevalence'])
max_relative_risk = max(g.es['relativeRisk'])

g.vs['size'] = [
    prevalence / max_prevalence * node_scale_factor
    for prevalence in g.vs['prevalence']
]
g.es['width'] = [
    risk / max_relative_risk * edge_scale_factor
    for risk in g.es['relativeRisk']
]

# Edge labels: relative risk rounded to two decimals
rounded_edge_values = [round(risk, 2) for risk in g.es['relativeRisk']]

# Vertices placed evenly on a circle
layout = g.layout_circle()

# Drawing options, kept together so they are easy to tweak
plot_options = dict(
    layout=layout,
    bbox=(300, 300),
    vertex_label=g.vs["name"],
    vertex_size=g.vs['size'],
    vertex_label_size=12,
    edge_width=g.es['width'],
    edge_label=rounded_edge_values,
    edge_arrow_size=0,
    edge_label_size=0,
    vertex_color='blue',
)

# Render the network and save it to Graph.png
ig.plot(g, "Graph.png", **plot_options)

## Non-Hispanics network analysis

In [None]:
# Read results_filt_2.txt into `edges2`, the edge table of the second network.
# Later cells use its columns disAcode/disBcode, disA/disB and relativeRisk.
edges2 = pd.read_csv('results_filt_2.txt') 

In [None]:
# Preview the first rows of the second edge table
edges2.head()

In [None]:
# Creating the disPrev_2 DataFrame: one row per disease code found in edges2,
# with its patient count and prevalence.

# Codes and counts from the "A" side of every pair
disPrev3 = edges2[['disAcode', 'disA']]
disPrev3.columns = ['dis', 'patients']

# Codes and counts from the "B" side of every pair
disPrev4 = edges2[['disBcode', 'disB']]
disPrev4.columns = ['dis', 'patients']

# Stack both sides and keep only the first row seen for each disease code
disPrev_2 = pd.concat([disPrev3, disPrev4])
disPrev_2 = disPrev_2[~disPrev_2.duplicated(subset='dis')]

# Prevalence calculation (%): patients with the disease / number of unique patients.
# NOTE(review): the denominator is the unique patient count of `icd_hispanics`,
# the same table used for the first network. If edges2 describes a different
# cohort, that cohort's own patient table is presumably needed here -- confirm.
disPrev_2['prevalence'] = (disPrev_2['patients'].astype(float) / len(icd_hispanics['patient'].unique())) * 100

# Display the resulting node table
disPrev_2

In [None]:
# Use the second prevalence table as the node list (same object, not a copy)
nodes2 = disPrev_2

In [None]:
# Preview the first rows of the second node table
nodes2.head()

In [None]:
# Build the second directed graph straight from its edge table.
# Each row is passed as a tuple; the columns from position 2 onwards are
# registered as edge attribute names.
# assumes the first two columns of `edges2` are disAcode and disBcode -- TODO confirm
edge_rows_2 = edges2.values
edge_attr_names_2 = edges2.columns[2:]
g2 = ig.Graph.TupleList(edge_rows_2, directed=True, edge_attrs=edge_attr_names_2)

In [None]:
# Print the text representation of the second graph
print(g2)

In [None]:
# Index the second node table by disease code so rows can be looked up by vertex name
nodes_df_2 = disPrev_2.set_index(['dis'])

# Copy every node column (patients, prevalence) onto the vertices of g2.
# The lookup must use g2's own vertex names and nodes_df_2's own columns;
# the previous version took both from the first network (g / nodes_df).
# Values are passed as a plain list because igraph reads the sequence by
# integer position, which is a deprecated positional fallback on a
# label-indexed pandas Series.
vertex_names_2 = g2.vs['name']
for column in nodes_df_2.columns:
    g2.vs[column] = nodes_df_2.loc[vertex_names_2, column].tolist()

In [None]:
# Index the second node table by disease code ('dis') and preview the first rows
nodes_df_2 = disPrev_2.set_index(['dis'])
nodes_df_2.head()

In [None]:
# Print the second graph again to inspect it after the vertex attributes were assigned
print(g2)

In [None]:
# Alternative construction of the second graph from record dictionaries:
# one dict per node row and one dict per edge row.
vertex_records_2 = nodes2.to_dict('records')
edge_records_2 = edges2.to_dict('records')

G2 = ig.Graph.DictList(
    vertices=vertex_records_2,
    edges=edge_records_2,
    directed=True,
    vertex_name_attr='dis',                      # node records are identified by their disease code
    edge_foreign_keys=('disAcode', 'disBcode'),  # edge endpoints refer to those codes
)

In [None]:
# Collapse repeated (disAcode, disBcode) pairs into one link each,
# summing relativeRisk over the rows of a pair.
pair_groups_2 = edges2.groupby(['disAcode', 'disBcode'])
links2 = pair_groups_2['relativeRisk'].sum().reset_index()

# Shapes before and after aggregation
print(edges2.shape,'\n',links2.shape)

In [None]:
# Rebuild the second graph from the aggregated links: columns 0 and 1 of
# `links2` (disAcode, disBcode) are the endpoints, the remaining column
# (relativeRisk) becomes an edge attribute.
g2 = ig.Graph.TupleList(links2.values,directed=True,
                       edge_attrs=links2.columns[2:])

# Copy every node column onto the vertices, aligned by vertex name.
# Iterate over nodes_df_2's own columns (not the first network's nodes_df),
# and pass a plain list: igraph reads the sequence by integer position, which
# is a deprecated positional fallback on a label-indexed pandas Series.
vertex_names_2 = g2.vs['name']
for column in nodes_df_2.columns:
    g2.vs[column] = nodes_df_2.loc[vertex_names_2, column].tolist()

In [None]:
# Visual encoding of the second network:
#   node size  <- prevalence, rescaled so the most prevalent disease gets node_scale_factor
#   edge width <- relativeRisk, rescaled so the strongest link gets edge_scale_factor
# You can adjust the scaling factors to control the size
max_prevalence = max(g2.vs['prevalence'])
max_relative_risk = max(g2.es['relativeRisk'])

# Scale factor for node size
node_scale_factor = 60  # Adjust as needed

# Scale factor for edge size
edge_scale_factor = 4  # Adjust as needed

# Set the node size based on 'prevalence'
g2.vs['size'] = [v['prevalence'] / max_prevalence * node_scale_factor for v in g2.vs]

# Set the edge width based on 'relativeRisk'
g2.es['width'] = [e['relativeRisk'] / max_relative_risk * edge_scale_factor for e in g2.es]

# Round edge values to two decimal places
rounded_edge_values = [round(value, 2) for value in g2.es['relativeRisk']]

# Create a circular layout
layout = g2.layout_circle()

# Plot the graph.
# The image goes to its own file: writing to "Graph.png" here would overwrite
# the figure saved for the first network.
ig.plot(
    g2,
    "Graph_non_hispanics.png",
    layout=layout,
    bbox=(300, 300),
    vertex_label=g2.vs["name"],
    vertex_size=g2.vs['size'],
    vertex_label_size = 12,
    edge_width=g2.es['width'],
    edge_label=rounded_edge_values,
    edge_arrow_size=0,
    edge_label_size = 0,
    vertex_color= 'blue'
)