In [135]:
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import random
from collections import Counter

In [136]:
# Load the tie data. sep=None combined with engine='python' makes pandas
# detect the column delimiter automatically instead of assuming a comma.
data = pd.read_csv("22140-0003-Data_combined_without_dublicates.csv", sep=None, engine='python')

In [137]:
# Keep the two endpoint ids, the tie type, and six attribute fields that each
# come as a pair of columns: the plain name and the same name with a "1" suffix.
paired_fields = ("RACE", "SEX", "AGE", "EDUC", "HIV", "OCC")
selected_columns = ["RID", "ID1", "TIETYPE"] + [
    f"{field}{suffix}" for field in paired_fields for suffix in ("", "1")
]
data_new = data.loc[:, selected_columns].copy()

In [138]:
data_expand = data_new.copy()

# One indicator column per tie type (TIETYPE values are assumed to range from
# 1 to 4): TIETYPE{t} is 1 on rows whose TIETYPE equals t and 0 otherwise.
#
# A vectorised comparison is used instead of a row-by-row iterrows() loop.
# iterrows() may upcast a row to a single dtype, so the tie type can come
# back as a float and an f-string built from it would then create a stray
# column such as "TIETYPE1.0" instead of filling "TIETYPE1".
for t in range(1, 5):
    data_expand[f'TIETYPE{t}'] = data_expand['TIETYPE'].eq(t).astype('int64')

In [139]:
# Group the rows by (RID, ID1) pair. The name is kept so that any later
# cell referring to it keeps working.
unique_combinations = data_expand.groupby(['RID', 'ID1'])

# For every tie type t (assumed to range from 1 to 4), flag ALL rows of a
# (RID, ID1) pair with TIETYPE{t} = 1 if at least one row of that pair has
# TIETYPE == t. The per-row indicator is computed first and then the group
# maximum is broadcast back to every row of the pair. This replaces a loop
# that rebuilt a full-frame boolean mask for every pair (quadratic in the
# number of rows) and that could create stray columns such as "TIETYPE1.0"
# when the tie type was stored as a float.
pair_keys = [data_expand['RID'], data_expand['ID1']]
for t in range(1, 5):
    row_has_type = data_expand['TIETYPE'].eq(t).astype('int64')
    data_expand[f'TIETYPE{t}'] = (
        row_has_type.groupby(pair_keys).transform('max')
        .fillna(0)          # rows with a missing RID/ID1 belong to no group -> 0
        .astype('int64')
    )

# The raw TIETYPE column is now redundant; once it is gone, rows of the same
# pair that differed only in TIETYPE become exact duplicates and are dropped.
data_expand = data_expand.drop(columns=["TIETYPE"]).drop_duplicates()

In [140]:
# Build one graph per tie type. Every graph receives the same RID-ID1 edges;
# they differ only in which TIETYPE{t} flag is stored on each edge.
# NOTE(review): if a pair occurs in more than one row (for example once in
# each direction) with different flags, the flag of the last row is the one
# kept on the edge - confirm this is intended.
G_1, G_2, G_3, G_4 = (
    nx.from_pandas_edgelist(
        data_expand, source="RID", target="ID1", edge_attr=f"TIETYPE{t}"
    )
    for t in range(1, 5)
)

# node id -> attribute dict; starts empty and is filled from the data rows.
node_attributes = dict()

# Numeric code -> label used for the "Race" node attribute.
race_mapping = dict([
    (-9, "OOD"),
    (1, "Amer Indian"),
    (2, "Black"),
    (3, "Asian/Plsle"),
    (4, "White"),
    (5, "Other"),
])

# Numeric code -> label used for the "Education level" node attribute.
# Runs of consecutive codes that share a label are written as ranges.
education_mapping = {
    -9: "OOD",
    -8: "ID",
    -6: "Don't know",
    **dict.fromkeys(range(2, 6), "Minimal"),
    **dict.fromkeys(range(6, 9), "Elementary"),
    **dict.fromkeys(range(9, 12), "Junior High"),
    12: "High/GED",
    13: "Freshman Col",
    14: "Trade School",
    15: "Undergrad",
    16: "Collage Grad",
    **dict.fromkeys(range(17, 20), "Post Grad"),
    **dict.fromkeys((21, 23), "Undefined Code"),
}

# Numeric code -> label used for the "HIV_status" node attribute.
HIV_mapping = dict([
    (-9, "OOD"),
    (-8, "No Test"),
    (0, "Negative"),
    (1, "Positive"),
])

def _node_profile(row, suffix=""):
    """Build the attribute dict for one endpoint of a row.

    ``suffix`` selects the column set: "" reads RACE/SEX/AGE/..., "1" reads
    RACE1/SEX1/AGE1/... . Codes without an entry in a mapping become None.
    """
    # NOTE(review): every SEX code other than 1 is labelled "Female",
    # including any missing-value code - confirm against the codebook.
    sex_label = "Male" if row[f"SEX{suffix}"] == 1 else "Female"
    return {
        "HIV_status": HIV_mapping.get(row[f"HIV{suffix}"]),
        "Race": race_mapping.get(row[f"RACE{suffix}"]),
        "Sex": sex_label,
        "Age": row[f"AGE{suffix}"],
        "Education level": education_mapping.get(row[f"EDUC{suffix}"]),
        "Profession": row[f"OCC{suffix}"],
        "Gay": 0,
    }


# The unsuffixed columns describe the RID node, the "1" columns the ID1 node.
# A node that occurs in several rows keeps the attributes written last.
for _, row in data_expand.iterrows():
    node_attributes[row['RID']] = _node_profile(row)
    node_attributes[row['ID1']] = _node_profile(row, "1")

# Attach the collected attributes to the nodes of all four tie-type graphs.
for tie_graph in (G_1, G_2, G_3, G_4):
    nx.set_node_attributes(tie_graph, node_attributes)

In [141]:
# Tie type number (1-4) -> the graph built for that tie type.
graphs = dict(enumerate((G_1, G_2, G_3, G_4), start=1))

# Each graph still holds every pair as an edge. Drop the edges whose flag for
# the graph's own tie type is 0 (or absent), so G_i keeps only type-i ties.
# The list of edges is built completely before anything is removed.
for i, G in graphs.items():
    flag = f"TIETYPE{i}"
    G.remove_edges_from(
        [(u, v) for u, v, attr in G.edges(data=True) if attr.get(flag, 0) == 0]
    )

In [142]:
# Set "Gay" to 1 on both endpoints of every edge that joins two "Male" nodes.
# NOTE(review): this runs over all four graphs, so a male-male tie of any
# type sets the flag, and the flag is written to each graph's own node data
# only (the shared node_attributes dict still holds 0) - confirm intent.
for i, G in graphs.items():
    for u, v in G.edges():
        if all(G.nodes[n].get("Sex") == "Male" for n in (u, v)):
            G.nodes[u]["Gay"] = 1
            G.nodes[v]["Gay"] = 1

In [143]:
# Export each tie-type graph to its own GraphML file.
for path, tie_graph in (
    ("G_1.graphml", G_1),
    ("G_2.graphml", G_2),
    ("G_3.graphml", G_3),
    ("G_4.graphml", G_4),
):
    nx.write_graphml(tie_graph, path)

In [144]:
# tG is the transmission probability of gay sex
def create_graph(t1, t2, t3, t4, tG, indirect_factor=0.01):
    """Merge the four tie-type graphs into one graph with a combined
    transmission probability on every edge.

    Reads the module-level ``graphs`` (tie type number 1-4 -> graph) and
    ``node_attributes`` (node id -> attribute dict).

    Per-tie probabilities used for an edge:

    * type 3 (sexual): ``tG`` when both endpoints have ``Sex == "Male"``,
      otherwise ``t3``; 0 when ``t3`` is None.
    * type 4 (needle): ``t4``; 0 when ``t4`` is None.
    * type 1 (social): ``indirect_factor`` times the type-3 probability of
      the pair, but 0 when the pair also has a type-3 edge or ``t1`` is None.
    * type 2 (drugs): ``indirect_factor`` times the type-4 probability, but
      0 when the pair also has a type-4 edge or ``t2`` is None.

    When a pair is connected by several tie types the probabilities are
    combined as ``1 - (1 - p_a) * (1 - p_b) * ...``.

    Parameters
    ----------
    t1, t2 : number or None
        Only tested against None (None excludes the tie type); their
        numeric value is not used.
    t3, t4 : number or None
        Probabilities for sexual and needle ties; None excludes the type.
    tG : number
        Probability for a sexual tie between two male nodes. None is not
        handled.
    indirect_factor : float, optional
        Scaling applied to derive the type-1 / type-2 probabilities.
        Defaults to 0.01.

    Returns
    -------
    networkx.Graph
        Edges carry ``probability`` and ``TIETYPE`` (list of the tie type
        numbers present for the pair, ascending). Nodes carry the entries
        of ``node_attributes``.
    """
    sexual_graph = graphs[3]
    needle_graph = graphs[4]

    combined_probability = {}   # sorted (u, v) -> probability so far
    edge_tietype = {}           # sorted (u, v) -> list of tie type numbers

    for i in range(1, 5):
        G = graphs[i]
        for u, v in G.edges():
            # Sorted so that (u, v) and (v, u) share one dictionary key.
            edge = tuple(sorted((u, v)))

            both_male = (
                G.nodes[u].get("Sex") == "Male"
                and G.nodes[v].get("Sex") == "Male"
            )

            # Direct probabilities (types 3 and 4).
            if t3 is None:
                p_sexual = 0
            elif both_male:
                p_sexual = tG
            else:
                p_sexual = t3
            p_needle = 0 if t4 is None else t4

            # Derived probabilities (types 1 and 2): a fraction of the
            # matching direct probability, and only when the pair has no
            # direct tie of that kind.
            if t1 is None or sexual_graph.has_edge(*edge):
                p_social = 0
            else:
                p_social = p_sexual * indirect_factor

            if t2 is None or needle_graph.has_edge(*edge):
                p_drugs = 0
            else:
                p_drugs = p_needle * indirect_factor

            p = {1: p_social, 2: p_drugs, 3: p_sexual, 4: p_needle}[i]

            if edge not in combined_probability:
                combined_probability[edge] = p
                edge_tietype[edge] = [i]
            else:
                # Probability that at least one of the ties transmits.
                combined_probability[edge] = 1 - (
                    (1 - combined_probability[edge]) * (1 - p)
                )
                edge_tietype[edge].append(i)

    B = nx.Graph()
    for (a, b), probability in combined_probability.items():
        B.add_edge(a, b, probability=probability, TIETYPE=edge_tietype[(a, b)])

    # NOTE(review): node_attributes stores "Gay": 0 for every node, so the
    # flags set on the individual tie-type graphs are not carried over here.
    nx.set_node_attributes(B, node_attributes)
    return B

In [145]:
# Baseline transmission probability for each tie type; the keys match the
# keyword arguments of create_graph.
base_probabilities = dict(
    t1=0,        # Social
    t2=0,        # Drugs
    t3=0.008,    # Sexual
    t4=0.0063,   # Needle
    tG=0.0138,   # Gay sexual interaction
)

# Helper that switches selected connection types off
def exclude_probabilities(base_probs, exclude_types):
    """Return a new dict equal to ``base_probs`` except that every key in
    ``exclude_types`` maps to None. ``base_probs`` itself is left unchanged."""
    return {**base_probs, **dict.fromkeys(exclude_types)}

# Graphs built with exactly one connection type switched off, keyed "G_no<k>".
graphs_excluded = {}
connection_types = ["t1", "t2", "t3", "t4"]

# Reference graph: all connection types at their baseline probability.
B = create_graph(**base_probabilities)

# One additional graph per connection type, with that single type excluded.
for number, conn_type in enumerate(connection_types, start=1):
    graphs_excluded[f"G_no{number}"] = create_graph(
        **exclude_probabilities(base_probabilities, [conn_type])
    )

# Expose the four graphs under individual names as well.
G_no1, G_no2, G_no3, G_no4 = (
    graphs_excluded[f"G_no{k}"] for k in range(1, 5)
)

## Creating a graph for B that only shows the highest tietype per connection

This is purely for visual purposes in Gephi.

In [146]:
B_highest_tietype_edges = nx.Graph()

# Copy every edge of B, keeping its probability but reducing the TIETYPE list
# to its largest tie type number.
# The loop variable is deliberately NOT called `data`: at notebook level that
# would overwrite the DataFrame loaded at the top of the notebook.
for u, v, edge_attrs in B.edges(data=True):
    B_highest_tietype_edges.add_edge(
        u, v,
        probability=edge_attrs['probability'],  # Retain the original probability
        TIETYPE=max(edge_attrs['TIETYPE']),     # Keep only the highest tie_type
    )

# Copy node attributes from B. A separate name is used so that the global
# `node_attributes` dict read by create_graph is not replaced.
B_node_attributes = {node: dict(attrs) for node, attrs in B.nodes(data=True)}
nx.set_node_attributes(B_highest_tietype_edges, B_node_attributes)

In [147]:
# Export the single-tie-type view of B as GraphML.
nx.write_graphml(B_highest_tietype_edges, "B_highest_tietype_edges.graphml")

# Change tietype edge attribute structure

The `TIETYPE` edge attribute cannot be written to a graph file while it is a list. <br>
So we split it into separate attributes named `TIETYPEX` (one per tie type X), each with value 0 or 1.


In [148]:
# Exclusion number (1-4) -> graph built without that connection type.
excluding_graphs = {1: G_no1, 2: G_no2, 3: G_no3, 4: G_no4}
tietype_list = ["TIETYPE1", "TIETYPE2", "TIETYPE3", "TIETYPE4"]

# Replace the list-valued TIETYPE edge attribute by four 0/1 attributes.
for G in excluding_graphs.values():
    for u, v, attributes in G.edges(data=True):
        # Edges without a TIETYPE list were already converted (or never had
        # one). Skipping them keeps the cell safe to re-run; without this
        # guard a second run would reset every flag to 0.
        if "TIETYPE" not in attributes:
            continue
        present_types = attributes.pop("TIETYPE")
        for type_number, column in enumerate(tietype_list, start=1):
            attributes[column] = 1 if type_number in present_types else 0

In [149]:
# Replace the list-valued TIETYPE edge attribute of B by four 0/1 attributes
# named after the entries of tietype_list.
for u, v, attributes in B.edges(data=True):
    # Edges without a TIETYPE list were already converted. Skipping them keeps
    # the cell safe to re-run; without this guard a second run would reset
    # every flag to 0.
    if "TIETYPE" not in attributes:
        continue
    present_types = attributes.pop("TIETYPE")
    for type_number, column in enumerate(tietype_list, start=1):
        attributes[column] = 1 if type_number in present_types else 0

In [150]:
# Export the four exclusion graphs and the baseline graph B as GraphML.
for path, combined_graph in (
    ("G_no1.graphml", G_no1),
    ("G_no2.graphml", G_no2),
    ("G_no3.graphml", G_no3),
    ("G_no4.graphml", G_no4),
    ("B.graphml", B),
):
    nx.write_graphml(combined_graph, path)