In [1]:
#Importing needed libraries
import pandas as pd
import numpy as np
import re

In [2]:
# Load the contact-model CSV
ContactDS = pd.read_csv('Data/contacts_covid_model.csv')

# Rename the raw columns to the names used in the rest of the notebook
ContactDS = ContactDS.rename(
    columns={
        'time': 'Time',
        'from': 'Source',
        'to': 'Target',
        'weight': 'Weight',
        'label': 'Location',
    }
)

# Keep only the two endpoints and the location of each contact, cast every
# value to str and drop repeated (Source, Target, Location) rows.
# astype(str) replaces the deprecated element-wise DataFrame.applymap(str).
event_location = (
    ContactDS[['Source', 'Target', 'Location']]
    .astype(str)
    .drop_duplicates()
)
event_location

Unnamed: 0,Source,Target,Location
0,14298,14357,Restaurant
1,425,6831,School
2,301,15510,ConstructionSite
3,1779,18245,Shop
4,9585,18081,School
...,...,...,...
1769320,13183,19881,SingleParent
1769325,15269,16494,CovidHospital
1769364,4761,14982,Shop
1769374,2286,16004,SmallFamily


In [3]:
# Locations that get their own cluster in the graph
Infect_Location = [
    'Restaurant', 'School', 'Shop', 'Office', 'ConstructionSite',
    'CovidHospital', 'Hospital', 'Nursery', 'CareHome', 'Home',
]

# Background colour per location (same order as Infect_Location)
Location_color = [
    '#8dd3c7', '#ffffb3', '#bebada', '#fb8072', '#80b1d3',
    '#fdb462', '#b3de69', '#fccde5', '#d9d9d9', '#bc80bd',
]

# Node shape per location (same order as Infect_Location)
Location_shape = [
    'circle', 'Msquare', 'box', 'egg', 'triangle',
    'diamond', 'septagon', 'pentagon', 'Mdiamond', 'house',
]

# One row per location carrying its colour and shape
Location_Filter = pd.DataFrame(
    {
        'Location': Infect_Location,
        'bgcolor': Location_color,
        'shape': Location_shape,
    }
)

# Cluster number: 1..N in list order (kept numeric, not converted to str)
Location_Filter['Cluster'] = range(1, len(Location_Filter) + 1)
Location_Filter

Unnamed: 0,Location,bgcolor,shape,Cluster
0,Restaurant,#8dd3c7,circle,1
1,School,#ffffb3,Msquare,2
2,Shop,#bebada,box,3
3,Office,#fb8072,egg,4
4,ConstructionSite,#80b1d3,triangle,5
5,CovidHospital,#fdb462,diamond,6
6,Hospital,#b3de69,septagon,7
7,Nursery,#fccde5,pentagon,8
8,CareHome,#d9d9d9,Mdiamond,9
9,Home,#bc80bd,house,10


In [23]:
# Any location that is not listed in Location_Filter is relabelled as 'Home'
known_location = event_location['Location'].isin(Location_Filter['Location'])
event_location['Location'] = np.where(known_location, event_location['Location'], 'Home')
event_location

Unnamed: 0,Source,Target,Location
0,14298,14357,Restaurant
1,425,6831,School
2,301,15510,ConstructionSite
3,1779,18245,Shop
4,9585,18081,School
...,...,...,...
1769320,13183,19881,Home
1769325,15269,16494,CovidHospital
1769364,4761,14982,Shop
1769374,2286,16004,Home


In [24]:
# Attach bgcolor / shape / Cluster from Location_Filter to every contact row.
# The join key is passed once; listing 'Location' twice added nothing.
event_infect = pd.merge(event_location, Location_Filter, on='Location', how='inner')
event_infect

Unnamed: 0,Source,Target,Location,bgcolor,shape,Cluster
0,14298,14357,Restaurant,#8dd3c7,circle,1
1,15357,19456,Restaurant,#8dd3c7,circle,1
2,8611,18859,Restaurant,#8dd3c7,circle,1
3,10734,18854,Restaurant,#8dd3c7,circle,1
4,558,15685,Restaurant,#8dd3c7,circle,1
...,...,...,...,...,...,...
841348,854,9462,CareHome,#d9d9d9,Mdiamond,9
841349,4697,17000,CareHome,#d9d9d9,Mdiamond,9
841350,5153,5473,CareHome,#d9d9d9,Mdiamond,9
841351,60,5532,CareHome,#d9d9d9,Mdiamond,9


Processing the edges to obtain attributes for the nodes

In [29]:
# Node table built from the Target end of each contact: the target id becomes
# 'Node' and keeps the location attributes of the contact it appeared in.
TNL = (
    event_infect[['Target', 'Location', 'bgcolor', 'shape', 'Cluster']]
    .rename(columns={'Target': 'Node'})
    .drop_duplicates()
)

TNL

Unnamed: 0,Node,Location,bgcolor,shape,Cluster
0,14357,Restaurant,#8dd3c7,circle,1
1,19456,Restaurant,#8dd3c7,circle,1
2,18859,Restaurant,#8dd3c7,circle,1
3,18854,Restaurant,#8dd3c7,circle,1
4,15685,Restaurant,#8dd3c7,circle,1
...,...,...,...,...,...
839498,1657,CareHome,#d9d9d9,Mdiamond,9
839678,5532,CareHome,#d9d9d9,Mdiamond,9
840111,3809,CareHome,#d9d9d9,Mdiamond,9
840396,1232,CareHome,#d9d9d9,Mdiamond,9


In [33]:
# Node table built from the Source end of each contact
SNL = (
    event_infect[['Source', 'Location', 'bgcolor', 'shape', 'Cluster']]
    .rename(columns={'Source': 'Node'})
    .drop_duplicates()
)

# Keep only the sources whose id never occurs as a Node in TNL
already_in_targets = SNL['Node'].isin(TNL['Node'])
SNL = SNL.loc[~already_in_targets]
SNL

Unnamed: 0,Node,Location,bgcolor,shape,Cluster
32,278,Restaurant,#8dd3c7,circle,1
34,859,Restaurant,#8dd3c7,circle,1
52,1333,Restaurant,#8dd3c7,circle,1
102,1190,Restaurant,#8dd3c7,circle,1
107,469,Restaurant,#8dd3c7,circle,1
...,...,...,...,...,...
833922,705,Nursery,#fccde5,pentagon,8
837145,1047,CareHome,#d9d9d9,Mdiamond,9
837146,56,CareHome,#d9d9d9,Mdiamond,9
837168,295,CareHome,#d9d9d9,Mdiamond,9


In [37]:
# Stack the target-side and source-side node tables into the node list of the
# graph and drop exact duplicate rows.
# pd.concat is used because DataFrame.append was removed in pandas 2.0; the
# original row labels are kept, exactly as append did.
NL = pd.concat([TNL, SNL]).drop_duplicates()
NL

Unnamed: 0,Node,Location,bgcolor,shape,Cluster
0,14357,Restaurant,#8dd3c7,circle,1
1,19456,Restaurant,#8dd3c7,circle,1
2,18859,Restaurant,#8dd3c7,circle,1
3,18854,Restaurant,#8dd3c7,circle,1
4,15685,Restaurant,#8dd3c7,circle,1
...,...,...,...,...,...
833922,705,Nursery,#fccde5,pentagon,8
837145,1047,CareHome,#d9d9d9,Mdiamond,9
837146,56,CareHome,#d9d9d9,Mdiamond,9
837168,295,CareHome,#d9d9d9,Mdiamond,9


In [38]:
# Rows per node, then a dense rank of that row count (the smallest count gets 1.0)
rows_per_node = NL.groupby(['Node'])['Node'].transform('count')
# NOTE(review): No_Count holds the dense RANK of the count, not the count itself;
# the two only coincide when the counts run 1, 2, 3, ... without gaps - confirm intent.
NL['No_Count'] = rows_per_node.rank(ascending=True, method='dense')

# Order by node id (string order) and then by cluster number
NL = NL.sort_values(by=['Node', 'Cluster'])

# Dump for manual inspection
NL.to_csv("NL.csv")
NL

Unnamed: 0,Node,Location,bgcolor,shape,Cluster,No_Count
743,0,Restaurant,#8dd3c7,circle,1,3.0
544244,0,Shop,#bebada,box,3,3.0
661753,0,Home,#bc80bd,house,10,3.0
520,1,Restaurant,#8dd3c7,circle,1,4.0
134645,1,School,#ffffb3,Msquare,2,4.0
...,...,...,...,...,...,...
563940,9998,Shop,#bebada,box,3,3.0
709536,9998,Office,#fb8072,egg,4,3.0
5471,9999,Restaurant,#8dd3c7,circle,1,3.0
547755,9999,Shop,#bebada,box,3,3.0


In [15]:
# Node rows whose location is 'Restaurant'
is_restaurant = NL['Location'].isin(['Restaurant'])
NL1 = NL.loc[is_restaurant]
NL1

Unnamed: 0,Node,Location,bgcolor,shape,Cluster,No_Count
743,0,Restaurant,#8dd3c7,circle,1,3.0
520,1,Restaurant,#8dd3c7,circle,1,4.0
9322,10,Restaurant,#8dd3c7,circle,1,4.0
20,100,Restaurant,#8dd3c7,circle,1,4.0
22500,1000,Restaurant,#8dd3c7,circle,1,3.0
...,...,...,...,...,...,...
22569,9995,Restaurant,#8dd3c7,circle,1,4.0
13098,9996,Restaurant,#8dd3c7,circle,1,3.0
17064,9997,Restaurant,#8dd3c7,circle,1,4.0
16369,9998,Restaurant,#8dd3c7,circle,1,4.0


In [20]:
# Nodes that have no Restaurant row at all; for each of them keep a single row,
# the one with the lowest Cluster number.
has_restaurant_row = NL['Node'].isin(NL1['Node'])
NL2 = (
    NL.loc[~has_restaurant_row]
    .sort_values(by=['Node', 'Cluster'])
    .drop_duplicates(subset=['Node'], keep='first')
)
NL2

Unnamed: 0,Node,Location,bgcolor,shape,Cluster,No_Count
543138,10036,Shop,#bebada,box,3,3.0
837241,10405,CareHome,#d9d9d9,Mdiamond,9,1.0
837443,10469,CareHome,#d9d9d9,Mdiamond,9,1.0
837145,1047,CareHome,#d9d9d9,Mdiamond,9,1.0
837221,10655,CareHome,#d9d9d9,Mdiamond,9,1.0
...,...,...,...,...,...,...
837263,9586,CareHome,#d9d9d9,Mdiamond,9,1.0
135648,9593,School,#ffffb3,Msquare,2,3.0
837269,9718,CareHome,#d9d9d9,Mdiamond,9,1.0
837243,9843,CareHome,#d9d9d9,Mdiamond,9,1.0


Loading Infection Map text file

In [12]:
#Removes whitespaces from the string
import re
def removeWhiteSpace(textToParse):
    """Return ``textToParse`` with every run of whitespace removed."""
    chunks = textToParse.split()
    return "".join(chunks)


def parseNodeTimeObject(nodeText):
    """Split one ``"<node-id>(<time>)"`` token into its node id and infection time.

    Whitespace anywhere in ``nodeText`` is ignored.

    Returns:
        tuple: ``(nodeID, time)``, both as strings - the text before the first
        ``(`` and the text between that ``(`` and the next ``)``.

    Raises:
        IndexError: if ``nodeText`` contains no ``(``.
    """
    # Strip all whitespace first so " 12 ( 3 ) " parses like "12(3)"
    compact = "".join(nodeText.split())
    # Raw strings: "\(" in a plain literal is an invalid escape sequence
    parts = re.split(r"\(", compact)
    nodeID = parts[0]
    time = re.split(r"\)", parts[1])[0]
    return (nodeID, time)

def parseInfectedNodesList(textToParse):
    """Parse a bracketed, comma-separated node list into a Python list of tokens.

    ``"[8(1), 14596(2)]"`` becomes ``["8(1)", "14596(2)"]``; whitespace anywhere
    in the text is ignored. An empty list (``"[]"``) yields ``[]``.

    Raises:
        IndexError: if the text does not start with ``[``.
    """
    compact = "".join(textToParse.split())
    # Drop the leading "[" and the trailing "]" (raw strings avoid the invalid
    # "\[" / "\]" escape sequences of plain literals)
    inner = re.split(r'\]$', re.split(r'^\[', compact)[1])[0]
    # Skip empty tokens so "[]" gives no nodes instead of [''] - an empty token
    # cannot be parsed as a node by the caller.
    return [token for token in re.split(',', inner) if token]

attributeText1 = "infectionTime"
lst = []            # collected edges: (source id, target id, cluster number, colour)
cluster = 1         # incremented by one at every blank line of the file
color = "#FF0000"   # colour stored on every edge row

with open('Data/output1a-infectionMap.txt') as f:
    lines = f.readlines()
    for line in lines:
        if not line.strip():
            # A blank (or whitespace-only) line moves on to the next cluster
            cluster += 1
            continue

        tempSplit = re.split("->", line)

        # With exactly two "->"-separated fields the source is field 0 and the
        # target list field 1; otherwise the source is field 1 and the target
        # list field 2.
        # NOTE(review): presumably longer lines read "parent -> source -> [targets]"
        # - confirm against the data file.
        first = 0 if len(tempSplit) == 2 else 1
        tempSourceNode = parseNodeTimeObject(tempSplit[first])
        tempTargetNodes = parseInfectedNodesList(tempSplit[first + 1])

        for nodeText in tempTargetNodes:
            tempTargetNode = parseNodeTimeObject(nodeText)
            # Only the node ids are kept; the parsed times are not stored
            lst.append((tempSourceNode[0], tempTargetNode[0], cluster, color))

# One row per infection edge
infection_cluster = pd.DataFrame(lst, columns=['Source', 'Target', 'cluster', 'color'])
#infection_cluster = pd.DataFrame.drop_duplicates(infection_cluster)

# Rank = dense rank of the number of edge rows in the edge's cluster
# (clusters with the fewest edges get 1.0)
infection_cluster['Rank'] = (
    infection_cluster.groupby(['cluster'])['cluster']
    .transform('count')
    .rank(ascending=True, method='dense')
)

# Optional filter on the number of clusters to display
#clusterrange=50
#infection_cluster = infection_cluster[infection_cluster.Rank<=clusterrange]

# Cast every column to str (astype replaces the deprecated applymap)
infection_cluster = infection_cluster.astype(str)

# Dump for manual inspection
infection_cluster.to_csv("infection_cluster.csv")
infection_cluster

Unnamed: 0,Source,Target,cluster,color,Rank
0,1,8,1,#FF0000,2.0
1,8,14596,1,#FF0000,2.0
2,7,9378,2,#FF0000,5.0
3,7,0,2,#FF0000,5.0
4,7,4,2,#FF0000,5.0
...,...,...,...,...,...
17911,19652,4955,1567,#FF0000,71.0
17912,19652,4236,1567,#FF0000,71.0
17913,17230,16664,1567,#FF0000,71.0
17914,628,737,1567,#FF0000,71.0


In [13]:
# Source end of every infection edge as a node row: id, cluster Rank and colour
SNC = infection_cluster[['Source', 'Rank', 'color']].rename(columns={'Source': 'Node'})
SNC

Unnamed: 0,Node,Rank,color
0,1,2.0,#FF0000
1,8,2.0,#FF0000
2,7,5.0,#FF0000
3,7,5.0,#FF0000
4,7,5.0,#FF0000
...,...,...,...
17911,19652,71.0,#FF0000
17912,19652,71.0,#FF0000
17913,17230,71.0,#FF0000
17914,628,71.0,#FF0000


In [14]:
# Target end of every infection edge as a node row: id and cluster Rank only
# (no colour column on this side)
TNC = infection_cluster[['Target', 'Rank']].rename(columns={'Target': 'Node'})
TNC

Unnamed: 0,Node,Rank
0,8,2.0
1,14596,2.0
2,9378,5.0
3,0,5.0
4,4,5.0
...,...,...
17911,4955,71.0
17912,4236,71.0
17913,16664,71.0
17914,737,71.0


In [15]:
# Stack source-side and target-side node rows. TNC has no 'color' column, so
# its rows get NaN there. pd.concat replaces DataFrame.append (removed in
# pandas 2.0) and keeps the original row labels.
NC = pd.concat([SNC, TNC])

# One row per (Node, Rank): sorting puts NaN colours last within a group, so
# keep='first' prefers the coloured (source-side) row when both exist.
NC = (
    NC.sort_values(by=['Node', 'Rank', 'color'])
    .drop_duplicates(subset=['Node', 'Rank'], keep='first')
)
NC

Unnamed: 0,Node,Rank,color
3,0,5.0,
0,1,2.0,#FF0000
8232,10,33.0,
14500,100,78.0,#FF0000
958,1000,5.0,#FF0000
...,...,...,...
2989,9995,25.0,#FF0000
2986,9996,25.0,#FF0000
17779,9997,14.0,
3083,9998,16.0,#FF0000


In [17]:
# Number of rows each node has in NC. The raw count is stored (not a dense
# rank of it), so that "Count > 1" below really means "node occurs more than
# once" even when no node has a count of exactly 1.
NC['Count'] = NC.groupby(['Node'])['Node'].transform('count')
NC = NC.sort_values(by=['Node', 'Rank'])

# Keep only the nodes that occur more than once
# NOTE(review): the recorded output of this cell is an empty frame, and the
# merge that builds Combine_data reads NC - confirm this filter is intended.
NC = NC[NC['Count'] > 1]
NC

Unnamed: 0,Node,Rank,color,Count


In [None]:
# Join the infection-map nodes (NC) with the contact-model node attributes (NL)
# on the node id; only nodes present in both are kept. The key is passed once
# instead of being listed twice.
Combine_data = pd.merge(NC, NL, on='Node', how='inner')

# Dump for manual inspection
Combine_data.to_csv("infectionccccc.csv")
Combine_data

In [None]:
# Primary nodes: rows whose colour is the red '#FF0000', reduced to the
# columns used when writing the graph, without exact duplicates.
is_primary = Combine_data['color'].isin(['#FF0000'])
PN = (
    Combine_data.loc[is_primary, ['Node', 'Rank', 'shape', 'color', 'Cluster', 'bgcolor']]
    .drop_duplicates()
)
PN

In [None]:
# Secondary nodes: every row whose colour is NOT '#FF0000' (this includes rows
# with a missing colour), reduced to the columns used when writing the graph.
is_primary = Combine_data['color'].isin(['#FF0000'])
SN = (
    Combine_data.loc[~is_primary, ['Node', 'Rank', 'shape', 'bgcolor', 'Cluster']]
    .drop_duplicates()
)
SN

Combining infection location and assigning color

In [None]:
#secondary_node_color = pd.merge(event_infect, S, on=['Node', 'Node'], how='inner')
#secondary_node_color

In [None]:
# Start of the DOT file: a one-column frame where each row is one line of the
# file; the first line opens an undirected graph.
graph = pd.DataFrame({'dotfilegraphformat': ['graph{']})
graph

In [None]:
# One DOT node statement per primary node:
#   <Node>[cluster=<Rank> ,shape="<shape>" ,style="filled,rounded", color="#0000FF", ...];
PNC = (
    PN['Node']
    + '[cluster=' + PN['Rank']
    + ' ,shape="' + PN['shape']
    + '" ,style="filled,rounded", color="#0000FF", fontname="Arial Bold", fontcolor="#FFFFFF"];'
)

# Wrap the statements in the single-column layout used for the graph file.
# The former argument-less replace() call is dropped: it specified nothing to
# replace and relied on a deprecated implicit forward-fill.
PNC = PNC.to_frame(name='dotfilegraphformat')
PNC

In [None]:
# Add the primary-node statements below the lines collected so far
# (pd.concat replaces DataFrame.append, removed in pandas 2.0)
graph = pd.concat([graph, PNC])
graph

In [None]:
# One DOT node statement per secondary node:
#   <Node>[cluster=<Rank>, shape="<shape>", style="filled", color="<bgcolor>"];
# Note: this rebinds SNC, which earlier in the notebook held the source-node frame.
SNC = (
    SN['Node']
    + '[cluster=' + SN['Rank']
    + ', shape="' + SN['shape']
    + '", style="filled", color="' + SN['bgcolor']
    + '"];'
)

# Wrap the statements in the single-column layout used for the graph file.
# The former argument-less replace() call is dropped: it specified nothing to
# replace and relied on a deprecated implicit forward-fill.
SNC = SNC.to_frame(name='dotfilegraphformat')
SNC

In [None]:
# Add the secondary-node statements below the lines collected so far
# (pd.concat replaces DataFrame.append, removed in pandas 2.0)
graph = pd.concat([graph, SNC])
graph

In [None]:
# One undirected DOT edge statement per infection: "<Source>--<Target>;"
edge_statement = infection_cluster['Source'] + '--' + infection_cluster['Target'] + ';'
IC = edge_statement.to_frame(name='dotfilegraphformat')
IC

In [None]:
# Add the edge statements below the node statements
# (pd.concat replaces DataFrame.append, removed in pandas 2.0)
graph = pd.concat([graph, IC])
graph

In [None]:
# Close the graph body with the final "}" line
# (pd.concat replaces DataFrame.append, removed in pandas 2.0)
graph = pd.concat([graph, pd.DataFrame(['}'], columns=['dotfilegraphformat'])])
graph

In [None]:
# Write the collected lines to a Graphviz file, one frame row per text line
numpy_array = graph.to_numpy()
np.savetxt(
    fname="infection_connected_cluster.gv",
    X=numpy_array,
    fmt="%s",
)

In [None]:
#p.render('infection_cluster.gv', view=True)  # doctest: +SKIP
#'test-output/round-table.gv.pdf'

Graph file for the location-wise gmap

In [None]:
# Start of the location-wise DOT file: a one-column frame where each row is one
# line of the file; the first line opens an undirected graph.
Lgraph = pd.DataFrame({'dotfilegraphformat': ['graph{']})
Lgraph

In [None]:
# One DOT node statement per primary node, clustered by location:
#   <Node>[cluster=<Cluster>, clustercolor="<bgcolor>", shape="<shape>", style="filled", color="BLUE", ...];
# Cluster is cast to str explicitly: Location_Filter stores it as integers and
# adding an integer column to strings raises a TypeError.
LPNC = (
    PN['Node']
    + '[cluster=' + PN['Cluster'].astype(str)
    + ', clustercolor="' + PN['bgcolor']
    + '", shape="' + PN['shape']
    + '", style="filled", color="BLUE", fontname="Arial Bold", fontcolor="#FFFFFF"];'
)

# Wrap the statements in the single-column layout used for the graph file.
# The former argument-less replace() call is dropped: it specified nothing to
# replace and relied on a deprecated implicit forward-fill.
LPNC = LPNC.to_frame(name='dotfilegraphformat')

# Dump for manual inspection.
# NOTE(review): this is the same file name the Combine_data dump uses, so that
# file is overwritten here - confirm this is intended.
LPNC.to_csv("infectionccccc.csv")
LPNC


In [None]:
# Add the primary-node statements below the lines collected so far
# (pd.concat replaces DataFrame.append, removed in pandas 2.0)
Lgraph = pd.concat([Lgraph, LPNC])
Lgraph

In [None]:
# One DOT node statement per secondary node, clustered by location:
#   <Node>[cluster=<Cluster>, clustercolor="<bgcolor>", shape="<shape>"];
# Cluster is cast to str explicitly: Location_Filter stores it as integers and
# adding an integer column to strings raises a TypeError.
LSNC = (
    SN['Node']
    + '[cluster=' + SN['Cluster'].astype(str)
    + ', clustercolor="' + SN['bgcolor']
    + '", shape="' + SN['shape']
    + '"];'
)

# Wrap the statements in the single-column layout used for the graph file.
# The former argument-less replace() call is dropped: it specified nothing to
# replace and relied on a deprecated implicit forward-fill.
LSNC = LSNC.to_frame(name='dotfilegraphformat')
LSNC

In [None]:
# Add the secondary-node statements below the lines collected so far
# (pd.concat replaces DataFrame.append, removed in pandas 2.0)
Lgraph = pd.concat([Lgraph, LSNC])
Lgraph

In [None]:
# One undirected DOT edge statement per infection: "<Source>--<Target>;"
edge_statement = infection_cluster['Source'] + '--' + infection_cluster['Target'] + ';'
LIC = edge_statement.to_frame(name='dotfilegraphformat')
LIC

In [None]:
# Add the edge statements below the node statements
# (pd.concat replaces DataFrame.append, removed in pandas 2.0)
Lgraph = pd.concat([Lgraph, LIC])
Lgraph

In [None]:
# Close the graph body with the final "}" line
# (pd.concat replaces DataFrame.append, removed in pandas 2.0)
Lgraph = pd.concat([Lgraph, pd.DataFrame(['}'], columns=['dotfilegraphformat'])])
Lgraph

In [None]:
# Write the collected lines to a Graphviz file, one frame row per text line
Lnumpy_array = Lgraph.to_numpy()
np.savetxt(
    fname="infection_Location_cluster.gv",
    X=Lnumpy_array,
    fmt="%s",
)

In [None]:
#p.render('infection_cluster.gv', view=True)  # doctest: +SKIP
#'test-output/round-table.gv.pdf'