In [1]:
#Importing needed libraries
import pandas as pd
import numpy as np

#library for the regular expressions
import re

In [2]:
#loading contacts_covid_model csv file
ContactDS = pd.read_csv('Data/contacts_covid_model.csv')

#Renaming Column names
ContactDS=ContactDS.rename(columns={'time':'Time','from':'Source','to':'Target','weight':'Weight','label':'Location'})

#Creating a UniqueId by concatenating Infection Time, Source Node and Target Node
ContactDS['UniqueId']=ContactDS['Time'].map(str)+'|'+ContactDS['Source'].map(str)+'|'+ContactDS['Target'].map(str)

#Creating another unique id by concatenating Infection Time and Target Node. 
ContactDS['Time|Target']=ContactDS['Time'].map(str)+'|'+ContactDS['Target'].map(str)

#Creating another unique id by concatenating Infection Time and Target Node. 
ContactDS['Time|Source']=ContactDS['Time'].map(str)+'|'+ContactDS['Source'].map(str)

#Selecting only the needed columns
ContactDS=ContactDS[['UniqueId','Time','Source','Target','Location','Time|Target','Time|Source']]

#converting all the datatypes to string
ContactDS = ContactDS.applymap(str)
#event_location.to_csv("event_location.csv")

#removing duplicates if any
ContactDS = ContactDS.drop_duplicates()
ContactDS

Unnamed: 0,UniqueId,Time,Source,Target,Location,Time|Target,Time|Source
0,0|14298|14357,0,14298,14357,Restaurant,0|14357,0|14298
1,0|425|6831,0,425,6831,School,0|6831,0|425
2,0|301|15510,0,301,15510,ConstructionSite,0|15510,0|301
3,0|1779|18245,0,1779,18245,Shop,0|18245,0|1779
4,0|9585|18081,0,9585,18081,School,0|18081,0|9585
...,...,...,...,...,...,...,...
1769380,89|5452|17652,89,5452,17652,CovidHospital,89|17652,89|5452
1769381,89|8818|12335,89,8818,12335,SmallFamily,89|12335,89|8818
1769382,89|8111|18308,89,8111,18308,SmallFamily,89|18308,89|8111
1769383,89|5706|5708,89,5706,5708,SmallFamily,89|5708,89|5706


In [3]:
#Removes whitespaces from the string
import re
def removeWhiteSpace(textToParse):
    return "".join(textToParse.split());


def parseNodeTimeObject(nodeText):
    """Takes in an individual node text and separates them into the Node-ID and infection time"""
    nodeText = removeWhiteSpace(nodeText);
    x = re.split("\(", nodeText);
    nodeID = x[0];
    time = re.split("\)", x[1])[0];
    return (nodeID, time);

def parseInfectedNodesList(textToParse):
    """Parses a list of infected nodes text and returns a list of nodes as a Python list"""
    textToParse = removeWhiteSpace(textToParse);
    infectedNodes = re.split(',', re.split('\]$', re.split('^\[', textToParse)[1])[0]);
    return infectedNodes;

attributeText1 = "infectionTime"
lst=[]
cluster=1

with open('Data/output1a-infectionMap.txt') as f:
    lines = f.readlines()
    for line in lines:
        if(line == '\n'):
            cluster += 1
            #Empty line
            pass;
        else:
            # First check how many "->" this infection line has.
            infectionTimes = []
            tempSplit = re.split("->", line)
            color="#FF0000"

            if len(tempSplit) == 2:
                #This is the initial phase in the chain
                tempSourceNode = parseNodeTimeObject(tempSplit[0]);
                tempTargetNodes = parseInfectedNodesList(tempSplit[1]);
                for nodeText in tempTargetNodes:
                    tempTargetNode = parseNodeTimeObject(nodeText);
                    temp = (int(float(tempTargetNode[1])), tempSourceNode[0],tempTargetNode[0], cluster, color)
                    lst.append(temp)
            else:
                #This is further down the chain
                color=""
                tempSourceNode = parseNodeTimeObject(tempSplit[1]);
                tempTargetNodes = parseInfectedNodesList(tempSplit[2]);
                for nodeText in tempTargetNodes:
                    tempTargetNode = parseNodeTimeObject(nodeText);
                    temp = (int(float(tempTargetNode[1])), tempSourceNode[0], tempTargetNode[0], cluster,color)
                    lst.append(temp)

#Assigning result column list to pandas dataframe                    
InfectionMap = pd.DataFrame(lst,columns =['Time', 'Source','Target', 'cluster','color'])

#Creating UniqueId for the dataset
InfectionMap['UniqueId'] = InfectionMap['Time'].map(str)+'|'+InfectionMap['Source'].map(str)+'|'+InfectionMap['Target'].map(str)

InfectionMap['Time|Target'] = InfectionMap['Time'].map(str)+'|'+InfectionMap['Target'].map(str)


#Ranking the infection source nodes based on the size of infected nodes by initial index node
InfectionMap['Rank'] = InfectionMap.groupby(['cluster'])['cluster'].transform('count').rank(ascending=True, method='dense').astype(int)

#As there were many clusters based on Rank so using Filter for the number of clusters to be displayed
#clusterrange=50
#InfectionMap = InfectionMap[InfectionMap.Rank<=clusterrange]

#Type conversion all columns to string
InfectionMap = InfectionMap.applymap(str)

#Output to csv file for checking result
#InfectionMap.to_csv("InfectionMap.csv")
InfectionMap

Unnamed: 0,Time,Source,Target,cluster,color,UniqueId,Time|Target,Rank
0,6,1,8,1,#FF0000,6|1|8,6|8,2
1,19,8,14596,1,,19|8|14596,19|14596,2
2,3,7,9378,2,#FF0000,3|7|9378,3|9378,5
3,5,7,0,2,#FF0000,5|7|0,5|0,5
4,5,7,4,2,#FF0000,5|7|4,5|4,5
...,...,...,...,...,...,...,...,...
17911,5,19652,4955,1567,,5|19652|4955,5|4955,71
17912,6,19652,4236,1567,,6|19652|4236,6|4236,71
17913,12,17230,16664,1567,,12|17230|16664,12|16664,71
17914,6,628,737,1567,,6|628|737,6|737,71


In [4]:
#Merging Contact_covid_model Dataset and InfectionMap Dataset on the basis of UniqueId(Time, Source and Target Node)
CombineData1 = pd.merge(ContactDS, InfectionMap, on=['UniqueId', 'UniqueId'], how='inner')
CombineData1

Unnamed: 0,UniqueId,Time_x,Source_x,Target_x,Location,Time|Target_x,Time|Source,Time_y,Source_y,Target_y,cluster,color,Time|Target_y,Rank
0,0|16092|19903,0,16092,19903,LargeManyAdultFamily,0|19903,0|16092,0,16092,19903,1268,#FF0000,0|19903,77
1,0|2995|9030,0,2995,9030,SingleAdult,0|9030,0|2995,0,2995,9030,226,#FF0000,0|9030,9
2,0|6367|17494,0,6367,17494,Office,0|17494,0|6367,0,6367,17494,481,#FF0000,0|17494,4
3,0|9300|19300,0,9300,19300,School,0|19300,0|9300,0,9300,19300,735,#FF0000,0|19300,32
4,0|7764|17581,0,7764,17581,School,0|17581,0|7764,0,7764,17581,606,#FF0000,0|17581,74
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9228,26|8355|17947,26,8355,17947,Shop,26|17947,26|8355,26,8355,17947,709,,26|17947,44
9229,27|3505|16731,27,3505,16731,Restaurant,27|16731,27|3505,27,3505,16731,1287,,27|16731,26
9230,28|3893|18162,28,3893,18162,ConstructionSite,28|18162,28|3893,28,3893,18162,213,,28|18162,8
9231,29|9615|13287,29,9615,13287,Shop,29|13287,29|9615,29,9615,13287,881,,29|13287,28


In [5]:
#Selecting a node along with exact Infection Location and Rank where Infection Time, Source and Target Node Matches
TargetNode1 = CombineData1[['Target_x','Location','Rank']]
TargetNode1 = TargetNode1.sort_values(by=['Target_x', 'Location']).drop_duplicates(subset=['Target_x'], keep='first')

#Renaming Column Target_x to Node
TargetNode1 = TargetNode1.rename(columns={'Target_x':'Node'})

TargetNode1

Unnamed: 0,Node,Location,Rank
6386,1000,LargeAdult,5
1745,10001,SmallAdult,9
2474,10003,School,37
2201,10005,ConstructionSite,6
8743,10006,Shop,6
...,...,...,...
6541,9990,LargeAdult,11
6274,9993,ConstructionSite,33
629,9996,Shop,25
5927,9998,Office,16


In [6]:
#Selecting dataset where UniqueId(Infection Time, Source and Target Node) of both the dataset doesn't matches
InfectionNode1 = InfectionMap[~InfectionMap.Target.isin(TargetNode1.Node)]
InfectionNode1

Unnamed: 0,Time,Source,Target,cluster,color,UniqueId,Time|Target,Rank
3,5,7,0,2,#FF0000,5|7|0,5|0,5
4,5,7,4,2,#FF0000,5|7|4,5|4,5
13,11,15659,1091,4,,11|15659|1091,11|1091,5
25,2,10469,9392,5,,2|10469|9392,2|9392,27
26,4,7870,5532,5,,4|7870|5532,4|5532,27
...,...,...,...,...,...,...,...,...
17908,3,19652,17275,1567,,3|19652|17275,3|17275,71
17910,4,19652,628,1567,,4|19652|628,4|628,71
17911,5,19652,4955,1567,,5|19652|4955,5|4955,71
17912,6,19652,4236,1567,,6|19652|4236,6|4236,71


In [7]:
#Merging Contact_covid_model Dataset and remaining Dataset on the basis of Infection Time and Target Node
CombineData2 = pd.merge(ContactDS, InfectionNode1, on=['Time|Target', 'Time|Target'], how='inner')
CombineData2

Unnamed: 0,UniqueId_x,Time_x,Source_x,Target_x,Location,Time|Target,Time|Source,Time_y,Source_y,Target_y,cluster,color,UniqueId_y,Rank
0,0|6572|13574,0,6572,13574,School,0|13574,0|6572,0,17632,13574,1381,#FF0000,0|17632|13574,83
1,0|10746|13574,0,10746,13574,School,0|13574,0|10746,0,17632,13574,1381,#FF0000,0|17632|13574,83
2,0|7311|13574,0,7311,13574,School,0|13574,0|7311,0,17632,13574,1381,#FF0000,0|17632|13574,83
3,0|7897|13574,0,7897,13574,School,0|13574,0|7897,0,17632,13574,1381,#FF0000,0|17632|13574,83
4,0|1745|13574,0,1745,13574,School,0|13574,0|1745,0,17632,13574,1381,#FF0000,0|17632|13574,83
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8427,27|2691|2692,27,2691,2692,LargeAdult,27|2692,27|2691,27,10361,2692,712,,27|10361|2692,7
8428,28|14924|14927,28,14924,14927,SmallAdult,28|14927,28|14924,28,15484,14927,1021,,28|15484|14927,79
8429,28|8887|14927,28,8887,14927,Shop,28|14927,28|8887,28,15484,14927,1021,,28|15484|14927,79
8430,35|12258|13629,35,12258,13629,Shop,35|13629,35|12258,35,14927,13629,1021,,35|14927|13629,79


In [8]:
#Selecting a node along with exact Infection Location and Rank where Infection Time and Target Node Matches
TargetNode2 = CombineData2[['Target_x','Location','Rank']]
TargetNode2 = TargetNode2.sort_values(by=['Target_x', 'Location']).drop_duplicates(subset=['Target_x'], keep='first')

#Renaming Column Target_x to Node
TargetNode2 = TargetNode2.rename(columns={'Target_x':'Node'})
TargetNode2

Unnamed: 0,Node,Location,Rank
3570,10002,Shop,13
441,10015,DoubleOlder,8
5210,10016,SmallAdult,14
7764,10019,Office,29
1563,10020,School,77
...,...,...,...
2559,9986,School,24
6025,9991,Shop,48
4863,9992,School,57
7824,9994,ConstructionSite,21


In [9]:
#Appending the Nodes and Location
FinalNode1 = TargetNode1.append(TargetNode2)
FinalNode1

Unnamed: 0,Node,Location,Rank
6386,1000,LargeAdult,5
1745,10001,SmallAdult,9
2474,10003,School,37
2201,10005,ConstructionSite,6
8743,10006,Shop,6
...,...,...,...
2559,9986,School,24
6025,9991,Shop,48
4863,9992,School,57
7824,9994,ConstructionSite,21


In [10]:
#Selecting remaining dataset where Infection Time and Target Node doesn't matches too.
InfectionNode2 = InfectionMap[~InfectionMap.Target.isin(FinalNode1.Node)]
InfectionNode2

Unnamed: 0,Time,Source,Target,cluster,color,UniqueId,Time|Target,Rank
3,5,7,0,2,#FF0000,5|7|0,5|0,5
13,11,15659,1091,4,,11|15659|1091,11|1091,5
26,4,7870,5532,5,,4|7870|5532,4|5532,27
28,10,13487,13486,5,,10|13487|13486,10|13486,27
30,4,19539,6761,5,,4|19539|6761,4|6761,27
...,...,...,...,...,...,...,...,...
17902,6,412,409,1567,,6|412|409,6|409,71
17903,12,12746,11941,1567,,12|12746|11941,12|11941,71
17904,14,11941,3044,1567,,14|11941|3044,14|3044,71
17905,18,11941,9662,1567,,18|11941|9662,18|9662,71


In [11]:
#Merging Contact_covid_model Dataset and remaining Dataset on the basis of Target Node
CombineData3 = pd.merge(ContactDS, InfectionNode2, on=['Target', 'Target'], how='inner')
CombineData3

Unnamed: 0,UniqueId_x,Time_x,Source_x,Target,Location,Time|Target_x,Time|Source,Time_y,Source_y,cluster,color,UniqueId_y,Time|Target_y,Rank
0,0|13226|14568,0,13226,14568,Shop,0|14568,0|13226,7,15225,1459,,7|15225|14568,7|14568,11
1,1|8368|14568,1,8368,14568,Shop,1|14568,1|8368,7,15225,1459,,7|15225|14568,7|14568,11
2,8|2322|14568,8,2322,14568,Shop,8|14568,8|2322,7,15225,1459,,7|15225|14568,7|14568,11
3,10|10678|14568,10,10678,14568,Restaurant,10|14568,10|10678,7,15225,1459,,7|15225|14568,7|14568,11
4,12|9637|14568,12,9637,14568,LargeAdult,12|14568,12|9637,7,15225,1459,,7|15225|14568,7|14568,11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
150238,82|743|779,82,743,779,Shop,82|779,82|743,7,11814,929,#FF0000,7|11814|779,7|779,4
150239,81|1142|1226,81,1142,1226,Office,81|1226,81|1142,11,7629,600,#FF0000,11|7629|1226,11|1226,84
150240,85|535|955,85,535,955,ConstructionSite,85|955,85|535,4,16576,1305,#FF0000,4|16576|955,4|955,12
150241,87|304|784,87,304,784,ConstructionSite,87|784,87|304,11,3733,452,,11|3733|784,11|784,69


In [12]:
#Selecting a node along with exact Infection Location and Rank where Infection Target Node Matches
TargetNode3 = CombineData3[['Time_x','Target','Location','Rank']]

#Removing duplicates and retaining row having highers time values.
TargetNode3 = TargetNode3.sort_values(by=['Time_x','Target', 'Location','Rank']).drop_duplicates(subset=['Target'], keep='last')

#Renaming Column Target_x to Node
TargetNode3 = TargetNode3[['Target', 'Location', 'Rank']]
TargetNode3 = TargetNode3.rename(columns={'Target':'Node'})
TargetNode3

Unnamed: 0,Node,Location,Rank
32876,25,Shop,65
45380,763,Nursery,3
102093,1027,Restaurant,3
89444,364,ConstructionSite,7
104693,432,Restaurant,74
...,...,...,...
101705,9926,Restaurant,14
107514,9959,Office,20
15645,9975,Restaurant,10
105434,999,LargeAdult,65


In [13]:
#Appending the Nodes and Location
FinalNode2 = FinalNode1.append(TargetNode3)
FinalNode2

Unnamed: 0,Node,Location,Rank
6386,1000,LargeAdult,5
1745,10001,SmallAdult,9
2474,10003,School,37
2201,10005,ConstructionSite,6
8743,10006,Shop,6
...,...,...,...
101705,9926,Restaurant,14
107514,9959,Office,20
15645,9975,Restaurant,10
105434,999,LargeAdult,65


In [14]:
#Selecting remaining dataset where InfectionMap Target and ContactDS Target Node doesnot matches.
InfectionNode3 = InfectionMap[~InfectionMap.Target.isin(FinalNode2.Node)]
InfectionNode3 = InfectionNode3[['Target', 'Rank', 'Time|Target']]
InfectionNode3 = InfectionNode3.rename(columns={'Target':'Node'})
InfectionNode3 = InfectionNode3.drop_duplicates()
InfectionNode3

Unnamed: 0,Node,Rank,Time|Target
3,0,5,5|0
42,48,33,1|48
55,46,33,4|46
146,149,3,6|149
298,22,1,10|22
...,...,...,...
17482,321,29,10|321
17600,297,9,4|297
17609,796,26,1|796
17618,82,26,5|82


In [15]:
#combining all Target Nodes
TargetNodes = FinalNode2[['Node']]
TargetNodes1 = InfectionNode3[['Node']] 
TargetNodes = TargetNodes.append(TargetNodes1)
TargetNodes

Unnamed: 0,Node
6386,1000
1745,10001
2474,10003
2201,10005
8743,10006
...,...
17482,321
17600,297
17609,796
17618,82


In [16]:
#Selecting remaining dataset where Infection Source Node doesn't match with Target Nodes.
InfectionNode4 = InfectionMap[~InfectionMap.Source.isin(TargetNodes.Node)]
InfectionNode4 = InfectionNode4[['Source', 'Rank','color']].drop_duplicates()
InfectionNode4['Time|Target'] = '0' + '|' + InfectionNode4['Source']
InfectionNode4 = InfectionNode4.rename(columns={'Source':'Node'})
InfectionNode4

Unnamed: 0,Node,Rank,color,Time|Target
0,1,2,#FF0000,0|1
2,7,5,#FF0000,0|7
7,18,3,#FF0000,0|18
10,32,5,#FF0000,0|32
15,60,27,#FF0000,0|60
...,...,...,...,...
17784,19965,4,#FF0000,0|19965
17788,19976,16,#FF0000,0|19976
17804,19977,18,#FF0000,0|19977
17822,19985,4,#FF0000,0|19985


In [17]:
#Appending the Nodes and Location
FInfectionNode = InfectionNode4.append(InfectionNode3)
FInfectionNode

Unnamed: 0,Node,Rank,color,Time|Target
0,1,2,#FF0000,0|1
2,7,5,#FF0000,0|7
7,18,3,#FF0000,0|18
10,32,5,#FF0000,0|32
15,60,27,#FF0000,0|60
...,...,...,...,...
17482,321,29,,10|321
17600,297,9,,4|297
17609,796,26,,1|796
17618,82,26,,5|82


In [18]:
#Merging Contact_covid_model Dataset and remaining Dataset on the basis of Target Node
CombineData4 = pd.merge(ContactDS, FInfectionNode, on=['Time|Target', 'Time|Target'], how='inner')
CombineData4

Unnamed: 0,UniqueId,Time,Source,Target,Location,Time|Target,Time|Source,Node,Rank,color
0,0|10734|18854,0,10734,18854,Restaurant,0|18854,0|10734,18854,10,#FF0000
1,0|16361|18854,0,16361,18854,Restaurant,0|18854,0|16361,18854,10,#FF0000
2,0|5749|18854,0,5749,18854,Restaurant,0|18854,0|5749,18854,10,#FF0000
3,0|6463|19348,0,6463,19348,Shop,0|19348,0|6463,19348,58,#FF0000
4,0|356|19348,0,356,19348,LargeAdult,0|19348,0|356,19348,58,#FF0000
...,...,...,...,...,...,...,...,...,...,...
2378,0|2373|19765,0,2373,19765,LargeManyAdultFamily,0|19765,0|2373,19765,2,#FF0000
2379,0|8911|8912,0,8911,8912,SmallAdult,0|8912,0|8911,8912,3,#FF0000
2380,0|14100|14103,0,14100,14103,SmallAdult,0|14103,0|14100,14103,22,#FF0000
2381,0|3961|3962,0,3961,3962,SmallAdult,0|3962,0|3961,3962,1,#FF0000


In [19]:
#Selecting a node along with exact Infection Location and Rank where Infection Target Node Matches
TargetNode4 = CombineData4[['Node','Location','Rank','color']]

#Removing duplicates and retaining row having highers time values.
TargetNode4 = TargetNode4.sort_values(by=['Node', 'Location']).drop_duplicates(subset=['Node'], keep='last')

TargetNode4

Unnamed: 0,Node,Location,Rank,color
64,10000,ConstructionSite,9,#FF0000
2248,10029,ConstructionSite,7,#FF0000
2314,10033,ConstructionSite,3,#FF0000
1331,1004,SmallAdult,5,#FF0000
453,10051,School,2,#FF0000
...,...,...,...,...
878,9878,DoubleOlder,17,#FF0000
505,9881,Shop,7,#FF0000
662,9921,Shop,2,#FF0000
1589,9974,Office,10,#FF0000


In [20]:
#Appending the Nodes and Location
FinalNode3 = FinalNode2.append(TargetNode4)
FinalNode3

Unnamed: 0,Node,Location,Rank,color
6386,1000,LargeAdult,5,
1745,10001,SmallAdult,9,
2474,10003,School,37,
2201,10005,ConstructionSite,6,
8743,10006,Shop,6,
...,...,...,...,...
878,9878,DoubleOlder,17,#FF0000
505,9881,Shop,7,#FF0000
662,9921,Shop,2,#FF0000
1589,9974,Office,10,#FF0000


In [21]:
#Selecting remaining dataset where Infection Time and Target Node doesn't matches too.
InfectionNode5 = FInfectionNode[~FInfectionNode.Node.isin(TargetNode4.Node)]
#Renaming Column Target_x to Node
InfectionNode5 = InfectionNode5.rename(columns={'Time|Target':'Time|Source'})
InfectionNode5

Unnamed: 0,Node,Rank,color,Time|Source
0,1,2,#FF0000,0|1
2,7,5,#FF0000,0|7
7,18,3,#FF0000,0|18
10,32,5,#FF0000,0|32
15,60,27,#FF0000,0|60
...,...,...,...,...
17482,321,29,,10|321
17600,297,9,,4|297
17609,796,26,,1|796
17618,82,26,,5|82


In [22]:
#Merging Contact_covid_model Dataset and remaining Dataset on the basis of Target Node
CombineData5 = pd.merge(ContactDS, InfectionNode5, on=['Time|Source', 'Time|Source'], how='inner')
CombineData5

Unnamed: 0,UniqueId,Time,Source,Target,Location,Time|Target,Time|Source,Node,Rank,color
0,0|1742|18906,0,1742,18906,School,0|18906,0|1742,1742,1,#FF0000
1,0|1742|1760,0,1742,1760,LargeTwoAdultFamily,0|1760,0|1742,1742,1,#FF0000
2,0|1742|16850,0,1742,16850,School,0|16850,0|1742,1742,1,#FF0000
3,0|1742|7804,0,1742,7804,School,0|7804,0|1742,1742,1,#FF0000
4,0|1742|14132,0,1742,14132,School,0|14132,0|1742,1742,1,#FF0000
...,...,...,...,...,...,...,...,...,...,...
1268,21|39|9855,21,39,9855,Shop,21|9855,21|39,39,19,
1269,22|1188|9225,22,1188,9225,Shop,22|9225,22|1188,1188,17,
1270,24|239|9856,24,239,9856,Shop,24|9856,24|239,239,10,
1271,24|239|18763,24,239,18763,Restaurant,24|18763,24|239,239,10,


In [23]:
#Selecting a node along with exact Infection Location and Rank where Infection Target Node Matches
TargetNode5 = CombineData5[['Node','Location','Rank','color']]

#Removing duplicates and retaining row having highers time values.
TargetNode5 = TargetNode5.sort_values(by=['Node', 'Location']).drop_duplicates(subset=['Node'], keep='last')

TargetNode5

Unnamed: 0,Node,Location,Rank,color
1005,0,SmallFamily,5,
45,1,School,2,#FF0000
655,1006,SingleAdult,3,#FF0000
401,10100,School,64,#FF0000
465,10117,LargeAdult,2,#FF0000
...,...,...,...,...
361,985,Office,1,#FF0000
562,9886,Office,3,#FF0000
504,9923,ConstructionSite,26,#FF0000
597,998,LargeAdult,5,#FF0000


In [24]:
#Appending the Nodes and Location
FinalNode4 = FinalNode3.append(TargetNode5)
FinalNode4

Unnamed: 0,Node,Location,Rank,color
6386,1000,LargeAdult,5,
1745,10001,SmallAdult,9,
2474,10003,School,37,
2201,10005,ConstructionSite,6,
8743,10006,Shop,6,
...,...,...,...,...
361,985,Office,1,#FF0000
562,9886,Office,3,#FF0000
504,9923,ConstructionSite,26,#FF0000
597,998,LargeAdult,5,#FF0000


In [25]:
#Selecting remaining dataset where Infection Time and Target Node doesn't matches too.
InfectionNode6 = InfectionNode5[~InfectionNode5.Node.isin(TargetNode5.Node)]

#Renaming Column Target_x to Node
InfectionNode6 = InfectionNode6.rename(columns={'Node':'Source'})
InfectionNode6

Unnamed: 0,Source,Rank,color,Time|Source
10,32,5,#FF0000,0|32
89,81,3,#FF0000,0|81
143,145,3,#FF0000,0|145
299,320,2,#FF0000,0|320
315,339,4,#FF0000,0|339
...,...,...,...,...
16999,19056,1,#FF0000,0|19056
17125,19106,11,#FF0000,0|19106
17184,19172,3,#FF0000,0|19172
17197,19206,8,#FF0000,0|19206


In [26]:
#Merging Contact_covid_model Dataset and remaining Dataset on the basis of Target Node
CombineData6 = pd.merge(ContactDS, InfectionNode6, on=['Source', 'Source'], how='inner')
CombineData6

Unnamed: 0,UniqueId,Time,Source,Target,Location,Time|Target,Time|Source_x,Rank,color,Time|Source_y
0,1|19011|19012,1,19011,19012,LargeAdult,1|19012,1|19011,1,#FF0000,0|19011
1,1|19011|19013,1,19011,19013,LargeAdult,1|19013,1|19011,1,#FF0000,0|19011
2,2|19011|19012,2,19011,19012,LargeAdult,2|19012,2|19011,1,#FF0000,0|19011
3,2|19011|19013,2,19011,19013,LargeAdult,2|19013,2|19011,1,#FF0000,0|19011
4,3|19011|19013,3,19011,19013,LargeAdult,3|19013,3|19011,1,#FF0000,0|19011
...,...,...,...,...,...,...,...,...,...,...
11761,38|17490|18717,38,17490,18717,ConstructionSite,38|18717,38|17490,13,#FF0000,0|17490
11762,40|14841|19774,40,14841,19774,Restaurant,40|19774,40|14841,6,#FF0000,0|14841
11763,40|14841|15787,40,14841,15787,Restaurant,40|15787,40|14841,6,#FF0000,0|14841
11764,85|14841|19927,85,14841,19927,Shop,85|19927,85|14841,6,#FF0000,0|14841


In [27]:
#Selecting a node along with exact Infection Location and Rank where Infection Target Node Matches
TargetNode6 = CombineData6[['Time','Source','Location','Rank','color']]

#Removing duplicates and retaining row having highers time values.
TargetNode6 = TargetNode6.sort_values(by=['Time','Source', 'Location']).drop_duplicates(subset=['Source'], keep='last')
TargetNode6 = TargetNode6[['Source','Location','Rank','color']]
TargetNode6 = TargetNode6.rename(columns={'Source':'Node'})
TargetNode6

Unnamed: 0,Node,Location,Rank,color
11641,16887,Restaurant,4,#FF0000
11653,16990,Office,3,#FF0000
11761,17490,ConstructionSite,13,#FF0000
10355,13083,Restaurant,4,#FF0000
10907,18400,Hospital,6,#FF0000
...,...,...,...,...
407,822,ConstructionSite,31,#FF0000
528,8498,Restaurant,2,#FF0000
4270,8662,AdultPensioner,24,#FF0000
6925,904,ConstructionSite,10,#FF0000


In [28]:
#Appending the Nodes and Location
AllNodes = FinalNode4.append(TargetNode6)
#AllNodes.to_csv("AllNodes.csv")
AllNodes

Unnamed: 0,Node,Location,Rank,color
6386,1000,LargeAdult,5,
1745,10001,SmallAdult,9,
2474,10003,School,37,
2201,10005,ConstructionSite,6,
8743,10006,Shop,6,
...,...,...,...,...
407,822,ConstructionSite,31,#FF0000
528,8498,Restaurant,2,#FF0000
4270,8662,AdultPensioner,24,#FF0000
6925,904,ConstructionSite,10,#FF0000


In [29]:
#Extracting unique location from the dataset
#Location_color = event_location.Location.unique()
#Location_color = pd.DataFrame(Location_color,columns=['Location'])
Infect_Location=['Restaurant','School','Shop','Office','ConstructionSite','CovidHospital','Hospital','Nursery','CareHome','Home']

#Assigning background colour, cluster for location
Location_color = ['#8dd3c7','#ffffb3','#bebada','#fb8072','#80b1d3','#fdb462','#b3de69','#fccde5','#d9d9d9','#bc80bd']

#Assigning shape of node for each cluster location
Location_shape = ['circle','Msquare','box','egg','triangle','diamond','septagon','pentagon','Mdiamond','house']
Location_Filter = pd.DataFrame([Infect_Location,Location_color,Location_shape],index=['Location','bgcolor','shape']).T

#Assigning Cluster number
Location_Filter['Cluster'] = range(1,1+len(Location_Filter))

#Type conversion all columns to string
#Location_Filter = Location_Filter.applymap(str)
Location_Filter

Unnamed: 0,Location,bgcolor,shape,Cluster
0,Restaurant,#8dd3c7,circle,1
1,School,#ffffb3,Msquare,2
2,Shop,#bebada,box,3
3,Office,#fb8072,egg,4
4,ConstructionSite,#80b1d3,triangle,5
5,CovidHospital,#fdb462,diamond,6
6,Hospital,#b3de69,septagon,7
7,Nursery,#fccde5,pentagon,8
8,CareHome,#d9d9d9,Mdiamond,9
9,Home,#bc80bd,house,10


In [30]:
#Assigning all other location nodes to Home that are other than listed in Location_Filter
AllNodes['Location']=np.where(AllNodes.Location.isin(Location_Filter['Location']),AllNodes.Location,'Home')
#AllNodes.to_csv("AllNodes.csv")
AllNodes

Unnamed: 0,Node,Location,Rank,color
6386,1000,Home,5,
1745,10001,Home,9,
2474,10003,School,37,
2201,10005,ConstructionSite,6,
8743,10006,Shop,6,
...,...,...,...,...
407,822,ConstructionSite,31,#FF0000
528,8498,Restaurant,2,#FF0000
4270,8662,Home,24,#FF0000
6925,904,ConstructionSite,10,#FF0000


In [31]:
#Merging All Nodes dataframe with the Location_Filter
InfectionEvent = pd.merge(AllNodes, Location_Filter, on=['Location', 'Location'], how='inner')
InfectionEvent = InfectionEvent.applymap(str)
#InfectionEvent.to_csv("InfectionEvent.csv")
InfectionEvent

Unnamed: 0,Node,Location,Rank,color,bgcolor,shape,Cluster
0,1000,Home,5,,#bc80bd,house,10
1,10001,Home,9,,#bc80bd,house,10
2,10010,Home,71,,#bc80bd,house,10
3,10011,Home,21,,#bc80bd,house,10
4,10018,Home,16,,#bc80bd,house,10
...,...,...,...,...,...,...,...
19477,1047,CareHome,15,,#d9d9d9,Mdiamond,9
19478,295,CareHome,49,,#d9d9d9,Mdiamond,9
19479,5133,CareHome,15,#FF0000,#d9d9d9,Mdiamond,9
19480,56,CareHome,64,,#d9d9d9,Mdiamond,9


In [32]:
#Primary Nodes for cluster Map
PN = InfectionEvent[InfectionEvent.color.isin(['#FF0000'])]
PN = PN[['Node','Rank','shape','color','Cluster','bgcolor']]
PN

Unnamed: 0,Node,Rank,shape,color,Cluster,bgcolor
5589,1004,5,house,#FF0000,10,#bc80bd
5590,10090,42,house,#FF0000,10,#bc80bd
5591,10109,11,house,#FF0000,10,#bc80bd
5592,10220,1,house,#FF0000,10,#bc80bd
5593,10241,2,house,#FF0000,10,#bc80bd
...,...,...,...,...,...,...
19474,15766,18,Mdiamond,#FF0000,9,#d9d9d9
19475,5638,1,Mdiamond,#FF0000,9,#d9d9d9
19476,6141,9,Mdiamond,#FF0000,9,#d9d9d9
19479,5133,15,Mdiamond,#FF0000,9,#d9d9d9


In [33]:
#Secondary Nodes for cluster
SN = InfectionEvent[~InfectionEvent.color.isin(['#FF0000'])]
SN = SN[['Node','Rank','shape','bgcolor','Cluster']]
SN

Unnamed: 0,Node,Rank,shape,bgcolor,Cluster
0,1000,5,house,#bc80bd,10
1,10001,9,house,#bc80bd,10
2,10010,71,house,#bc80bd,10
3,10011,21,house,#bc80bd,10
4,10018,16,house,#bc80bd,10
...,...,...,...,...,...
19469,854,64,Mdiamond,#d9d9d9,9
19470,9428,18,Mdiamond,#d9d9d9,9
19477,1047,15,Mdiamond,#d9d9d9,9
19478,295,49,Mdiamond,#d9d9d9,9


In [34]:
#Dotfile Graph Preparation
#Cluster Description
graph = pd.DataFrame(['graph{'],columns=['dotfilegraphformat'])
graph

Unnamed: 0,dotfilegraphformat
0,graph{


In [35]:
#Cluster Description for primary Source Node
PNC = PN['Node'] + '[cluster=' + PN['Rank'] + ', shape="'+ PN['shape'] + '", style="filled", color="#0000FF", fontname="Arial Bold", fontcolor="#FFFFFF"];'
PNC = pd.DataFrame(PNC,columns=['dotfilegraphformat'])
PNC

Unnamed: 0,dotfilegraphformat
5589,"1004[cluster=5, shape=""house"", style=""filled"",..."
5590,"10090[cluster=42, shape=""house"", style=""filled..."
5591,"10109[cluster=11, shape=""house"", style=""filled..."
5592,"10220[cluster=1, shape=""house"", style=""filled""..."
5593,"10241[cluster=2, shape=""house"", style=""filled""..."
...,...
19474,"15766[cluster=18, shape=""Mdiamond"", style=""fil..."
19475,"5638[cluster=1, shape=""Mdiamond"", style=""fille..."
19476,"6141[cluster=9, shape=""Mdiamond"", style=""fille..."
19479,"5133[cluster=15, shape=""Mdiamond"", style=""fill..."


In [36]:
graph = graph.append(PNC)
graph

Unnamed: 0,dotfilegraphformat
0,graph{
5589,"1004[cluster=5, shape=""house"", style=""filled"",..."
5590,"10090[cluster=42, shape=""house"", style=""filled..."
5591,"10109[cluster=11, shape=""house"", style=""filled..."
5592,"10220[cluster=1, shape=""house"", style=""filled""..."
...,...
19474,"15766[cluster=18, shape=""Mdiamond"", style=""fil..."
19475,"5638[cluster=1, shape=""Mdiamond"", style=""fille..."
19476,"6141[cluster=9, shape=""Mdiamond"", style=""fille..."
19479,"5133[cluster=15, shape=""Mdiamond"", style=""fill..."


In [37]:
#Cluster Description for secondary Source Node
SNC = SN['Node'] + '[cluster='+SN['Rank'] + ', shape="'+SN['shape'] + '", style="filled", ' + 'color="'+SN['bgcolor']+'"];'
SNC = pd.DataFrame(SNC,columns=['dotfilegraphformat'])
SNC

Unnamed: 0,dotfilegraphformat
0,"1000[cluster=5, shape=""house"", style=""filled"",..."
1,"10001[cluster=9, shape=""house"", style=""filled""..."
2,"10010[cluster=71, shape=""house"", style=""filled..."
3,"10011[cluster=21, shape=""house"", style=""filled..."
4,"10018[cluster=16, shape=""house"", style=""filled..."
...,...
19469,"854[cluster=64, shape=""Mdiamond"", style=""fille..."
19470,"9428[cluster=18, shape=""Mdiamond"", style=""fill..."
19477,"1047[cluster=15, shape=""Mdiamond"", style=""fill..."
19478,"295[cluster=49, shape=""Mdiamond"", style=""fille..."


In [38]:
graph = graph.append(SNC)
graph

Unnamed: 0,dotfilegraphformat
0,graph{
5589,"1004[cluster=5, shape=""house"", style=""filled"",..."
5590,"10090[cluster=42, shape=""house"", style=""filled..."
5591,"10109[cluster=11, shape=""house"", style=""filled..."
5592,"10220[cluster=1, shape=""house"", style=""filled""..."
...,...
19469,"854[cluster=64, shape=""Mdiamond"", style=""fille..."
19470,"9428[cluster=18, shape=""Mdiamond"", style=""fill..."
19477,"1047[cluster=15, shape=""Mdiamond"", style=""fill..."
19478,"295[cluster=49, shape=""Mdiamond"", style=""fille..."


In [39]:
#source and target infected nodes
IC = InfectionMap['Source'] +'--'+ InfectionMap['Target']+';'
IC = pd.DataFrame(IC,columns=['dotfilegraphformat'])
IC

Unnamed: 0,dotfilegraphformat
0,1--8;
1,8--14596;
2,7--9378;
3,7--0;
4,7--4;
...,...
17911,19652--4955;
17912,19652--4236;
17913,17230--16664;
17914,628--737;


In [40]:
graph  = graph.append(IC)
graph

Unnamed: 0,dotfilegraphformat
0,graph{
5589,"1004[cluster=5, shape=""house"", style=""filled"",..."
5590,"10090[cluster=42, shape=""house"", style=""filled..."
5591,"10109[cluster=11, shape=""house"", style=""filled..."
5592,"10220[cluster=1, shape=""house"", style=""filled""..."
...,...
17911,19652--4955;
17912,19652--4236;
17913,17230--16664;
17914,628--737;


In [41]:
graph = graph.append(pd.DataFrame(['}'],columns=['dotfilegraphformat']))
graph

Unnamed: 0,dotfilegraphformat
0,graph{
5589,"1004[cluster=5, shape=""house"", style=""filled"",..."
5590,"10090[cluster=42, shape=""house"", style=""filled..."
5591,"10109[cluster=11, shape=""house"", style=""filled..."
5592,"10220[cluster=1, shape=""house"", style=""filled""..."
...,...
17912,19652--4236;
17913,17230--16664;
17914,628--737;
17915,9857--9859;


In [42]:
numpy_array = graph.to_numpy()
np.savetxt("infection_connected_cluster.gv", numpy_array, fmt="%s")

In [43]:
#p.render('infection_cluster.gv', view=True)  # doctest: +SKIP
#'test-output/round-table.gv.pdf'

For the Location wise gmap

In [44]:
#Dotfile Graph Preparation
#Cluster Description
Lgraph = pd.DataFrame(['graph{'],columns=['dotfilegraphformat'])
Lgraph

Unnamed: 0,dotfilegraphformat
0,graph{


In [45]:
#Cluster Description for primary Source Node
LPNC = PN['Node'] + '[cluster='+PN['Cluster']+', clustercolor="'+ PN['bgcolor']+'", shape="doublecircle", fontname="Arial Bold", fontcolor="#FF0000"];'
LPNC = pd.DataFrame(LPNC,columns=['dotfilegraphformat'])
LPNC


Unnamed: 0,dotfilegraphformat
5589,"1004[cluster=10, clustercolor=""#bc80bd"", shape..."
5590,"10090[cluster=10, clustercolor=""#bc80bd"", shap..."
5591,"10109[cluster=10, clustercolor=""#bc80bd"", shap..."
5592,"10220[cluster=10, clustercolor=""#bc80bd"", shap..."
5593,"10241[cluster=10, clustercolor=""#bc80bd"", shap..."
...,...
19474,"15766[cluster=9, clustercolor=""#d9d9d9"", shape..."
19475,"5638[cluster=9, clustercolor=""#d9d9d9"", shape=..."
19476,"6141[cluster=9, clustercolor=""#d9d9d9"", shape=..."
19479,"5133[cluster=9, clustercolor=""#d9d9d9"", shape=..."


In [46]:
Lgraph = Lgraph.append(LPNC)
Lgraph

Unnamed: 0,dotfilegraphformat
0,graph{
5589,"1004[cluster=10, clustercolor=""#bc80bd"", shape..."
5590,"10090[cluster=10, clustercolor=""#bc80bd"", shap..."
5591,"10109[cluster=10, clustercolor=""#bc80bd"", shap..."
5592,"10220[cluster=10, clustercolor=""#bc80bd"", shap..."
...,...
19474,"15766[cluster=9, clustercolor=""#d9d9d9"", shape..."
19475,"5638[cluster=9, clustercolor=""#d9d9d9"", shape=..."
19476,"6141[cluster=9, clustercolor=""#d9d9d9"", shape=..."
19479,"5133[cluster=9, clustercolor=""#d9d9d9"", shape=..."


In [47]:
#Cluster Description for secondary Source Node
LSNC = SN['Node'] + '[cluster='+SN['Cluster'] + ', clustercolor="'+SN['bgcolor']+'", shape="plaintext"'+'];'#, style="filled'+'"];'
LSNC = pd.DataFrame(LSNC,columns=['dotfilegraphformat'])
LSNC

Unnamed: 0,dotfilegraphformat
0,"1000[cluster=10, clustercolor=""#bc80bd"", shape..."
1,"10001[cluster=10, clustercolor=""#bc80bd"", shap..."
2,"10010[cluster=10, clustercolor=""#bc80bd"", shap..."
3,"10011[cluster=10, clustercolor=""#bc80bd"", shap..."
4,"10018[cluster=10, clustercolor=""#bc80bd"", shap..."
...,...
19469,"854[cluster=9, clustercolor=""#d9d9d9"", shape=""..."
19470,"9428[cluster=9, clustercolor=""#d9d9d9"", shape=..."
19477,"1047[cluster=9, clustercolor=""#d9d9d9"", shape=..."
19478,"295[cluster=9, clustercolor=""#d9d9d9"", shape=""..."


In [48]:
Lgraph = Lgraph.append(LSNC)
Lgraph

Unnamed: 0,dotfilegraphformat
0,graph{
5589,"1004[cluster=10, clustercolor=""#bc80bd"", shape..."
5590,"10090[cluster=10, clustercolor=""#bc80bd"", shap..."
5591,"10109[cluster=10, clustercolor=""#bc80bd"", shap..."
5592,"10220[cluster=10, clustercolor=""#bc80bd"", shap..."
...,...
19469,"854[cluster=9, clustercolor=""#d9d9d9"", shape=""..."
19470,"9428[cluster=9, clustercolor=""#d9d9d9"", shape=..."
19477,"1047[cluster=9, clustercolor=""#d9d9d9"", shape=..."
19478,"295[cluster=9, clustercolor=""#d9d9d9"", shape=""..."


In [49]:
#source and target infected nodes
LIC = InfectionMap['Source'] +'--'+ InfectionMap['Target']+';'
LIC = pd.DataFrame(LIC,columns=['dotfilegraphformat'])
LIC

Unnamed: 0,dotfilegraphformat
0,1--8;
1,8--14596;
2,7--9378;
3,7--0;
4,7--4;
...,...
17911,19652--4955;
17912,19652--4236;
17913,17230--16664;
17914,628--737;


In [50]:
Lgraph  = Lgraph.append(LIC)
Lgraph

Unnamed: 0,dotfilegraphformat
0,graph{
5589,"1004[cluster=10, clustercolor=""#bc80bd"", shape..."
5590,"10090[cluster=10, clustercolor=""#bc80bd"", shap..."
5591,"10109[cluster=10, clustercolor=""#bc80bd"", shap..."
5592,"10220[cluster=10, clustercolor=""#bc80bd"", shap..."
...,...
17911,19652--4955;
17912,19652--4236;
17913,17230--16664;
17914,628--737;


In [51]:
Lgraph = Lgraph.append(pd.DataFrame(['}'],columns=['dotfilegraphformat']))
Lgraph

Unnamed: 0,dotfilegraphformat
0,graph{
5589,"1004[cluster=10, clustercolor=""#bc80bd"", shape..."
5590,"10090[cluster=10, clustercolor=""#bc80bd"", shap..."
5591,"10109[cluster=10, clustercolor=""#bc80bd"", shap..."
5592,"10220[cluster=10, clustercolor=""#bc80bd"", shap..."
...,...
17912,19652--4236;
17913,17230--16664;
17914,628--737;
17915,9857--9859;


In [52]:
Lnumpy_array = Lgraph.to_numpy()
np.savetxt("infection_Location_cluster.gv", Lnumpy_array, fmt="%s")

In [53]:
#p.render('infection_cluster.gv', view=True)  # doctest: +SKIP
#'test-output/round-table.gv.pdf'