In [1]:
# Clark Mollencop
# analyzing the Nepal network with the 3 edge motif counts
# Summer 2022

In [2]:
import pandas as pd
import seaborn as sns
import networkx as nx
import subprocess # to call the C++ executable that actually does the network analysis
import glob # to work with files later on
import bidirec_script # for input/formatting

In [3]:
# runs the program with the command line arguments
# process = subprocess.Popen(["./FAST_temporal_motif", "-input", "input/testAll.txt", "-output", "output/outputAll.txt", "-timesOut", "output/timesAll.txt", "-w", "3"])

In [4]:
#### this cell: convert Nepal network to correct format ####
# Read the edge list; the printed frame below has the columns source, target, timestamp.
df = bidirec_script.read_file('networks/nepal.txt', ',', 0, 1, 3)
print(df)
# first, change source/destination names to ints because that is the format expected by the C++ program
df['source_id'] = pd.Categorical(df['source']).codes
# map every distinct source name to its id, keeping order of first appearance
# (vectorized replacement for a row-by-row iterrows() scan)
first_seen = df.drop_duplicates(subset='source')
locations = dict(zip(first_seen['source'], first_seen['source_id'].tolist()))
# targets that never occur as a source get fresh ids, continuing after the
# source ids and handed out in order of first appearance
newindex = len(locations)
unseen_targets = df.loc[~df['target'].isin(list(locations)), 'target'].drop_duplicates()
for name in unseen_targets:
    locations[name] = newindex
    newindex += 1
# every target now has an entry in locations, so one lookup pass yields dest_id
dests = df['target'].map(locations).tolist()
df['dest_id'] = dests
# keep only source_id, dest_id, timestamp -- selected by name so the result does
# not depend on the column order read_file happens to return
newdf = df[['source_id', 'dest_id', 'timestamp']]
# one frame per delta 1..12; dup_edges is defined in bidirec_script
# NOTE(review): the meaning of the trailing 12 is not visible here -- confirm against dup_edges
dfs_all_deltas = [bidirec_script.dup_edges(newdf, i, 12) for i in range(1, 13)]
# now save all of these to files for analysis (space-separated, no header, no index)
for d, delta_df in enumerate(dfs_all_deltas, start=1):
    delta_df.to_csv(path_or_buf='input/nepal_d_{num}.txt'.format(num=d), sep=' ', header=False, index=False)

      source     target  timestamp
0      Banke      Banke          1
1       Bara      Banke          1
2       Bara       Bara          1
3       Bara  Bhaktapur          1
4       Bara    Chitwan          1
...      ...        ...        ...
3975  Siraha     Siraha         12
3976  Siraha    Sunsari         12
3977  Siraha    Surkhet         12
3978  Siraha    Syangja         12
3979  Siraha  Taplejung         12

[3980 rows x 3 columns]


In [5]:
# run the C++ file on the correct Nepal net: one process per delta 1..12.
# Popen returns as soon as the child is started, so all runs are launched first
# (they execute concurrently) and then each one is waited on below. Without the
# wait, this cell finishes while the output files are still being written and
# later cells can read incomplete results.
processes = []
for i in range(1, 13):
    process = subprocess.Popen(["./FAST_temporal_motif", "-input", "input/nepal_d_{num}.txt".format(num=i), "-output", "output/nepal_out_d_{num}.txt".format(num=i), "-timesOut", "output/nepal_times_d_{num}.txt".format(num=i), "-w", "{num}".format(num=i)])
    processes.append(process)

# block until every run has exited; wait on all of them before reporting so no
# child is left unreaped, then fail loudly on the first non-zero exit code
failed = [p for p in processes if p.wait() != 0]
if failed:
    raise subprocess.CalledProcessError(failed[0].returncode, failed[0].args)

edgeNum: 4069
inserting...
sorting...
loaded file
edgeNum: 4374
inserting...
sorting...
loaded file
edgeNum: 4679
inserting...
sorting...
loaded file
edgeNum: 4984
inserting...
sorting...
loaded file
edgeNum: 5342
inserting...
sorting...
loaded file
edgeNum: 5520
inserting...
sorting...
loaded file
edgeNum: 5698
inserting...
sorting...
loaded file
edgeNum: 6077
inserting...
sorting...
loaded file
edgeNum: 6456
inserting...
sorting...
loaded file
edgeNum: 6835
inserting...
sorting...
loaded file
edgeNum: 7222
inserting...
sorting...
loaded file


In [6]:
# then read that output and save it as a DataFrame in a form suitable for analysis

edgeNum: 7528
inserting...
sorting...
loaded file


In [7]:
# finally, graph the motif-count output