The `build_graph` function defined below assumes an input file with four fields: `id`, `time`, `lat` and `long`.

In [25]:
import pandas as pd
from datetime import datetime, timedelta 
# Load one day of raw records, then turn the `time` column into pandas
# datetimes so it can later be compared against time windows.
df = pd.read_csv('03.01.2020.csv')
df['time'] = pd.to_datetime(df['time'])

# Preview the first five records.
df.head(5)

Unnamed: 0,time,id,lat,long
0,2020-03-30 04:15:36.186,90.59.53.79,48.186268,0.652216
1,2020-03-30 04:15:35.722,83.193.39.0,43.506,-1.477
2,2020-03-30 04:15:36.233,176.145.161.215,43.693695,5.033779
3,2020-03-30 04:15:36.228,83.199.173.224,48.730705,2.58764
4,2020-03-30 04:15:36.404,77.147.33.204,44.9154,-0.427


With an input file of that kind, you can use the following function to build the graph.

In [26]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta 

def build_graph(df, graph_file_name, proximity_threshold=0.000015, time_threshold=10):
    """Write, for every record in ``df``, the ids seen close by at about the same time.

    For each row, the other rows are selected whose ``lat`` and ``long`` both
    lie strictly within ``proximity_threshold`` of the row's own coordinates
    (an axis-aligned box, not a radius) and whose ``time`` lies strictly
    within ``time_threshold`` minutes of the row's time. Rows carrying the
    same ``id`` as the current row are excluded. The ids of the remaining
    rows, in frame order and without de-duplication, form that row's
    contact list.

    Both thresholds are important modelling assumptions: check that the
    values are reasonable for your data and always report the values chosen.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``id``, ``time``, ``lat`` and ``long``.
        ``time`` may already be datetime-typed or hold values that
        ``pd.to_datetime`` can parse. The caller's frame is not modified.
    graph_file_name : str or path-like
        Destination of the output CSV, with columns ``id``, ``time``
        (formatted as ``%m/%d/%Y, %H:%M:%S``), ``lat``, ``long`` and
        ``contact_list``.
    proximity_threshold : float, default 0.000015
        Half-width of the lat/long box, in the same unit as the ``lat`` and
        ``long`` columns (the default was chosen as roughly one metre).
    time_threshold : int or float, default 10
        Half-width of the time window, in minutes.

    Returns
    -------
    None
        The result is only written to ``graph_file_name``.

    Notes
    -----
    Every row is compared against the whole frame, so the running time grows
    quadratically with the number of rows.
    """
    # Parse timestamps on a new frame: string timestamps (e.g. straight from
    # read_csv) are accepted and the caller's DataFrame stays untouched.
    df = df.assign(time=pd.to_datetime(df['time']))

    # Loop-invariant half-width of the time window. Previously the window was
    # hard-coded to 10 minutes and `time_threshold` was silently ignored.
    window = timedelta(minutes=time_threshold)

    records = []
    for _, row in df.iterrows():
        person = row['id']
        # Use a plain datetime for the window arithmetic and for strftime.
        moment = row['time'].to_pydatetime()
        lat = row['lat']
        long = row['long']

        # One boolean mask: inside the lat/long box, inside the time window,
        # and belonging to somebody else. All bounds are strict.
        is_contact = (
            (df['lat'] > lat - proximity_threshold)
            & (df['lat'] < lat + proximity_threshold)
            & (df['long'] > long - proximity_threshold)
            & (df['long'] < long + proximity_threshold)
            & (df['time'] > moment - window)
            & (df['time'] < moment + window)
            & (df['id'] != person)
        )

        records.append([
            person,
            moment.strftime("%m/%d/%Y, %H:%M:%S"),
            lat,
            long,
            df.loc[is_contact, 'id'].to_list(),
        ])

    contact_list = pd.DataFrame(records, columns=['id', 'time', 'lat', 'long', 'contact_list'])
    contact_list.to_csv(graph_file_name, index=False)

For example:

In [27]:
# Build the contact lists for the records loaded above and write them to a CSV file.
build_graph(df, graph_file_name='cg_03.01.2020.csv')

# Expected result

If the `build_graph` function worked, it should produce a .csv file like the following.

In [28]:
# Read the generated file back in to inspect it.
# NOTE(review): this rebinds `df`, so the raw input frame loaded earlier is no
# longer reachable under that name; re-run the loading cell before calling
# build_graph again.
df = pd.read_csv(filepath_or_buffer='cg_03.01.2020.csv')

# Preview the first ten rows of the result.
df.head(n=10)

Unnamed: 0,id,time,lat,long,contact_list
0,90.59.53.79,"03/30/2020, 04:15:36",48.186268,0.652216,[]
1,83.193.39.0,"03/30/2020, 04:15:35",43.506,-1.477,[]
2,176.145.161.215,"03/30/2020, 04:15:36",43.693695,5.033779,[]
3,83.199.173.224,"03/30/2020, 04:15:36",48.730705,2.58764,[]
4,77.147.33.204,"03/30/2020, 04:15:36",44.9154,-0.427,[]
5,92.184.110.0,"03/30/2020, 04:15:36",45.933,-0.955,[]
6,92.184.97.180,"03/30/2020, 04:15:36",48.900901,2.165352,[]
7,80.215.102.240,"03/30/2020, 04:15:37",47.184177,3.013118,[]
8,88.122.126.130,"03/30/2020, 04:15:37",45.77176,4.916659,[]
9,176.156.209.0,"03/30/2020, 04:15:37",43.586,-1.269,[]


Again, we assume that the `build_graph` function is applied to one CSV file per day (each file is simply named after its date) and that the output is named after that same day, with the prefix `cg_`.