# Installation :


Start from this first cell if you are on Google Colab:

In [0]:
!git clone https://github.com/scikit-mobility/scikit-mobility.git
!cd scikit-mobility && python3 setup.py install
!pip3 install scikit-mobility
!git clone https://github.com/IhabBendidi/morocco.geojson.git

Be sure to restart the runtime afterwards (on Google Colab) so that the installation actually takes effect.

Start from this next cell if you are running the notebook on a local machine:

In [0]:
import geopandas as gpd  
import pandas as pd
import skmob
from skmob.models.epr import DensityEPR
from skmob.models.epr import Ditras
from skmob.models.markov_diary_generator import MarkovDiaryGenerator
from skmob.preprocessing import filtering, compression, detection, clustering

Setting up the start and end times of the simulation: it runs from 1/1/2019 to 15/1/2019.

In [0]:
# Helper function for protecting against duplicates ids
def change_uid(x,l):
  """Return the id ``x`` as a string with the suffix ``l`` appended.

  Each simulation run is tagged with its own suffix so that ids coming from
  different runs stay distinct once the results are concatenated.
  """
  return str(x) + l

In [0]:
# starting and end times of the simulation
start_time = pd.to_datetime('2019/01/01 08:00:00')
end_time = pd.to_datetime('2019/01/15 08:00:00')

### Irfane simulation

In [260]:
# Getting irfane data (rabat): tessellation read from the cloned morocco.geojson repository
irfane_tessellation = gpd.read_file('morocco.geojson/irfane.geojson')
depr = DensityEPR()
# Generating and filtering data to make it as close to real as possible:
# 120 agents over the simulation window, with the 'population' column used as tile relevance
density_tdf = depr.generate(start_time, end_time, irfane_tessellation, relevance_column='population', n_agents=120)
# Speed filter with a 5 km/h maximum
density_tdf = filtering.filter(density_tdf, max_speed_kmh=5.)
# Stop detection: 50 minutes within a 0.3 km radius scaled by a 0.5 factor; the result replaces the raw trajectory
density_tdf = detection.stops(density_tdf, stop_radius_factor=0.5, minutes_for_a_stop=50.0, spatial_radius_km=0.3, leaving_time=False)

  if not i.flags.writeable or i.dtype not in (np.int32, np.int64):
  if not j.flags.writeable or j.dtype not in (np.int32, np.int64):
  if not x.flags.writeable:


In [0]:
# changing ids of citizens to not have duplicated later on
density_tdf["uid"] = density_tdf["uid"].apply(change_uid,args=["a"])

We will now generate the habits of the agents for Al Irfane, using the Markov diary generator.

In [0]:
# load and preprocess data to train the MarkovDiaryGenerator
# (the sample file comes from the scikit-mobility tutorial data)
url = 'https://raw.githubusercontent.com/scikit-mobility/scikit-mobility/master/tutorial/data/geolife_sample.txt.gz'
df = pd.read_csv(url, sep=',', compression='gzip')
# map the file's lat / lon / user / datetime columns onto a TrajDataFrame
tdf = skmob.TrajDataFrame(df, latitude='lat', longitude='lon', user_id='user', datetime='datetime')
#ctdf = compression.compress(tdf)
# detect stops with the same parameters used for the simulated data, then cluster them;
# the 'cluster' column is what the generator is fitted on (lid='cluster') in the next cell
stdf = detection.stops(tdf,stop_radius_factor=0.5, minutes_for_a_stop=50.0, spatial_radius_km=0.3, leaving_time=False)
markov_tdf = clustering.cluster(stdf)

In [263]:
# instantiate and train the MarkovDiaryGenerator
mdg = MarkovDiaryGenerator()
mdg.fit(markov_tdf, 2, lid='cluster')

100%|██████████| 2/2 [00:00<00:00,  3.41it/s]


In [264]:
# instantiate the model
ditras = Ditras(mdg)
# run the model
ditras_tdf = ditras.generate(start_time, end_time, irfane_tessellation, relevance_column='population',
                    n_agents=150, od_matrix=None, show_progress=True)

  if not i.flags.writeable or i.dtype not in (np.int32, np.int64):
  if not j.flags.writeable or j.dtype not in (np.int32, np.int64):
  if not x.flags.writeable:
100%|██████████| 150/150 [00:02<00:00, 68.87it/s]


In [0]:
# changing ids of citizens to not have duplicated later on
ditras_tdf["uid"] = ditras_tdf["uid"].apply(change_uid,args=["b"])

In [0]:
irfan_tdf = pd.concat([ditras_tdf,density_tdf],ignore_index=True)

In [0]:
irfan_tdf.to_csv("irfan_mobility.csv")

### Agdal Simulation

In [268]:
# Getting Agdal data (rabat): tessellation read from the cloned morocco.geojson repository
agdal_tessellation = gpd.read_file('morocco.geojson/agdal.geojson')
depr = DensityEPR()
# Generating and filtering data to make it as close to real as possible:
# 120 agents over the simulation window, with the 'population' column used as tile relevance
density_tdf = depr.generate(start_time, end_time, agdal_tessellation, relevance_column='population', n_agents=120)
# Speed filter with a 5 km/h maximum
density_tdf = filtering.filter(density_tdf, max_speed_kmh=5.)
# Stop detection: 50 minutes within a 0.3 km radius scaled by a 0.5 factor; the result replaces the raw trajectory
density_tdf = detection.stops(density_tdf, stop_radius_factor=0.5, minutes_for_a_stop=50.0, spatial_radius_km=0.3, leaving_time=False)

  if not i.flags.writeable or i.dtype not in (np.int32, np.int64):
  if not j.flags.writeable or j.dtype not in (np.int32, np.int64):
  if not x.flags.writeable:


In [0]:
# changing ids of citizens to not have duplicated later on
density_tdf["uid"] = density_tdf["uid"].apply(change_uid,args=["c"])

In [270]:
# load and preprocess data to train the MarkovDiaryGenerator
# NOTE(review): same sample file, preprocessing and fit arguments as the training
# cells of the Irfane section; presumably the generator fitted there could be
# reused instead of downloading and fitting again - confirm before changing.
url = 'https://raw.githubusercontent.com/scikit-mobility/scikit-mobility/master/tutorial/data/geolife_sample.txt.gz'
df = pd.read_csv(url, sep=',', compression='gzip')
tdf = skmob.TrajDataFrame(df, latitude='lat', longitude='lon', user_id='user', datetime='datetime')
#ctdf = compression.compress(tdf)
# detect stops, then cluster them; the 'cluster' column is the location id used for fitting
stdf = detection.stops(tdf,stop_radius_factor=0.5, minutes_for_a_stop=50.0, spatial_radius_km=0.3, leaving_time=False)
markov_tdf = clustering.cluster(stdf)
# instantiate and train the MarkovDiaryGenerator
mdg = MarkovDiaryGenerator()
mdg.fit(markov_tdf, 2, lid='cluster')

100%|██████████| 2/2 [00:00<00:00,  3.45it/s]


In [271]:
# instantiate the model
ditras = Ditras(mdg)
# run the model
ditras_tdf = ditras.generate(start_time, end_time, agdal_tessellation, relevance_column='population',
                    n_agents=150, od_matrix=None, show_progress=True)

  if not i.flags.writeable or i.dtype not in (np.int32, np.int64):
  if not j.flags.writeable or j.dtype not in (np.int32, np.int64):
  if not x.flags.writeable:
100%|██████████| 150/150 [00:02<00:00, 71.07it/s]


In [0]:
# changing ids of citizens to not have duplicated later on
ditras_tdf["uid"] = ditras_tdf["uid"].apply(change_uid,args=["d"])

In [0]:
agdal_tdf = pd.concat([ditras_tdf,density_tdf],ignore_index=True)

In [0]:
agdal_tdf.to_csv("agdal_mobility.csv")

### Aggregate results

In [275]:
# Getting the combined Agdal + Irfane tessellation (rabat)
dual_tessellation = gpd.read_file('morocco.geojson/agdal_irfane.geojson')
depr = DensityEPR()
# Generating and filtering data to make it as close to real as possible:
# 120 agents over the simulation window, with the 'population' column used as tile relevance
density_tdf = depr.generate(start_time, end_time, dual_tessellation, relevance_column='population', n_agents=120)
# Speed filter with a 5 km/h maximum
density_tdf = filtering.filter(density_tdf, max_speed_kmh=5.)
# Stop detection: 50 minutes within a 0.3 km radius scaled by a 0.5 factor; the result replaces the raw trajectory
density_tdf = detection.stops(density_tdf, stop_radius_factor=0.5, minutes_for_a_stop=50.0, spatial_radius_km=0.3, leaving_time=False)

  if not i.flags.writeable or i.dtype not in (np.int32, np.int64):
  if not j.flags.writeable or j.dtype not in (np.int32, np.int64):
  if not x.flags.writeable:


In [0]:
# changing ids of citizens to not have duplicated later on
density_tdf["uid"] = density_tdf["uid"].apply(change_uid,args=["e"])

In [277]:
# load and preprocess data to train the MarkovDiaryGenerator
# NOTE(review): same sample file, preprocessing and fit arguments as the training
# cells of the Irfane section; presumably the generator fitted there could be
# reused instead of downloading and fitting again - confirm before changing.
url = 'https://raw.githubusercontent.com/scikit-mobility/scikit-mobility/master/tutorial/data/geolife_sample.txt.gz'
df = pd.read_csv(url, sep=',', compression='gzip')
tdf = skmob.TrajDataFrame(df, latitude='lat', longitude='lon', user_id='user', datetime='datetime')
#ctdf = compression.compress(tdf)
# detect stops, then cluster them; the 'cluster' column is the location id used for fitting
stdf = detection.stops(tdf,stop_radius_factor=0.5, minutes_for_a_stop=50.0, spatial_radius_km=0.3, leaving_time=False)
markov_tdf = clustering.cluster(stdf)
# instantiate and train the MarkovDiaryGenerator
mdg = MarkovDiaryGenerator()
mdg.fit(markov_tdf, 2, lid='cluster')

100%|██████████| 2/2 [00:00<00:00,  3.41it/s]


In [278]:
# instantiate the model
ditras = Ditras(mdg)
# run the model
ditras_tdf = ditras.generate(start_time, end_time, dual_tessellation, relevance_column='population',
                    n_agents=150, od_matrix=None, show_progress=True)

  if not i.flags.writeable or i.dtype not in (np.int32, np.int64):
  if not j.flags.writeable or j.dtype not in (np.int32, np.int64):
  if not x.flags.writeable:
100%|██████████| 150/150 [00:02<00:00, 68.45it/s]


In [0]:
# changing ids of citizens to not have duplicated later on
ditras_tdf["uid"] = ditras_tdf["uid"].apply(change_uid,args=["f"])

In [0]:
dual_tdf = pd.concat([ditras_tdf,density_tdf],ignore_index=True)

In [0]:
simulation_tdf = pd.concat([agdal_tdf,irfan_tdf,dual_tdf],ignore_index=True)

In [0]:
simulation_tdf.to_csv('simulation_data.csv')

# Data cleaning and preparation

In [0]:
import pandas as pd
simulation_tdf = pd.read_csv('simulation_data.csv')

In [0]:
import json
from shapely.geometry import shape, Point

In [0]:
# load GeoJSON file containing sectors
with open('morocco.geojson/agdal_irfane.geojson', 'r') as f:
  js = json.load(f)

In [0]:
def get_zone_id(x):
  """Tag a record with the id of the GeoJSON zone that contains its position.

  ``x`` must expose ``"lng"`` and ``"lat"``. Every feature of the module-level
  ``js`` GeoJSON is tested; when several zones contain the point, the last one
  in file order is kept. ``x['zone_id']`` is set to the zone id, or to ``""``
  when no zone contains the point, and ``x`` is returned.
  """
  location = Point(x["lng"], x["lat"])
  matched_id = ""
  for zone in js['features']:
    if shape(zone['geometry']).contains(location):
      matched_id = zone['properties']['id']
  x['zone_id'] = matched_id
  return x

In [0]:
# Adding the ids of the zones (get_zone_id runs once per row)
simulation_tdf = simulation_tdf.apply(get_zone_id,axis=1)

# Dropping the rows that fall inside no zone (empty zone_id) and keeping only the columns used later
simulation_tdf = simulation_tdf[simulation_tdf['zone_id'] != ""][["uid","datetime","lat","lng","zone_id"]]

In [0]:
from datetime import datetime
def get_date_precision(x):
  """Add ``x['datetime_clipped']``: the record's timestamp cut to the minute.

  ``x['datetime']`` is a string such as ``'2019-01-01 15:42:28.661410'``.
  Everything after the second ``:``-separated field (the seconds and any
  fraction) is discarded, and the remainder is parsed as
  ``'%Y-%m-%d %H:%M'``. ``x`` is returned.
  """
  fields = x['datetime'].split(':')
  up_to_minute = fields[0] + ":" + fields[1]
  x['datetime_clipped'] = datetime.strptime(up_to_minute, '%Y-%m-%d %H:%M')
  return x

In [0]:
# Clip each datetime to the minute (get_date_precision parses it with '%Y-%m-%d %H:%M')
simulation_tdf = simulation_tdf.apply(get_date_precision,axis=1)

In [0]:
import geopy.distance

def get_distance(x):
  """Add ``x['distance']``: the distance in metres between the two positions of a pair.

  ``x`` carries both sides of a merged pair: ``lat_x`` / ``lng_x`` for the
  first record and ``lat_y`` / ``lng_y`` for the second. The distance comes
  from ``geopy.distance.distance`` in kilometres and is converted to metres.
  ``x`` is returned.
  """
  first_position = (x['lat_x'], x['lng_x'])
  second_position = (x['lat_y'], x['lng_y'])
  x['distance'] = geopy.distance.distance(first_position, second_position).km * 1000
  return x

In [0]:
# Pair every record with all the records that share its clipped minute and zone.
# This is a self-merge, so each record is also paired with itself and each pair
# appears in both orders; columns get the _x / _y suffixes.
merged_tdf = simulation_tdf.merge(simulation_tdf, how='left',on=["datetime_clipped","zone_id"])

# Filter to not keep couples if they have same id
merged_tdf = merged_tdf[merged_tdf['uid_x'] != merged_tdf['uid_y']]

# Compute distance between each two citizens (get_distance stores it in metres, one row at a time)
merged_tdf = merged_tdf.apply(get_distance,axis=1)

# Filter for distances lower than 7 meters between citizens
merged_tdf = merged_tdf[merged_tdf['distance']< 7]

In [0]:
# Getting users around in each time: for every (user, minute, zone) the ids of
# the nearby users are joined into one comma-separated string
temp_tdf = merged_tdf.groupby(['uid_x','datetime_clipped','zone_id'])['uid_y'].apply(','.join).reset_index()
temp_tdf = temp_tdf.rename(columns = {'uid_y':'ble_users'})
# Attach that list to the user's own record; merged_tdf holds one row per
# (user, contact) pair, so drop_duplicates collapses the repeated records
ble_tdf = merged_tdf[['uid_x','datetime_x','lat_x','lng_x','zone_id','datetime_clipped']].merge(temp_tdf,how='inner', on=['uid_x','datetime_clipped','zone_id']).drop_duplicates()
# Drop the _x suffixes now that only the user's own side is kept
ble_tdf = ble_tdf.rename(columns = {'uid_x':'uid','datetime_x':'datetime','lat_x':'lat','lng_x':'lng'})

# Delete the first second of the simulation, and last second of the simulation, as a lot of people are near each other in it
# (the comparison is against the string form of the timestamps)
ble_tdf = ble_tdf[ble_tdf['datetime'] != '2019-01-01 08:00:00.000000']
ble_tdf = ble_tdf[ble_tdf['datetime'] != '2019-01-15 08:00:00.000000']
# Rebuild a clean index and keep the columns in a fixed order
ble_tdf = ble_tdf.reset_index()[['uid','datetime','lat','lng','zone_id','datetime_clipped','ble_users']]

In [293]:
ble_tdf.head()

Unnamed: 0,uid,datetime,lat,lng,zone_id,datetime_clipped,ble_users
0,1d,2019-01-01 15:00:00.000000,34.000416,-6.857033,Gare,1546354800000000000,"86d,45f,62f"
1,1d,2019-01-01 18:00:00.000000,34.000032,-6.854877,ENIM,1546365600000000000,"86d,123d,146d,62f"
2,1d,2019-01-02 11:00:00.000000,34.000032,-6.854877,ENIM,1546426800000000000,"16d,103d,125d,146d"
3,1d,2019-01-02 17:00:00.000000,33.999314,-6.847532,Maisons 1,1546448400000000000,"96d,99d,131d"
4,1d,2019-01-03 08:00:00.000000,34.000032,-6.854877,ENIM,1546502400000000000,"31d,36d,73d,86d,109d,123d,143d,26f"


In [0]:
ble_tdf.to_csv('full_mobility_data.csv')

# Covid modelisation :


In [0]:
import pandas as pd 
ble_tdf = pd.read_csv('full_mobility_data.csv')

In [0]:
from random import seed
from random import randint

### Infected scoring

In [0]:
# Share of the distinct users that start out infected, expressed as a fraction (0.004 = 0.4 %)
percentage_infected = .004 
# int() truncates, so the number of infected users is rounded down
number_infected = int(ble_tdf['uid'].unique().shape[0] * percentage_infected)

In [0]:
# Selecting random infected people (drawn with replacement, so the same uid can come up twice)
seed(3)
# Compute the distinct uids once instead of on every iteration
unique_uids = ble_tdf['uid'].unique()
uids_infected = []
for _ in range(number_infected):
  # randint includes both bounds: the upper bound has to be the last valid
  # position, otherwise the draw can land one past the end and raise IndexError
  idex = randint(0, len(unique_uids) - 1)
  uids_infected.append(unique_uids[idex])

In [299]:
uids_infected

['94c', '67f', '18f']

A question we are facing is whether, when we detect that someone is covid positive, their earlier risk scores reflect/predict that they would be positive.

Logical answer: not always, as this depends heavily on whether everybody installed the application. If not everybody did, the infected person might have contracted the infection from people who didn't own phones, and thus their score shouldn't have changed much.

This means that, once an infection is detected in someone, we need to assign some measure of risk score to the earlier periods, irrespective of their earlier scores.

In [0]:
from scipy.stats import norm
import numpy as np
import datetime as dat

In [0]:
def generate_infected_earlier_scores(infected_id,simulation_start,simulation_end,symptom_apparition_date,score_distribution):
  """Build the day-by-day score profile of one infected user.

  A window of ``score_distribution`` is read, one value per simulated day.
  The window is shifted by the day on which symptoms appear, so that the same
  part of the curve always falls on the symptom day.

  Parameters:
    infected_id: id stored under ``'infected_uid'`` in the result.
    simulation_start, simulation_end: bounds of the simulation; their
      difference must expose ``.days``.
    symptom_apparition_date: date on which symptoms appear.
    score_distribution: indexable sequence of scores; the caller builds it
      with twice as many points as there are simulated days.

  Returns:
    ``{'infected_uid': infected_id, 'scores': [{'date': ..., 'score': ...}, ...]}``
    with one entry per day from ``simulation_start`` to ``simulation_end``
    inclusive.
  """
  ## Should we factor in whether they have ended up meeting another infected person?? Maybe they have more virality if so

  # Number of days in the simulation
  nb_days_simulation = (simulation_end - simulation_start).days
  # Index of the day of apparition of symptoms
  symptom_day = (symptom_apparition_date - simulation_start).days
  # Shift of the window inside the distribution. It is derived from the
  # simulation length (it used to be a literal 14, which is only right for a
  # 14-day simulation).
  start_index = nb_days_simulation - symptom_day
  last_index = len(score_distribution) - 1

  scores = []
  for i in range(nb_days_simulation + 1):
    # Clamp to the valid range: with symptoms on the first days the window
    # would otherwise run past the end of the distribution (IndexError), and a
    # symptom date after the end would give negative positions that silently
    # wrap around.
    position = max(0, min(start_index + i + 1, last_index))
    scores.append({
      'date': simulation_start + dat.timedelta(days=i),
      'score': score_distribution[position],
    })
  return {'infected_uid': infected_id, 'scores': scores}

In [0]:
def get_simulation_infected_scores(uids_infected,simulation_start,simulation_end):
  """Return one score profile per infected user, in the order of ``uids_infected``.

  The random generator is re-seeded with 1 on every call, then each user gets
  a symptom date drawn uniformly among the simulated days (both bounds
  included). The profile itself is built by
  ``generate_infected_earlier_scores`` from a shared score curve.
  """
  seed(1)
  total_days = (simulation_end - simulation_start).days

  # Score curve of an already infected person: a normal density (mean 0.5,
  # standard deviation 0.5) scaled by 100 and sampled on twice as many points
  # as there are simulated days, so that the period around the apparition of
  # symptoms carries the highest viral charge
  grid = np.linspace(norm.ppf(0.1), norm.ppf(0.99), total_days * 2)
  score_distribution = norm.pdf(grid, 0.5, 0.5) * 100

  def _profile_for(uid):
    # One random draw per user, made right before building that user's profile
    symptom_date = simulation_start + dat.timedelta(days=randint(0, total_days))
    return generate_infected_earlier_scores(uid, simulation_start, simulation_end, symptom_date, score_distribution)

  return [_profile_for(uid) for uid in uids_infected]

In [0]:
infected_scores = get_simulation_infected_scores(uids_infected,start_time,end_time)

In [304]:
infected_scores

[{'infected_uid': '94c',
  'scores': [{'date': Timestamp('2019-01-01 08:00:00'),
    'score': 79.47428297373659},
   {'date': Timestamp('2019-01-02 08:00:00'), 'score': 78.52849795470743},
   {'date': Timestamp('2019-01-03 08:00:00'), 'score': 72.24521999077103},
   {'date': Timestamp('2019-01-04 08:00:00'), 'score': 61.8831055257222},
   {'date': Timestamp('2019-01-05 08:00:00'), 'score': 49.35330458139002},
   {'date': Timestamp('2019-01-06 08:00:00'), 'score': 36.64726170149365},
   {'date': Timestamp('2019-01-07 08:00:00'), 'score': 25.336579093431915},
   {'date': Timestamp('2019-01-08 08:00:00'), 'score': 16.30930967047769},
   {'date': Timestamp('2019-01-09 08:00:00'), 'score': 9.774720232525855},
   {'date': Timestamp('2019-01-10 08:00:00'), 'score': 5.4544912352361035},
   {'date': Timestamp('2019-01-11 08:00:00'), 'score': 2.833905182148495},
   {'date': Timestamp('2019-01-12 08:00:00'), 'score': 1.3708739931178504},
   {'date': Timestamp('2019-01-13 08:00:00'), 'score': 0.61

In [0]:
ble_tdf['scores'] = 0.
ble_tdf['infected'] = False

In [306]:
ble_tdf.head()

Unnamed: 0.1,Unnamed: 0,uid,datetime,lat,lng,zone_id,datetime_clipped,ble_users,scores,infected
0,0,1d,2019-01-01 15:00:00.000000,34.000416,-6.857033,Gare,1546354800000000000,"86d,45f,62f",0.0,False
1,1,1d,2019-01-01 18:00:00.000000,34.000032,-6.854877,ENIM,1546365600000000000,"86d,123d,146d,62f",0.0,False
2,2,1d,2019-01-02 11:00:00.000000,34.000032,-6.854877,ENIM,1546426800000000000,"16d,103d,125d,146d",0.0,False
3,3,1d,2019-01-02 17:00:00.000000,33.999314,-6.847532,Maisons 1,1546448400000000000,"96d,99d,131d",0.0,False
4,4,1d,2019-01-03 08:00:00.000000,34.000032,-6.854877,ENIM,1546502400000000000,"31d,36d,73d,86d,109d,123d,143d,26f",0.0,False


In [0]:
def get_infected(x,uids_infected,infected_scores):
  """Flag a record as infected and give it the score of its calendar day.

  Records whose ``uid`` is not in ``uids_infected`` are returned untouched.
  Otherwise ``x['infected']`` becomes True and, when the user's profile has an
  entry for the same day as ``x['datetime']``, ``x['scores']`` takes that
  entry's score (the last matching entry wins).

  ``infected_scores`` must be in the same order as ``uids_infected``: the
  profile is looked up by the position of the uid in that list.
  """
  if x['uid'] not in uids_infected:
    return x

  x['infected'] = True
  # Position of the uid gives the position of its score profile
  profile = infected_scores[uids_infected.index(x['uid'])]['scores']
  record_day = x['datetime'].split(' ')[0]
  for entry in profile:
    entry_day = str(entry['date']).split(' ')[0]
    if entry_day == record_day:
      x['scores'] = entry['score']
  return x


In [0]:
infected_tdf = ble_tdf.apply(get_infected,args=[uids_infected,infected_scores],axis=1)[['uid','datetime' 	,'lat' 	,'lng', 	'zone_id' ,	'datetime_clipped' ,	'ble_users' ,	'scores' ,	'infected']]

In [309]:
infected_tdf[infected_tdf['infected']==True].head()

Unnamed: 0,uid,datetime,lat,lng,zone_id,datetime_clipped,ble_users,scores,infected
4845,94c,2019-01-02 21:42:28.661410,34.002296,-6.852955,Carrefour,1546465320000000000,35c,78.528498,True
4846,94c,2019-01-03 04:53:53.777515,33.999689,-6.852435,EMI,1546491180000000000,17c,72.24522,True
4847,94c,2019-01-03 18:20:34.568443,34.000416,-6.857033,Gare,1546539600000000000,21c,72.24522,True
4848,94c,2019-01-05 19:29:48.139054,33.999689,-6.852435,EMI,1546716540000000000,52c,49.353305,True
4849,94c,2019-01-07 00:20:13.764394,34.000416,-6.857033,Gare,1546820400000000000,38c,25.336579,True


The next step is to add the scores of the people at risk, which are the people that met the infected people.

For our test purposes, we will give the people at risk a quarter of the risk score of each infected person they met, and add these up (the more infected people they meet, the higher their score). This score can be modified further as we test more.

In [0]:
# The quotient of the score passed from infected people to people in risk, for test purposes only
virality_propagation = .25

In [0]:
infected_tdf['potentially_infected'] = False

In [312]:
infected_tdf.head()

Unnamed: 0,uid,datetime,lat,lng,zone_id,datetime_clipped,ble_users,scores,infected,potentially_infected
0,1d,2019-01-01 15:00:00.000000,34.000416,-6.857033,Gare,1546354800000000000,"86d,45f,62f",0.0,False,False
1,1d,2019-01-01 18:00:00.000000,34.000032,-6.854877,ENIM,1546365600000000000,"86d,123d,146d,62f",0.0,False,False
2,1d,2019-01-02 11:00:00.000000,34.000032,-6.854877,ENIM,1546426800000000000,"16d,103d,125d,146d",0.0,False,False
3,1d,2019-01-02 17:00:00.000000,33.999314,-6.847532,Maisons 1,1546448400000000000,"96d,99d,131d",0.0,False,False
4,1d,2019-01-03 08:00:00.000000,34.000032,-6.854877,ENIM,1546502400000000000,"31d,36d,73d,86d,109d,123d,143d,26f",0.0,False,False


In [0]:
temp_infected = infected_tdf[infected_tdf['infected']==True]

In [314]:
temp_infected.head()

Unnamed: 0,uid,datetime,lat,lng,zone_id,datetime_clipped,ble_users,scores,infected,potentially_infected
4845,94c,2019-01-02 21:42:28.661410,34.002296,-6.852955,Carrefour,1546465320000000000,35c,78.528498,True,False
4846,94c,2019-01-03 04:53:53.777515,33.999689,-6.852435,EMI,1546491180000000000,17c,72.24522,True,False
4847,94c,2019-01-03 18:20:34.568443,34.000416,-6.857033,Gare,1546539600000000000,21c,72.24522,True,False
4848,94c,2019-01-05 19:29:48.139054,33.999689,-6.852435,EMI,1546716540000000000,52c,49.353305,True,False
4849,94c,2019-01-07 00:20:13.764394,34.000416,-6.857033,Gare,1546820400000000000,38c,25.336579,True,False


In [0]:
# Getting risk scores for each minute for people having met infected people:
# every user listed next to an infected record receives that record's score
# scaled by virality_propagation, stamped with the record's clipped minute
risk_array = [
    {'uid': contact_uid,
     'datetime_clipped': infected_row.datetime_clipped,
     'risk_scores': infected_row.scores * virality_propagation}
    for infected_row in temp_infected.itertuples(index=True, name='Pandas')
    for contact_uid in infected_row.ble_users.split(',')
]

risk_df = pd.DataFrame(risk_array)

In [0]:
# Summing risks scores of people that are in the same instant to drop duplicates:
# after this step each (uid, clipped minute) pair appears once, with the total
# of the risk received from every infected person met in that minute
risk_df = risk_df.groupby(['uid','datetime_clipped'])['risk_scores'].apply(sum).reset_index()

In [0]:
# Compounding risk whenever he meets a new infected person.
# Keep every record of the users that have at least one risk entry. This is a
# plain row filter: an inner merge on 'uid' alone repeats each record once per
# risk entry of that user, and the running total computed afterwards would
# then count every exposure that many times.
has_risk_entry = infected_tdf['uid'].isin(risk_df['uid'])
temp_compound = infected_tdf.loc[has_risk_entry, ['uid','datetime','datetime_clipped']]

# Attach the risk received in each exact minute; minutes without exposure get 0
temp_compound = temp_compound.merge(risk_df,how='left',on=['uid','datetime_clipped']).fillna(0)

In [0]:
# Running total of the risk per user: the total restarts at 0 each time the uid
# changes from one row to the next, and every row stores the total reached so far
risk_array = []
current_uid = ""
running_total = 0
for record in temp_compound.itertuples(index=True, name='Pandas'):
  if record.uid != current_uid:
    current_uid = record.uid
    running_total = 0
  running_total = running_total + record.risk_scores
  risk_array.append({'uid': record.uid,
                     'datetime_clipped': record.datetime_clipped,
                     'risk_scores': running_total})
risk_df = pd.DataFrame(risk_array)

In [0]:
def filter_risks(x):
  """Mark a record as potentially infected when it carries a non-zero risk.

  When ``x['risk_scores']`` differs from 0, ``x['potentially_infected']`` is
  set to True and ``x['scores']`` is overwritten with the risk value.
  Otherwise the record is left as it is. ``x`` is returned in both cases.
  """
  risk = x['risk_scores']
  if risk != 0:
    x['potentially_infected'] = True
    x['scores'] = risk
  return x

In [0]:
# Merging risk scores back 
infected_tdf = infected_tdf.merge(risk_df,how='left',on=['uid','datetime_clipped']).fillna(0)

In [0]:
scores_tdf = infected_tdf.apply(filter_risks,axis=1)

In [0]:
scores_tdf = scores_tdf[['uid','datetime','zone_id','ble_users','scores','infected','potentially_infected']]

In [0]:
# For people that met a lot of infected people in the same instant, we only keep the total score of meeting them all
scores_tdf = scores_tdf.drop_duplicates(subset=['uid','datetime','zone_id','ble_users','infected','potentially_infected'],keep="last")

In [324]:
scores_tdf[scores_tdf['potentially_infected']==True].head()

Unnamed: 0,uid,datetime,zone_id,ble_users,scores,infected,potentially_infected
3858,17c,2019-01-03 04:53:16.389306,EMI,94c,18.061305,False,True
3859,17c,2019-01-03 10:15:04.043319,ENIM,83c,18.061305,False,True
3860,17c,2019-01-03 15:06:35.864254,ENIM,91c,18.061305,False,True
3861,17c,2019-01-04 02:14:58.926959,EMI,60c,18.061305,False,True
3862,17c,2019-01-05 01:04:28.473890,EMI,69c,18.061305,False,True


In [325]:
scores_tdf[scores_tdf['infected']==True].head()

Unnamed: 0,uid,datetime,zone_id,ble_users,scores,infected,potentially_infected
4845,94c,2019-01-02 21:42:28.661410,Carrefour,35c,78.528498,True,False
4846,94c,2019-01-03 04:53:53.777515,EMI,17c,72.24522,True,False
4847,94c,2019-01-03 18:20:34.568443,Gare,21c,72.24522,True,False
4848,94c,2019-01-05 19:29:48.139054,EMI,52c,49.353305,True,False
4849,94c,2019-01-07 00:20:13.764394,Gare,38c,25.336579,True,False


In [0]:
scores_tdf.to_csv('user_scores.csv')

In [327]:
scores_tdf.shape

(14169, 7)