# GEOCOVID-selectionClusters.ipynb

Selection of COVID-19 clusters for genome sequencing.

In [1]:
#LIBRARIES
import pandas as pd
import geopandas as gpd
import numpy as np
import getpass
from sqlalchemy import create_engine
import psycopg2 as ps

In [2]:
#PATHS
outdir='../outputs/cluster_selection/'

In [3]:
#CONNECT TO DB WITH user=aladoy
from urllib.parse import quote  #Local import: only needed to build the engine URL below

pw=getpass.getpass() #Ask for user password
#Percent-encode the password before embedding it in the URL: characters such as '@', ':' or '/'
#would otherwise be parsed as URL delimiters. safe='' also encodes '/', and spaces become %20.
engine=create_engine("postgresql+psycopg2://aladoy:{}@localhost/geocovid".format(quote(pw, safe=''))) #Create SQLAlchemy engine
#Keyword arguments instead of a hand-built DSN string: a quote or backslash in the password
#would break (or alter) a "password='...'" DSN.
conn=ps.connect(dbname='geocovid', user='aladoy', host='localhost', password=pw) #Create a connection object
cursor=conn.cursor() #Create a cursor object

 ·········


In [4]:
#IMPORT DATA
#Genomes that were sequenced (cf. mail Damien Jacquot 16/02/2021)
sequencedXls=pd.read_excel('../data/df_merged_GEOCOVID-2.xlsx', engine='openpyxl')
#Only the distinct request ids are needed to query the database; rows without an id are ignored
sequencedIds=sequencedXls.drop_duplicates('id_demande')['id_demande'].dropna().tolist()
#Extract info from database (location of individuals +++)
#The ids are bound as a query parameter (psycopg2 turns the list into a PostgreSQL array) instead of
#being formatted into the SQL text: a formatted Python tuple gives invalid SQL for 0 or 1 id.
sequenced=gpd.read_postgis("SELECT id_demande, date_reception, charge_virale, cat_charge_virale,geometry FROM covid_tests_vd WHERE id_demande = ANY(%(ids)s)",conn,geom_col='geometry',params={'ids':sequencedIds})
#Save to geojson
sequenced.to_file(outdir+'sequenced.geojson',driver='GeoJSON')

## Functions

In [5]:
#FUNCTION THAT RETURNS CASES WITHIN A SPECIFIC SIGNIFICANT CLUSTER THAT WERE INCLUDED IN SATSCAN ANALYSIS
#Arguments:
#   cluster_id: cluster of interest (must be significant, since the SaTScan case lists were only saved for significant clusters)
#   save: if we want to save the results in geojson
#Outputs:
#   clusterCasesAnalysis: list of cases that were included in SaTScan analysis for the given cluster
def casesIncludedSaTScan(cluster_id, save=True):
    """Return the cases that were included in the SaTScan analysis for one significant cluster.

    The SaTScan case file identifies a cluster by (cluster, end_date); it is joined with the
    significant clusters stored in the database to recover their unique id.

    Arguments:
        cluster_id: id (in satscan_clusters_firstvague) of the cluster of interest.
        save: if true, write the cases to outdir+'qgis/casesAnalysis<cluster_id>.geojson'.

    Returns:
        GeoDataFrame with the covid_tests_vd rows of the cases found for this cluster.
        It is empty when the cluster has no saved cases (e.g. it is not significant).
    """
    #Read file
    casesAnalysis=gpd.read_file('../../Part1_Description/outputs/COVID_satscan/ByRELI/ByDays/cluster_cases.gpkg')
    casesAnalysis['end_date']=pd.to_datetime(casesAnalysis.end_date)
    #Add unique ID of clusters from the database
    clustIDs=pd.read_sql_query("SELECT id, cluster, end_date FROM satscan_clusters_firstvague WHERE significant=True", conn)
    #Dataframe with significant clusters + within id_demande
    casesAnalysis=pd.merge(casesAnalysis,clustIDs, how='inner', on=['cluster','end_date'])

    #Extract cases within the cluster of interest included in SaTScan analysis
    caseIds=casesAnalysis.loc[casesAnalysis.id==cluster_id,'id_demande'].tolist()
    #Ids are bound as an array parameter: formatting a Python tuple into "IN (...)" yields invalid
    #SQL when the cluster has zero or one case ("IN ()" / "IN (x,)").
    clusterCasesAnalysis=gpd.read_postgis("SELECT * FROM covid_tests_vd WHERE id_demande = ANY(%(ids)s)",conn,geom_col='geometry',params={'ids':caseIds})

    #Nothing to export when no case was found for this cluster
    if save and not clusterCasesAnalysis.empty:
        clusterCasesAnalysis.to_file(outdir+'qgis/casesAnalysis'+str(cluster_id)+'.geojson',driver='GeoJSON')

    return clusterCasesAnalysis

In [6]:
#FUNCTION THAT RETURNS CLUSTERS FOR A GIVEN AREA
#Arguments:
#   loc: desired area (name of the municipality, or postal code when loc_type='npa')
#   seq: table of all sequenced individuals (one row per id_demande)
#   loc_type: municipality (mun) or postal code (npa)
#Outputs:
#   optionsClust: clusters intersecting the area, with their characteristics and the number of cases already sequenced (in order to select the best match)
#   casesClust: clusters and their cases (geometric intersection, so not necessarily the cases included in SaTScan analyses)

def clustersLocation(loc, seq, loc_type='mun'):
    """Return the SaTScan clusters intersecting an area, together with their positive cases.

    Arguments:
        loc: name of the municipality, or postal code when loc_type is 'npa'.
        seq: table of the sequenced individuals; only its id_demande column is read.
        loc_type: 'npa' to look the area up by postal code; any other value looks it up
            by municipality name.

    Returns:
        optionsClust: one row per cluster with the number of cases used by SaTScan (observed),
            the number of positive cases geometrically inside the cluster during its time
            window (nb_cases), how many of those are geocoded at building (nb_geoBuilding)
            and how many are already sequenced (nb_sequenced); sorted by nb_sequenced, descending.
        casesClust: one row per (cluster, case) pair, with the flags sequenced and
            geo_building (1 or NaN).
    """
    #Clusters that intersect the location
    #Only the area lookup differs between the two location types. The value itself is passed
    #as a query parameter so that names containing an apostrophe (e.g. L'Abbaye) are handled.
    if loc_type=='npa':
        areaFilter="npa a WHERE a.zipcode=%(loc)s"
        locValue=int(loc) #Plain int: psycopg2 cannot adapt numpy integers
    else:
        areaFilter="municipalities a WHERE a.name=%(loc)s"
        locValue=str(loc)
    sql="""SELECT cl.id, cl.start_date, cl.end_date, cl.significant, cl.observed, cl.duration, cl.geometry, c.id_demande, c.note_geocoding
        FROM (SELECT id_demande, date_reception, note_geocoding, geometry FROM covid_tests_vd WHERE res_cov=1) as c INNER JOIN
        (SELECT s.* FROM satscan_clusters_firstvague s, {} AND st_intersects(s.geometry, a.geometry)) cl
        ON st_intersects(cl.geometry, c.geometry) WHERE (c.date_reception>=cl.start_date AND c.date_reception<=cl.end_date)""".format(areaFilter)
    casesClust=gpd.read_postgis(sql,conn,geom_col='geometry',params={'loc':locValue})

    #Add dummy var for merge
    #Built from a copy of the seq argument, so the caller's table is not modified; ids are
    #de-duplicated so that the left merge cannot multiply the rows of casesClust.
    seqFlags=seq[['id_demande']].drop_duplicates().assign(sequenced=1)
    casesClust=pd.merge(casesClust,seqFlags, how='left', on='id_demande')
    #na=False: a missing geocoding note must not count as "geocoded at building"
    #(np.where would treat the NaN returned by startswith as true)
    atBuilding=casesClust.note_geocoding.str.startswith('Geocoded at building.', na=False)
    casesClust['geo_building']=np.where(atBuilding,1,np.nan)

    #For each cluster, compute the number of cases considered in SaTScan analysis (observed), the number of cases within cluster if we are doing a geometric intersection with residential coordinates (nb_cases)
    # including the number that are geocoded at building (nb_geoBuilding) and the number that have been already sequenced (nb_sequenced)
    #'count' ignores NaN, so the 1/NaN flags are counted as "number of flagged cases"
    optionsClust=casesClust.groupby(['id','duration','significant','observed']).agg({'id_demande':'count','geo_building':'count','sequenced':'count'}).reset_index()
    optionsClust.columns=['id','duration','significant','observed','nb_cases','nb_geoBuilding','nb_sequenced']
    optionsClust=optionsClust.sort_values('nb_sequenced',ascending=False)

    print('Number of potential clusters we can select: ' + str(optionsClust.shape[0]))
    #A case can fall in several clusters: count each sequenced individual once
    print('Number of sequenced genomes in this area: ' + str(casesClust.drop_duplicates('id_demande').sequenced.sum()))

    return optionsClust, casesClust

In [7]:
#FUNCTION THAT LISTS THE INDIVIDUALS THAT WE MUST SEQUENCE
#Arguments:
#   optionsClust: clusters of the desired area and their characteristics (first output of clustersLocation)
#   selected_cluster: id of the cluster we selected in optionsClust
#   casesClust: clusters and their cases (geometric intersection, so not necessarily the cases included in SaTScan analyses)
#Outputs:
#   none returned; the individuals to sequence are written to cases_<id>.csv and the selected cluster to cluster_<id>.csv in outdir

def ind2sequenced(optionsClust, selected_cluster, casesClust): #selected_cluster=id of the cluster we keep
    """List the individuals of the selected cluster and save them, with the cluster, as CSV.

    Arguments:
        optionsClust: cluster summary returned by clustersLocation (id, observed, nb_cases, ...).
        selected_cluster: id of the cluster that is kept.
        casesClust: (cluster, case) table returned by clustersLocation, with the columns
            id, id_demande, sequenced and geo_building.

    Returns:
        None. Writes outdir+'cases_<id>.csv' (cases of the cluster, with the flags sequenced,
        geo_building and inside) and outdir+'cluster_<id>.csv' (attributes of the cluster),
        and prints the number of cases that are not sequenced yet.
    """
    #Caracteristics of the cluster selected
    selectedClust=gpd.read_postgis("SELECT s.* FROM satscan_clusters_firstvague s WHERE s.id=%(id)s", conn, geom_col='geometry', params={'id':int(selected_cluster)})

    #Rows describing the selected cluster, used by both branches below
    clustCases=casesClust[casesClust.id==selected_cluster]
    clustOption=optionsClust[optionsClust.id==selected_cluster]
    seqInfo=clustCases[['id_demande','sequenced','geo_building']]

    #If the number of cases included in SaTScan is larger than the number of cases within cluster, select the id_demande in casesAnalysis (only works for significant clusters)
    #Note: .all() is true on an empty selection, so a cluster missing from optionsClust also takes this branch
    moreCasesInSaTScan=bool((clustOption['observed']>clustOption['nb_cases']).all())
    if bool(selectedClust.significant.all()) and moreCasesInSaTScan:
        #Extract cases used in SaTScan for this specific cluster
        #(casesIncludedSaTScan already returns the full covid_tests_vd rows, so no second query is needed)
        finalCases=casesIncludedSaTScan(selected_cluster, False)
        #Add info in the final dataframe about the cases that were already sequenced and the ones that fall outside the cluster (inside==Nan)
        finalCases=pd.merge(finalCases,seqInfo,how='left',on='id_demande')
        finalCases['inside']=np.where(finalCases.id_demande.isin(clustCases.id_demande),1,np.nan)

    else:
        caseIds=clustCases.id_demande.tolist()
        #Select more info about these cases in database
        #Ids are bound as an array parameter: a formatted Python tuple is invalid SQL for 0 or 1 id
        finalCases=gpd.read_postgis("SELECT * FROM covid_tests_vd WHERE id_demande = ANY(%(ids)s)",conn,geom_col='geometry',params={'ids':caseIds})
        finalCases=pd.merge(finalCases,seqInfo,how='left',on='id_demande')
        finalCases['inside']=1

    #sequenced is 1 or NaN, so its sum is the number of cases already sequenced
    print('New cases to sequence: '+str(finalCases.shape[0]-finalCases.sequenced.sum()))
    #Save file
    finalCases.drop('geometry',axis=1).to_csv(outdir+'cases_'+str(selected_cluster)+'.csv',index=False)
    selectedClust.drop('geometry',axis=1).to_csv(outdir+'cluster_'+str(selected_cluster)+'.csv',index=False)


## Locations

### Nyon

In [23]:
optionsClustNyon, casesClustNyon= clustersLocation('Nyon', sequenced)

Number of potential clusters we can select: 64
Number of sequenced genomes in this area: 9.0


In [24]:
optionsClustNyon.head(20)

Unnamed: 0,id,duration,significant,observed,nb_cases,nb_geoBuilding,nb_sequenced
46,794,14.0,True,22.0,19,19,4
12,259,5.0,True,9.0,7,7,3
30,539,10.0,False,10.0,9,9,3
35,600,11.0,False,11.0,10,10,3
26,486,9.0,False,12.0,10,10,3
23,430,8.0,False,12.0,10,10,3
39,670,12.0,False,14.0,11,11,3
17,367,7.0,True,9.0,8,8,3
15,336,5.0,False,9.0,9,6,3
14,313,6.0,True,10.0,8,8,3


In [25]:
%%capture 
casesIncludedSaTScan(794,True) #to supress output for the current cell

In [26]:
ind2sequenced(optionsClustNyon, 794, casesClustNyon)

New cases to sequence: 18.0


In [27]:
optionsClustNyon[optionsClustNyon.id.isin([588,661,733])]

Unnamed: 0,id,duration,significant,observed,nb_cases,nb_geoBuilding,nb_sequenced
42,733,11.0,True,5.0,1,1,0
34,588,9.0,True,5.0,1,1,0
38,661,10.0,True,5.0,1,1,0


Not enough individuals geocoded at the place of residence to keep this cluster.

### Vallée de Joux (Le Chenit)

In [59]:
optionsClustJoux, casesClustJoux= clustersLocation('Le Chenit', sequenced)

Number of potential clusters we can select: 27
Number of sequenced genomes in this area: 9.0


In [60]:
optionsClustJoux.head(15)

Unnamed: 0,id,duration,significant,observed,nb_cases,nb_geoBuilding,nb_sequenced
13,352,9.0,True,35.0,34,27,7
10,201,9.0,True,25.0,24,18,7
18,644,14.0,True,43.0,44,36,7
17,574,13.0,True,41.0,42,34,7
16,514,12.0,True,38.0,37,30,7
15,461,11.0,True,36.0,35,28,7
14,411,10.0,True,36.0,35,28,7
12,298,8.0,True,30.0,29,23,7
11,246,7.0,True,28.0,27,21,7
9,156,8.0,True,17.0,15,10,5


In [61]:
casesIncludedJoux=casesIncludedSaTScan(514,True)

Cluster n°514 is selected because it contains the maximum number of already sequenced genomes and is the longest-lasting cluster before its size starts to grow a lot.


![Cluster Joux](../outputs/cluster_selection/qgis/ClusterJoux.jpg)



In [62]:
ind2sequenced(optionsClustJoux, 514, casesClustJoux)

New cases to sequence: 31.0


### Morges

In [19]:
optionsClustMorges, casesClustMorges= clustersLocation('Morges', sequenced)

Number of potential clusters we can select: 84
Number of sequenced genomes in this area: 2.0


In [20]:
optionsClustMorges.head(25)

Unnamed: 0,id,duration,significant,observed,nb_cases,nb_geoBuilding,nb_sequenced
14,316,5.0,True,11.0,10,8,2
28,596,10.0,False,17.0,17,11,2
32,676,11.0,False,17.0,17,11,2
25,532,9.0,True,16.0,15,10,2
19,431,7.0,False,13.0,12,9,2
16,363,6.0,True,13.0,12,9,2
22,476,8.0,True,15.0,14,9,2
9,190,6.0,False,6.0,5,5,1
5,131,2.0,False,4.0,3,2,1
6,139,5.0,False,5.0,5,5,1


In [21]:
casesIncludedMorges=casesIncludedSaTScan(532,True)

n°532

![Cluster Morges](../outputs/cluster_selection/qgis/ClusterMorges.jpg)

In [22]:
ind2sequenced(optionsClustMorges, 532, casesClustMorges)

New cases to sequence: 14.0


Also choose n°1285: it is another significant cluster, all of its cases fall inside it (no problem with the corresponding RELI), and it is small.

In [26]:
casesIncludedMorges=casesIncludedSaTScan(1285,True)

In [27]:
ind2sequenced(optionsClustMorges, 1285, casesClustMorges)

New cases to sequence: 5.0


### Lausanne

In [8]:
optionsClustLausanne, casesClustLausanne= clustersLocation('Lausanne', sequenced)

Number of potential clusters we can select: 613
Number of sequenced genomes in this area: 54.0


In [9]:
optionsClustLausanne.sort_values('duration',ascending=False).head(20)

Unnamed: 0,id,duration,significant,observed,nb_cases,nb_geoBuilding,nb_sequenced
180,429,14.0,False,18.0,15,13,6
455,1239,14.0,True,14.0,9,9,0
480,1304,14.0,True,20.0,14,14,0
465,1266,14.0,True,23.0,16,16,0
471,1284,14.0,True,25.0,18,18,0
489,1322,14.0,True,19.0,9,9,0
474,1290,14.0,False,14.0,11,11,0
490,1323,14.0,True,17.0,8,8,0
476,1294,14.0,False,16.0,14,14,0
479,1297,14.0,False,16.0,23,16,0


Select several clusters listed below:

n°647

In [10]:
%%capture 
casesIncludedSaTScan(647,True) #to supress output for the current cell

In [11]:
ind2sequenced(optionsClustLausanne, 647, casesClustLausanne)

New cases to sequence: 19.0


n°92

In [12]:
%%capture 
casesIncludedSaTScan(92,True)

In [13]:
ind2sequenced(optionsClustLausanne, 92, casesClustLausanne)

New cases to sequence: 6.0


n°13

In [14]:
%%capture 
casesIncludedSaTScan(13,True)

In [15]:
ind2sequenced(optionsClustLausanne, 13, casesClustLausanne)

New cases to sequence: 2.0


n°414

In [16]:
%%capture 
casesIncludedSaTScan(414,True)

In [17]:
ind2sequenced(optionsClustLausanne, 414, casesClustLausanne)

New cases to sequence: 14.0


In [18]:
optionsClust1004, casesClust1004= clustersLocation(1004, sequenced, loc_type='npa')

Number of potential clusters we can select: 178
Number of sequenced genomes in this area: 7.0


In [19]:
optionsClust1004.head(10)

Unnamed: 0,id,duration,significant,observed,nb_cases,nb_geoBuilding,nb_sequenced
62,528,13.0,True,19.0,22,12,3
63,531,11.0,True,18.0,15,15,3
24,196,8.0,False,7.0,7,3,3
27,216,5.0,False,8.0,5,5,2
49,421,9.0,True,14.0,9,9,2
21,166,4.0,True,8.0,5,5,2
17,142,3.0,False,5.0,4,4,2
12,110,2.0,False,4.0,4,4,2
1,21,4.0,False,3.0,1,1,1
41,347,8.0,False,9.0,9,5,1


In [20]:
%%capture 
casesIncludedSaTScan(531,True)

In [21]:
ind2sequenced(optionsClustLausanne, 531, casesClustLausanne)

New cases to sequence: 15.0


In [22]:
#Cluster Avenue du Grey
optionsClust1004[optionsClust1004.id.isin([422,983,1088,915,849,782,715,651,582,522,469])]

Unnamed: 0,id,duration,significant,observed,nb_cases,nb_geoBuilding,nb_sequenced
106,915,14.0,True,26.0,23,23,0
128,1088,12.0,True,20.0,27,14,0
114,983,14.0,True,22.0,17,17,0
98,849,14.0,True,27.0,24,24,0
50,422,7.0,True,14.0,14,14,0
83,715,12.0,True,25.0,23,23,0
90,782,13.0,True,26.0,23,23,0
56,469,8.0,True,16.0,15,15,0
61,522,9.0,True,15.0,14,14,0
74,651,11.0,True,19.0,15,15,0


In [23]:
%%capture 
casesIncludedSaTScan(422,True)

In [24]:
ind2sequenced(optionsClustLausanne, 422, casesClustLausanne)

New cases to sequence: 14.0


In [25]:
#Cluster of Avenue de Morges / Avenue d'Echallens
optionsClust1004[optionsClust1004.id.isin([728,657,663,783,793,914,848,980,985,1176,1211,1304,1322,1382,1391,1404,1417,1431,1444,1458,1470])]

Unnamed: 0,id,duration,significant,observed,nb_cases,nb_geoBuilding,nb_sequenced
105,914,14.0,True,27.0,12,12,0
113,980,14.0,True,27.0,15,15,0
115,985,14.0,True,20.0,31,16,0
161,1382,11.0,True,16.0,8,8,0
162,1391,12.0,True,21.0,11,11,0
163,1404,13.0,True,21.0,11,11,0
164,1417,14.0,True,23.0,13,13,0
165,1431,14.0,True,22.0,13,13,0
166,1444,14.0,True,17.0,12,12,0
167,1458,14.0,True,16.0,12,12,0


In [26]:
%%capture 
casesIncludedSaTScan(1470,True)

In [27]:
ind2sequenced(optionsClustLausanne, 1470, casesClustLausanne)

New cases to sequence: 14.0


1007

In [28]:
optionsClust1007, casesClust1007= clustersLocation(1007, sequenced, loc_type='npa')

Number of potential clusters we can select: 143
Number of sequenced genomes in this area: 10.0


In [29]:
optionsClust1007.head(20)

Unnamed: 0,id,duration,significant,observed,nb_cases,nb_geoBuilding,nb_sequenced
36,412,5.0,True,5.0,5,5,3
69,785,11.0,True,5.0,5,5,3
44,499,12.0,False,14.0,13,13,3
45,515,7.0,True,5.0,5,5,3
43,494,14.0,False,5.0,6,6,3
22,215,7.0,False,10.0,9,9,3
21,211,9.0,True,5.0,6,6,3
51,575,8.0,True,5.0,5,5,3
55,634,14.0,False,15.0,14,14,3
38,436,13.0,False,5.0,6,6,3


n°979

In [30]:
%%capture 
casesIncludedSaTScan(979,True)

In [31]:
ind2sequenced(optionsClustLausanne, 979, casesClustLausanne)

New cases to sequence: 3.0


In [33]:
#Not sure about this cluster (494?)
optionsClust1007[optionsClust1007.id.isin([211,258,318,376,436,494])]

Unnamed: 0,id,duration,significant,observed,nb_cases,nb_geoBuilding,nb_sequenced
43,494,14.0,False,5.0,6,6,3
21,211,9.0,True,5.0,6,6,3
38,436,13.0,False,5.0,6,6,3
29,318,11.0,False,5.0,6,6,3
33,376,12.0,False,5.0,6,6,3
26,258,10.0,True,5.0,6,6,3


In [44]:
ind2sequenced(optionsClust1007, 494, casesClust1007)

New cases to sequence: 3.0


In [35]:
optionsClust1007[optionsClust1007.id.isin([71,127,215,260,326,372,438,499,564,634])]

Unnamed: 0,id,duration,significant,observed,nb_cases,nb_geoBuilding,nb_sequenced
44,499,12.0,False,14.0,13,13,3
22,215,7.0,False,10.0,9,9,3
55,634,14.0,False,15.0,14,14,3
13,127,5.0,True,8.0,7,7,3
27,260,8.0,True,12.0,11,11,3
30,326,9.0,False,12.0,11,11,3
32,372,10.0,False,14.0,13,13,3
39,438,11.0,False,14.0,13,13,3
50,564,13.0,False,8.0,7,7,2
6,71,3.0,False,3.0,3,3,2


n°127 if we want a significant cluster, or n°634 if we want a non-significant cluster

In [43]:
ind2sequenced(optionsClust1007, 634, casesClust1007)

New cases to sequence: 11.0


In [50]:
optionsClustLausanne[optionsClustLausanne.id.isin([79,113,160,206,248,300,354,416,463,519,585])]

Unnamed: 0,id,duration,significant,observed,nb_cases,nb_geoBuilding,nb_sequenced
175,416,11.0,True,19.0,16,16,0
155,354,10.0,True,19.0,16,16,0
133,300,9.0,True,18.0,15,15,0
216,519,13.0,True,22.0,18,18,0
195,463,12.0,True,21.0,18,18,0
55,113,3.0,False,4.0,3,3,0
37,79,2.0,False,4.0,3,3,0
113,248,8.0,True,16.0,13,13,0
95,206,7.0,True,13.0,10,10,0
75,160,6.0,True,12.0,9,9,0


In [51]:
%%capture 
casesIncludedSaTScan(585,True)

In [52]:
ind2sequenced(optionsClustLausanne, 585, casesClustLausanne)

New cases to sequence: 22.0


In [54]:
optionsClustLausanne[optionsClustLausanne.id.isin([361,419,465,521,583,660,735])]

Unnamed: 0,id,duration,significant,observed,nb_cases,nb_geoBuilding,nb_sequenced
176,419,9.0,True,17.0,17,16,3
218,521,11.0,True,20.0,20,19,3
296,735,14.0,True,21.0,21,20,3
268,660,13.0,True,21.0,21,20,3
196,465,10.0,True,19.0,19,18,3
242,583,12.0,True,21.0,21,20,3
156,361,8.0,True,13.0,10,10,2


In [55]:
%%capture 
casesIncludedSaTScan(583,True)

In [56]:
ind2sequenced(optionsClustLausanne, 583, casesClustLausanne)

New cases to sequence: 18.0


In [57]:
optionsClustLausanne[optionsClustLausanne.id.isin([1647,1648,1650,1653,1655,1657,1659,1661,1663,1666,1669,1672,1676])]

Unnamed: 0,id,duration,significant,observed,nb_cases,nb_geoBuilding,nb_sequenced
598,1647,2.0,False,3.0,3,3,0
611,1676,14.0,False,3.0,3,3,0
609,1672,13.0,False,3.0,3,3,0
608,1669,12.0,False,3.0,3,3,0
607,1666,11.0,False,3.0,3,3,0
606,1663,10.0,False,3.0,3,3,0
605,1661,9.0,False,3.0,3,3,0
604,1659,8.0,False,3.0,3,3,0
603,1657,7.0,False,3.0,3,3,0
602,1655,6.0,False,3.0,3,3,0


In [59]:
ind2sequenced(optionsClustLausanne, 1676, casesClustLausanne)

New cases to sequence: 3.0


In [None]:
optionsClustLausanne[optionsClustLausanne.id.isin([305,365,420,527,589,654,729,791,860,925,1000,1061])]

In [62]:
%%capture 
casesIncludedSaTScan(791,True)

In [63]:
ind2sequenced(optionsClustLausanne, 791, casesClustLausanne)

New cases to sequence: 19.0


## Pully

In [8]:
optionsClustPully, casesClustPully= clustersLocation('Pully', sequenced,loc_type='mun')

Number of potential clusters we can select: 119
Number of sequenced genomes in this area: 18.0


In [9]:
optionsClustPully.sort_values('id',ascending=False).head(20)

Unnamed: 0,id,duration,significant,observed,nb_cases,nb_geoBuilding,nb_sequenced
118,1680,14.0,True,6.0,6,6,0
117,1678,13.0,True,6.0,6,6,0
116,1674,12.0,True,5.0,5,5,0
115,1670,11.0,True,5.0,5,5,0
114,1667,10.0,True,5.0,5,5,0
113,1664,9.0,True,4.0,4,4,0
112,1662,8.0,True,4.0,4,4,0
111,1660,14.0,True,5.0,5,5,0
110,1658,13.0,True,4.0,4,4,0
109,1656,12.0,True,4.0,4,4,0


In [10]:
%%capture 
casesIncludedSaTScan(1680,True)

In [11]:
ind2sequenced(optionsClustPully, 1680, casesClustPully)

New cases to sequence: 6.0


In [12]:
optionsClustPully.head(25)

Unnamed: 0,id,duration,significant,observed,nb_cases,nb_geoBuilding,nb_sequenced
0,13,4.0,True,8.0,7,7,6
22,308,9.0,True,15.0,13,13,5
62,855,14.0,True,21.0,20,20,5
56,790,13.0,True,20.0,19,19,5
51,723,12.0,True,20.0,19,19,5
44,594,14.0,True,20.0,18,18,5
40,541,13.0,False,17.0,15,15,5
36,479,12.0,True,17.0,15,15,5
10,161,10.0,True,10.0,9,9,5
11,164,6.0,True,10.0,9,9,5


In [13]:
%%capture 
casesIncludedSaTScan(594,True)

In [14]:
ind2sequenced(optionsClustPully, 594, casesClustPully)

New cases to sequence: 15.0


## Paudex

In [16]:
optionsClustPaudex, casesClustPaudex= clustersLocation('Paudex', sequenced,loc_type='mun')

Number of potential clusters we can select: 30
Number of sequenced genomes in this area: 6.0


In [18]:
optionsClustPaudex.head(10)

Unnamed: 0,id,duration,significant,observed,nb_cases,nb_geoBuilding,nb_sequenced
10,723,12.0,True,20.0,19,19,5
12,855,14.0,True,21.0,20,20,5
11,790,13.0,True,20.0,19,19,5
3,379,6.0,False,11.0,10,10,4
6,502,8.0,False,12.0,15,15,4
9,648,11.0,True,22.0,22,22,4
2,320,5.0,False,8.0,6,6,3
4,444,7.0,False,10.0,7,7,3
1,282,4.0,False,7.0,4,4,3
7,516,7.0,True,15.0,13,13,3


In [19]:
%%capture 
casesIncludedSaTScan(855,True)

In [20]:
ind2sequenced(optionsClustPaudex, 855, casesClustPaudex)

New cases to sequence: 16.0


## Blonay (first cluster)

In [21]:
optionsClustBlonay, casesClustBlonay= clustersLocation('Blonay', sequenced,loc_type='mun')

Number of potential clusters we can select: 2
Number of sequenced genomes in this area: 0.0


In [22]:
optionsClustBlonay

Unnamed: 0,id,duration,significant,observed,nb_cases,nb_geoBuilding,nb_sequenced
0,0,2.0,True,4.0,4,4,0
1,158,14.0,True,4.0,4,4,0


We didn't keep this cluster (id: 0) because three of its tests belong to the same person.

## Yverdon-les-Bains

In [8]:
optionsClustYverdon, casesClustYverdon= clustersLocation('Yverdon-les-Bains', sequenced,loc_type='mun')

Number of potential clusters we can select: 40
Number of sequenced genomes in this area: 3.0


In [19]:
optionsClustYverdon.sort_values('observed',ascending=False).head(15)

Unnamed: 0,id,duration,significant,observed,nb_cases,nb_geoBuilding,nb_sequenced
15,815,13.0,False,17.0,15,14,0
12,755,12.0,False,16.0,14,13,0
8,675,11.0,False,16.0,14,13,0
6,619,10.0,False,14.0,13,12,0
34,1161,11.0,False,14.0,11,11,0
31,1108,10.0,False,14.0,11,11,0
36,1207,12.0,False,14.0,11,11,0
39,1275,14.0,False,13.0,9,9,0
28,1068,9.0,False,13.0,10,10,0
38,1244,13.0,False,13.0,9,9,0


In [32]:
%%capture 
casesIncludedSaTScan(668,True)

In [33]:
ind2sequenced(optionsClustYverdon, 668, casesClustYverdon)

New cases to sequence: 4.0


In [21]:
ind2sequenced(optionsClustYverdon, 1275, casesClustYverdon)

New cases to sequence: 9.0


## Cossonay

In [34]:
optionsClustCossonay, casesClustCossonay= clustersLocation('Cossonay', sequenced,loc_type='mun')

Number of potential clusters we can select: 18
Number of sequenced genomes in this area: 4.0


In [35]:
optionsClustCossonay.head(10)

Unnamed: 0,id,duration,significant,observed,nb_cases,nb_geoBuilding,nb_sequenced
13,926,14.0,True,19.0,16,15,3
4,355,5.0,True,14.0,13,12,3
5,417,6.0,True,14.0,13,12,3
6,472,7.0,True,14.0,13,12,3
7,524,8.0,True,15.0,13,12,3
10,730,11.0,True,18.0,15,14,3
12,861,13.0,True,19.0,16,15,3
9,658,10.0,True,9.0,3,3,2
2,281,3.0,False,4.0,2,2,2
8,584,9.0,True,9.0,3,3,2


In [37]:
optionsClustCossonay[optionsClustCossonay.id.isin([658,584,798,1016,1073])]

Unnamed: 0,id,duration,significant,observed,nb_cases,nb_geoBuilding,nb_sequenced
9,658,10.0,True,9.0,3,3,2
8,584,9.0,True,9.0,3,3,2
11,798,12.0,True,9.0,3,3,2
15,1073,14.0,False,7.0,2,2,1
14,1016,13.0,False,7.0,2,2,1


In [38]:
%%capture 
casesIncludedSaTScan(472,True)

In [39]:
ind2sequenced(optionsClustCossonay, 472, casesClustCossonay)

New cases to sequence: 11.0


## Oron

In [40]:
optionsClustOron, casesClustOron= clustersLocation('Oron', sequenced,loc_type='mun')

Number of potential clusters we can select: 33
Number of sequenced genomes in this area: 3.0


In [42]:
optionsClustOron.head(10)

Unnamed: 0,id,duration,significant,observed,nb_cases,nb_geoBuilding,nb_sequenced
11,813,12.0,False,11.0,11,8,3
12,884,13.0,False,11.0,11,8,3
13,934,14.0,False,12.0,12,9,3
7,542,8.0,False,9.0,9,6,3
8,593,9.0,True,11.0,11,8,3
9,669,10.0,False,11.0,11,8,3
10,740,11.0,False,11.0,11,8,3
15,1052,14.0,False,12.0,12,9,2
14,994,13.0,False,12.0,12,9,2
5,450,6.0,False,6.0,5,4,2


In [43]:
ind2sequenced(optionsClustOron, 934, casesClustOron)

New cases to sequence: 9.0


## Servion

In [45]:
optionsClustServion, casesClustServion= clustersLocation('Servion', sequenced,loc_type='mun')

Number of potential clusters we can select: 30
Number of sequenced genomes in this area: 4.0


In [47]:
optionsClustServion.head(15)

Unnamed: 0,id,duration,significant,observed,nb_cases,nb_geoBuilding,nb_sequenced
3,123,7.0,True,8.0,6,5,4
4,170,8.0,True,11.0,8,7,4
11,533,13.0,False,6.0,6,5,3
2,98,6.0,True,7.0,5,4,3
5,223,7.0,False,6.0,6,5,3
6,265,8.0,False,7.0,7,6,3
7,328,9.0,False,7.0,7,6,3
8,360,10.0,True,6.0,6,5,3
9,425,11.0,True,6.0,6,5,3
10,478,12.0,True,6.0,6,5,3


In [48]:
%%capture 
casesIncludedSaTScan(360,True)

In [49]:
ind2sequenced(optionsClustServion, 360, casesClustServion)

New cases to sequence: 3.0


## Vallorbe

In [22]:
optionsClustVallorbe, casesClustVallorbe= clustersLocation('Vallorbe', sequenced,loc_type='mun')

Number of potential clusters we can select: 26
Number of sequenced genomes in this area: 2.0


In [23]:
optionsClustVallorbe.sort_values('duration',ascending=False).head(10)

Unnamed: 0,id,duration,significant,observed,nb_cases,nb_geoBuilding,nb_sequenced
19,576,14.0,True,25.0,23,23,0
24,964,14.0,False,12.0,12,12,0
23,870,14.0,False,19.0,19,19,0
22,792,14.0,True,22.0,20,20,0
18,517,13.0,True,22.0,20,20,0
21,724,13.0,True,22.0,20,20,0
20,650,12.0,True,22.0,20,20,0
16,466,12.0,True,20.0,18,18,0
14,413,11.0,True,20.0,18,18,0
17,481,11.0,False,14.0,10,9,2


In [24]:
%%capture 
casesIncludedSaTScan(792,True)

In [25]:
ind2sequenced(optionsClustVallorbe, 792, casesClustVallorbe)

New cases to sequence: 22.0


## Montanaire

In [34]:
optionsClustMontanaire, casesClustMontanaire= clustersLocation('Montanaire', sequenced,loc_type='mun')

Number of potential clusters we can select: 33
Number of sequenced genomes in this area: 3.0


In [37]:
optionsClustMontanaire.head(20)

Unnamed: 0,id,duration,significant,observed,nb_cases,nb_geoBuilding,nb_sequenced
20,1038,14.0,True,24.0,22,22,3
13,850,11.0,True,23.0,21,21,3
2,418,4.0,True,13.0,11,11,3
5,577,7.0,True,18.0,16,16,3
17,982,13.0,True,24.0,22,22,3
7,646,8.0,True,20.0,18,18,3
9,718,9.0,True,21.0,19,19,3
15,916,12.0,True,24.0,22,22,3
11,784,10.0,True,23.0,21,21,3
1,350,2.0,False,5.0,5,5,1


In [38]:
%%capture 
casesIncludedSaTScan(1131,True)

In [39]:
ind2sequenced(optionsClustMontanaire, 1131, casesClustMontanaire)

New cases to sequence: 13.0


## Bussigny

In [40]:
optionsClustBussigny, casesClustBussigny= clustersLocation('Bussigny', sequenced,loc_type='mun')

Number of potential clusters we can select: 16
Number of sequenced genomes in this area: 0.0


In [41]:
optionsClustBussigny

Unnamed: 0,id,duration,significant,observed,nb_cases,nb_geoBuilding,nb_sequenced
0,186,3.0,False,5.0,4,4,0
1,241,2.0,False,5.0,3,3,0
2,349,4.0,False,7.0,4,4,0
3,406,5.0,False,8.0,5,5,0
4,415,6.0,True,15.0,13,13,0
5,470,7.0,True,15.0,13,13,0
6,529,8.0,True,15.0,12,11,0
7,578,9.0,True,19.0,17,17,0
8,649,10.0,True,20.0,18,18,0
9,699,9.0,False,4.0,2,2,0


In [42]:
%%capture 
casesIncludedSaTScan(649,True)

In [43]:
ind2sequenced(optionsClustBussigny, 649, casesClustBussigny)

New cases to sequence: 20.0


## Genolier

In [44]:
optionsClustGenolier, casesClustGenolier= clustersLocation('Genolier', sequenced,loc_type='mun')

Number of potential clusters we can select: 10
Number of sequenced genomes in this area: 3.0


In [45]:
optionsClustGenolier

Unnamed: 0,id,duration,significant,observed,nb_cases,nb_geoBuilding,nb_sequenced
0,9,2.0,True,3.0,3,3,3
1,16,2.0,True,3.0,3,3,3
2,28,3.0,False,3.0,3,3,3
3,44,4.0,True,4.0,4,4,3
4,70,5.0,False,4.0,4,4,3
5,104,6.0,False,4.0,4,4,3
6,137,7.0,False,4.0,4,4,3
7,193,8.0,False,4.0,4,4,3
8,230,9.0,False,5.0,5,5,3
9,294,10.0,False,5.0,5,5,3


In [47]:
ind2sequenced(optionsClustGenolier, 230, casesClustGenolier)

New cases to sequence: 2.0


## Prilly

In [54]:
optionsClustPrilly, casesClustPrilly= clustersLocation('Prilly', sequenced,loc_type='mun')

Number of potential clusters we can select: 113
Number of sequenced genomes in this area: 4.0


In [55]:
optionsClustPrilly[optionsClustPrilly.id.isin([1582,1588,1592])]

Unnamed: 0,id,duration,significant,observed,nb_cases,nb_geoBuilding,nb_sequenced
107,1592,14.0,True,6.0,6,6,0
106,1588,13.0,True,6.0,6,6,0
105,1582,12.0,True,6.0,6,6,0


In [57]:
%%capture 
casesIncludedSaTScan(1592,True)

In [58]:
ind2sequenced(optionsClustPrilly, 1592, casesClustPrilly)

New cases to sequence: 6.0
