In [40]:
import requests
import pandas as pd
import numpy as np
import json
import geopandas as gpd
import os
import folium
from dotenv import load_dotenv
load_dotenv()
from bokeh.palettes import brewer

In [41]:
import folium as fl
from sklearn import preprocessing

In [42]:
# load the token
token = os.getenv('token')
url = os.getenv('url')

In [43]:
# create function dataframe from api dataset
def get_data_frame_from_api(token,url):
    # build the header api
    headers = {'Authorization': token}
    # get request from the kobotoolbox api
    response  = requests.get(url,headers=headers)
    # convert binary content to string
    content =response.content.decode()
    # convert string to json objects
    content = json.loads(str(content))
    # create the FOKAL dataframe
    df =pd.DataFrame(content[1:])
    # rebuild the column names
    columns = [col.split("/")[-1] for col in df.columns]
    # renames the column names
    df.columns = columns
    # save the dataset to csv file
    #df.to_csv('datasets/fokal_dataset1.csv')
    return df

In [44]:
df =get_data_frame_from_api(token,url)

In [45]:
df.shape

(436, 82)

In [46]:
cols = sorted(df.columns)
for col in cols:
    print(col)

__version__
_attachments
_bamboo_dataset_id
_geolocation
_id
_notes
_status
_submission_time
_submitted_by
_tags
_uuid
_validation_status
_version_
_xform_id_string
acc_int
adresse_site_internet
age
annee_debut_pratique
artis_prin_revenu
artist_gps
aut_act_artis
autre_cause_defendue
autre_obstacle
autres_sources_finan
cause_defendue
commune
defense_cause
departement
deviceid
dis_artis_princ
droits_auteur_sur_vente
email
end
enre_societe_droit_auteur
financement_Etat
financement_priv_etran
financement_priv_ha
financement_pub_etran
financement_source
groupe_legal
id_autre
id_facebook
id_instagram
id_twitter
id_whatsapp
instanceID
lieu_exposition
manque_a_gagner
nationalite
nbre_membres
nbre_residence_creation
nom
nom_artiste
nom_groupe_partie
nom_resp
numero_tel
obstacle
participation_festival
partie_groupe
perte_cause_corona
prenom
prenom_resp
quartier
reseau_social
residence_creation
revenu_2018_2019
revenu_mars_2020
revenu_temps_normal
sexe
site_internet
societe_droit_auteur
source_fi

In [47]:
df['nom_artiste']

0                NaN
1                NaN
2                NaN
3                NaN
4                NaN
           ...      
431              NaN
432              NaN
433     Gine Cajuste
434             Kika
435    Arquebus Only
Name: nom_artiste, Length: 436, dtype: object

In [35]:
#df = get_data_frame_from_api(token,url)
df['nom']=df['nom'].astype('str')
df['prenom']=df['prenom'].astype('str')
df['sexe']=df['sexe'].astype('str')

In [36]:
df['long'] =df.loc[50,'_geolocation'][0]
df['lat'] =df.loc[50,'_geolocation'][1]

In [37]:
#Roosevelt Saillant 
df.loc[(df.nom.str.contains('Morse')) | ((df.nom.str.contains('Bien-Aimé')) & (df.prenom.str.contains('Ga')) | (df.prenom.str.contains('Roosevelt')) ) | (df.prenom.str.contains('Emeline')) | ((df.prenom.str.contains('T')))  ,['nom','prenom','artist_gps','sexe',"commune"]]

Unnamed: 0,nom,prenom,artist_gps,sexe,commune
20,Bien-Aimé,Gaëlle,18.530143 -72.323536 119.0866928100586 10,,
50,Hector,Ted,18.538639 -72.407136 0 1700,homme,ht0113
239,Theodore,Terry Lund,18.53957 -72.296368 146.20001220703125 24.8999...,homme,ht0111
316,Lohier,Tamara ingrid,,femme,ht0211
331,Tranquille,Taina,,femme,ht0114
355,Théodore,Tessia,,femme,ht0118
426,Morse,Richard,,homme,ht0111
427,Michel,Emeline,,femme,ht0111
428,Saillant,Roosevelt,18.531868 -72.268733 195.70001220703125 29.236...,homme,ht0114
429,Saint Louis,Evenie Rose Thafaina,18.570629 -72.290272 0 1896,femme,ht0112


In [38]:
df['long']

0      18.538639
1      18.538639
2      18.538639
3      18.538639
4      18.538639
         ...    
429    18.538639
430    18.538639
431    18.538639
432    18.538639
433    18.538639
Name: long, Length: 434, dtype: float64

In [39]:
df.to_excel('datasets/fokal_dataset1.xlsx')

In [7]:
def replace_all_values(df):
    df_choices = pd.read_excel('datasets/data_dict.xlsx',sheet_name='choices')
    df_survey = pd.read_excel('datasets/data_dict.xlsx',sheet_name='survey')
    map_dict = dict()
    for index,value in zip(df_choices['name'],df_choices['label::French (fr)']):
        if  'ht' not in index:
            map_dict[index] = value
    df.replace(map_dict,inplace=True)
    return df,map_dict


In [8]:
def filter_and_build_dataset(division="departement",revenu=['tranche1','tranche2','tranche3','tranche4'],site_internet="oui",subvension="oui",discipline="arts",log_transform=True,normalize=False):
    selected_feature = [division,'sexe','prenom','nom','age','email','site_internet','dis_artis_princ','whatsapp','id_whatsapp','adresse_site_internet','subventions','nationalite','email','id_facebook','id_instagram','cause_defendue','aut_act_artis','artist_gps','revenu_temps_normal']
    boundary_id = dict(departement="ADM1_PCODE",commune = "ADM2_PCODE")
    boundary_name =dict(departement="ADM1_FR",commune = "ADM2_FR")
    boundary_url = dict(departement='datasets/boundaries/hti_admbnda_adm1_cnigs_20181129.shp',commune="datasets/boundaries/hti_admbnda_adm2_cnigs_20181129.shp")
    dataset = pd.read_csv('datasets/fokal_dataset1.csv')
    col =[boundary_name[admin_level],boundary_id[admin_level],'geometry']
    boundary_data = gpd.read_file(boundary_url[admin_level])
    

In [9]:
def create_choropleth_map(dataset, boundary_data,agg_column='departement',legend="Nombre de site",log_transform=True,normalize=True,boundary_id='ADM1_PCODE',color='RdYlBu'):
    # df =  dataset.groupby(by=agg_column).size()
    # df=df.to_frame().reset_index()
    
    # if log transform is allowed
    if log_transform:
        dataset['count']= np.log(dataset['count'])
    # if normalization is allowed
    if normalize:
        values =preprocessing.normalize(dataset[['count']])
        dataset['count'] = values
    
    # convert boundary_id to upper case() for merging
   
    dataset[boundary_id] =dataset[boundary_id].str.upper()
    # display dataset
    # display(df.head())
    # merge the dataset to boundaries
    
    data_map =pd.merge(boundary_data,dataset,how='left')
     # fill na values to 0
    data_map.fillna(0,inplace=True)
    _,data_map =df_map_color(data_map,'count')
   
    # display the final dataset
    # display(data_map.head())
    # convert data to geojson for the choropleth map
    geo_data = data_map.to_json()
    # create the map
    m = folium.Map(location=[18.99997, -72.995215], zoom_start=8, control_scale=True,tiles=None)
    
    # add dataset for displaying the choropleth map
    m.choropleth(
    geo_data=geo_data,
    data=data_map,
    columns=[boundary_id, 'count'],
    key_on= f'feature.properties.{boundary_id}',
    fill_color=color, 
    fill_opacity=0.8, 
    line_opacity=1,
    legend_name=legend+ f'par {agg_column}',
    )
    return data_map,m

# display map


In [10]:
def select_choropleth_map(admin_level='departement', normalize=False, log_transform =False,boundary_id="ADM1_PCODE" ):
    boundary_data = None
    if admin_level=='departement':
        col =['ADM1_FR',boundary_id,'geometry']
        boundary_data = gpd.read_file('datasets/boundaries/hti_admbnda_adm1_cnigs_20181129.shp')
    elif admin_level=='commune':
        col =['ADM2_FR',boundary_id,'geometry']
        boundary_data = gpd.read_file('datasets/boundaries/hti_admbnda_adm2_cnigs_20181129.shp')
    # dataset = get_data_frame_from_api(token,url)
    boundary_data =boundary_data[col]
    boundary_data.set_geometry('geometry')
    dataset = pd.read_csv('datasets/fokal_dataset1.csv')
    df_build= build_final_dataset(df=dataset,admin_level=admin_level)
    return create_choropleth_map(dataset=df_build,boundary_data=boundary_data,agg_column=admin_level,boundary_id=boundary_id,log_transform=log_transform,normalize=normalize)


 <li class="nav-item active">
          <a class="nav-link" href="#">Home <span class="sr-only">(current)</span></a>
        </li>

In [11]:
def df_map_color(data,column,palette = 'RdYlBu',range = 10):
    # creates a color palette
    palette = brewer['RdYlBu'][range]
    # reverses the color palette
    palette =palette[::-1]
    # add the color column to dataframe
    data['color'] = pd.cut(data[column], bins=range,labels=list(palette))
    # computes the palette legend
    color_map = data[data.color.notna()].groupby(['color'])[column].min().to_frame().reset_index() 
    # removes non display colunms
    color_map = color_map[color_map[column].notna()]
    display(data)
    return color_map,data

In [12]:
def  get_longitude(x):
    long_ = None
    x =str(x)
    if len(x.split(' ')) >2:
        long_ =x.split(' ')[0]
        
    return long_


In [13]:
def  get_latitude(x):
    lat_ = None
    x =str(x)
    if len(x.split(' ')) >2:
        lat_ =x.split(' ')[1]
        
    return lat_


In [14]:
def age_group(x):
    x = int(x)
    #  18-24 ans			25-35 ans		36-50 ans		50-70 ans		 + de 70 ans
    group = None
    if x>=18 and x<26:
        group="18-24 ans"
    elif x>=26 and x<36:
        group="26-35 ans"
    elif x>=36 and x<50:
        group="36-50 ans"
    elif x>=50 and x<70:
        group="50-70 ans"
    elif x>=70:
        group="+ de 70 ans"
    
    return group

In [31]:
def filter_and_build_dataset(division="departement",revenu=['revenu_tranche1','revenu_tranche2','revenu_tranche4','revenu_tranche5','revenu_tranche6','nan'],site_internet="",subvension="",discipline=['NA'],log_transform=False,normalize=False):
    boundary_id = dict(departement="ADM1_PCODE",commune = "ADM2_PCODE")
    boundary_name =dict(departement="ADM1_FR",commune = "ADM2_FR")
    boundary_url = dict(departement='datasets/boundaries/hti_admbnda_adm1_cnigs_20181129.shp',commune="datasets/boundaries/hti_admbnda_adm2_cnigs_20181129.shp")
   
    boundary_data = None
    boundary_data = gpd.read_file(boundary_url[division])
    boundary_data['long'] = boundary_data.geometry.centroid.x
    boundary_data['lat'] = boundary_data.geometry.centroid.y
    col =[boundary_name[division],boundary_id[division],'geometry','long','lat']
    boundary_data.set_geometry('geometry')
    boundary_data = boundary_data[col]
     
    dataset = pd.read_csv('datasets/fokal_dataset1.csv')
    #dataset = df_master
    dataset = dataset.drop_duplicates(subset=['nom','prenom','age','sexe',''])
    selected_features = [division,'sexe','prenom','nom','age','email','site_internet','dis_artis_princ','whatsapp','id_whatsapp','adresse_site_internet','subventions','nationalite','email','id_facebook','id_instagram','cause_defendue','aut_act_artis','artist_gps','revenu_temps_normal']
    dataset = dataset[selected_features]
    dataset=dataset[~(dataset.sexe.isna() | dataset.nom.isna() | dataset.prenom.isna())]
   
    if revenu[-1] != 'NA':
        dataset = dataset[dataset.revenu_temps_normal.isin(revenu)]
       
    if site_internet =='oui':
        dataset = dataset[dataset.site_internet ==site_internet ]
       
    if subvension  =='oui':
        dataset = dataset[dataset.subventions ==subvension ]
      
    if discipline[-1] != 'NA':
        dataset = dataset[dataset.dis_artis_princ.isin(discipline) ]
       

    dataset.rename(columns=boundary_id,inplace=True)
    dataset[boundary_id[division]]= dataset[boundary_id[division]].str.upper()
    
    dataset=dataset.groupby(by=boundary_id[division]).size().reset_index().rename(columns={0:'count'})
    dataset['value'] =  dataset['count']
    total =  dataset['count'].sum()
    if log_transform:
        dataset['count']= np.round(np.log(dataset['count']),2)
    # if normalization is allowed
    if normalize:
        values =np.round(preprocessing.normalize(dataset[['count']])*100,0)
        dataset['count'] = values
        
    
    
    dataset =pd.merge(boundary_data,dataset,how='left')
    dataset.fillna(0,inplace=True)
    

        
    map_color,dataset =df_map_color(dataset,'count')    
    dataset = dataset.to_json()
    #map_color = map_color.to_json()
    map_color = dict(color=list(map_color['color']),values=list(map_color['count']))
   
    return dataset,map_color,total

In [32]:
def filter_data_point(revenu=['NA'],site_internet='',subvension='',discipline=['NA'],name=""):
    boundary_id = dict(departement="ADM1_PCODE",commune = "ADM2_PCODE")
    boundary_name =dict(departement="ADM1_FR",commune = "ADM2_FR")
    boundary_url = dict(departement='datasets/boundaries/hti_admbnda_adm1_cnigs_20181129.shp',commune="datasets/boundaries/hti_admbnda_adm2_cnigs_20181129.shp")
    division = 'commune'
    boundary_data = None
    boundary_data = gpd.read_file(boundary_url[division])
    boundary_data.set_geometry('geometry')
    boundary_data['x'] = boundary_data.geometry.centroid.x
    boundary_data['y'] = boundary_data.geometry.centroid.y
    col =[boundary_name[division],boundary_id[division],'x','y']
    boundary_data = boundary_data[col]
     
    dataset = pd.read_csv('datasets/fokal_dataset1.csv')
    #dataset = df_master
    dataset = dataset.drop_duplicates(subset=['nom','prenom','age','sexe'])
    selected_features = [division,'sexe','prenom','nom','age','email','site_internet','dis_artis_princ','whatsapp','id_whatsapp','adresse_site_internet','subventions','nationalite','id_facebook','id_instagram','cause_defendue','aut_act_artis','artist_gps','revenu_temps_normal']
    dataset = dataset[selected_features]
    dataset=dataset[~(dataset.sexe.isna() | dataset.nom.isna() | dataset.prenom.isna())]
   
    if revenu[-1] != 'NA':
        dataset = dataset[dataset.revenu_temps_normal.isin(revenu)]
      
    if site_internet =='oui':
        dataset = dataset[dataset.site_internet ==site_internet ]
        
    if subvension  =='oui':
        dataset = dataset[dataset.subventions ==subvension ]
        
    if discipline[-1] != 'NA':
        dataset = dataset[dataset.dis_artis_princ.isin(discipline) ]
    if name !='NA':
         print(name)
         names = name.split(",")
         if len(names) == 2:
            print(names[1])
            dataset = dataset[(dataset.nom.str.contains(names[0])) & (dataset.prenom.str.contains(names[1].strip())) ]
         elif  len(names)== 1:
            dataset = dataset[dataset.nom.str.contains(names[0]) ]

       

    dataset.rename(columns=boundary_id,inplace=True)
    dataset[boundary_id[division]]= dataset[boundary_id[division]].str.upper()
              
    
    
    dataset =pd.merge(boundary_data,dataset,how='inner')
    dataset.fillna(0,inplace=True)
    dataset['long'] =  dataset.artist_gps.apply(get_longitude)
    dataset['lat'] = dataset.artist_gps.apply(get_latitude)
    dataset['lat'] =  dataset['lat'].astype('float')
    dataset['long'] =  dataset['long'].astype('float')
    dataset['age_group'] = dataset.age.apply(age_group)

    #dataset.loc[(dataset['long'] == 0) | (dataset['long'] == 1), 'long'] =  dataset.loc[(dataset['long'] == 0) | (dataset['long'] == 1), 'x']
    #dataset.loc[(dataset['lat'] == 0) | (dataset['lat'] == 1), 'lat'] =  dataset.loc[(dataset['lat'] == 0) | (dataset['lat'] == 1), 'y']
    # dataset = dataset.to_dict(orient='records')
    # dataset = json.dumps(dataset)  
    
    return dataset.to_json(orient='records'),dataset

In [33]:
df_master= get_data_frame_from_api(token,url)

In [34]:
df_master.head()

Unnamed: 0,_notes,lieu_exposition,partie_groupe,nom,participation_festival,_xform_id_string,_bamboo_dataset_id,_tags,obstacle,sexe,...,financement_pub_etran,adresse_site_internet,id_twitter,nbre_residence_creation,manque_a_gagner,id_autre,droits_auteur_sur_vente,societe_droit_auteur,financement_priv_etran,financement_Etat
0,[],expo_haiti,non,Doré,oui,abiGcTbhyWEBA629o6dM9D,,[],acces_subventions manque_pers_intl,homme,...,,,,,,,,,,
1,[],,non,Maitre,non,abiGcTbhyWEBA629o6dM9D,,[],absence_Haïti,homme,...,,,,,,,,,,
2,[],,,,,abiGcTbhyWEBA629o6dM9D,,[],,,...,,,,,,,,,,
3,[],expo_haiti,oui,SAINTIL,oui,abiGcTbhyWEBA629o6dM9D,,[],manque_formation acces_subventions absence_Haïti,femme,...,,,,,,,,,,
4,[],expo_haiti,oui,Petit Homme,oui,abiGcTbhyWEBA629o6dM9D,,[],acces_subventions absence_Haïti,femme,...,pallier5,www.collectifdesjeunesdegrosbalancepourunautre...,,,,,,,,


In [35]:
_,data =filter_data_point()





  boundary_data['x'] = boundary_data.geometry.centroid.x

  boundary_data['y'] = boundary_data.geometry.centroid.y


In [36]:
data.head()

Unnamed: 0,ADM2_FR,ADM2_PCODE,x,y,sexe,prenom,nom,age,email,site_internet,...,nationalite,id_facebook,id_instagram,cause_defendue,aut_act_artis,artist_gps,revenu_temps_normal,long,lat,age_group
0,Abricots,HT0812,-74.315712,18.61511,homme,Joubert Daphné,Marc,35.0,phaorg@yahoo.fr,non,...,Haïtienne,0,0,environnement droits_humains droit_femme,0,18.649294 -74.307191 -36.22349482295821 64,0,18.649294,-74.307191,26-35 ans
1,Abricots,HT0812,-74.315712,18.61511,homme,Joseph Ginel,Louis,45.0,gjlouis2212@gmail.com,non,...,Haïtienne,0,0,environnement droit_handicapes autre_preciser,arts_plastiques,18.64925 -74.307238 -13.779501525879258 80,revenu_tranche1,18.64925,-74.307238,36-50 ans
2,Abricots,HT0812,-74.315712,18.61511,homme,Instz,Brunache,33.0,benbrunache@yahoo.fr,non,...,Haïtienne,0,0,environnement droit_enfants,arts_plastiques,18.64935 -74.307157 -9.001864080501367 22,revenu_tranche1,18.64935,-74.307157,26-35 ans
3,Abricots,HT0812,-74.315712,18.61511,femme,Calypso,Mosena,25.0,0,non,...,Haïtienne,Calypso Mosena,0,environnement droit_femme droit_handicapes dro...,theatre arts_plastiques,18.642691 -74.306871 0 2200,revenu_tranche1,18.642691,-74.306871,18-24 ans
4,Aquin,HT0731,-73.217885,18.281274,homme,Biltonn,Bossé,32.0,bbiltonn@yahoo.fr,non,...,Haïtienne,Biltonn Bosse,0,environnement droits_humains droit_femme droit...,litterature,0,0,,,26-35 ans


In [21]:
!pip install openpyxl
data[~data.long.isna()].to_excel('bad_gps2.xlsx')



Please see https://github.com/pypa/pip/issues/5599 for advice on fixing the underlying issue.
To avoid this problem you can invoke Python with '-m pip' instead of running pip directly.
You should consider upgrading via the 'c:\users\user\appdata\local\programs\python\python38\python.exe -m pip install --upgrade pip' command.


In [22]:
df2 =data[~data.long.isna()]

In [23]:
df2.head()

Unnamed: 0,ADM2_FR,ADM2_PCODE,x,y,sexe,prenom,nom,age,email,site_internet,...,nationalite,id_facebook,id_instagram,cause_defendue,aut_act_artis,artist_gps,revenu_temps_normal,long,lat,age_group
0,Abricots,HT0812,-74.315712,18.61511,homme,Joubert Daphné,Marc,35.0,phaorg@yahoo.fr,non,...,Haïtienne,0,0,environnement droits_humains droit_femme,0,18.649294 -74.307191 -36.22349482295821 64,0,18.649294,-74.307191,26-35 ans
1,Abricots,HT0812,-74.315712,18.61511,homme,Joseph Ginel,Louis,45.0,gjlouis2212@gmail.com,non,...,Haïtienne,0,0,environnement droit_handicapes autre_preciser,arts_plastiques,18.64925 -74.307238 -13.779501525879258 80,revenu_tranche1,18.64925,-74.307238,36-50 ans
2,Abricots,HT0812,-74.315712,18.61511,homme,Instz,Brunache,33.0,benbrunache@yahoo.fr,non,...,Haïtienne,0,0,environnement droit_enfants,arts_plastiques,18.64935 -74.307157 -9.001864080501367 22,revenu_tranche1,18.64935,-74.307157,26-35 ans
3,Abricots,HT0812,-74.315712,18.61511,femme,Calypso,Mosena,25.0,0,non,...,Haïtienne,Calypso Mosena,0,environnement droit_femme droit_handicapes dro...,theatre arts_plastiques,18.642691 -74.306871 0 2200,revenu_tranche1,18.642691,-74.306871,18-24 ans
5,Beaumont,HT0833,-73.981935,18.44588,homme,MERES,WECHE,77.0,wechemeres@gmail.com,oui,...,HAITIEN NATURALISÉ CANADIEN,MERES WECHE,0,environnement droit_femme autre_preciser,litterature,45.753994 3.086495 0.3 0.2,revenu_tranche2,45.753994,3.086495,+ de 70 ans


In [24]:
d1 = df2[['x','y']].astype('float')
d2 = df2[['lat','long']].astype('float')
dist =np.linalg.norm(d1 - d2) 


In [25]:
distance(df)

NameError: name 'distance' is not defined

In [None]:
df2['dist'] = dist

In [None]:
df2[['x','y','long','lat','dist']]

In [None]:
data['dist'] = np.nan
for index in data.index:
    d1= np.array((data.at[index,'lat'],data.at[index,'long']))
    d2 = np.array((data.at[index,'x'],data.at[index,'y']))
    data.at[index,'dist'] = np.linalg.norm(d1 - d2) 
    



In [None]:
data.sort_values(by='dist',ascending=0,inplace=True)

In [None]:
data.to_excel("data_gps.xlsx")

In [None]:
data[data.dist<= 2]

In [26]:
data.

Unnamed: 0,ADM2_FR,ADM2_PCODE,x,y,sexe,prenom,nom,age,email,site_internet,...,nationalite,id_facebook,id_instagram,cause_defendue,aut_act_artis,artist_gps,revenu_temps_normal,long,lat,age_group
0,Abricots,HT0812,-74.315712,18.61511,homme,Joubert Daphné,Marc,35.0,phaorg@yahoo.fr,non,...,Haïtienne,0,0,environnement droits_humains droit_femme,0,18.649294 -74.307191 -36.22349482295821 64,0,18.649294,-74.307191,26-35 ans
1,Abricots,HT0812,-74.315712,18.61511,homme,Joseph Ginel,Louis,45.0,gjlouis2212@gmail.com,non,...,Haïtienne,0,0,environnement droit_handicapes autre_preciser,arts_plastiques,18.64925 -74.307238 -13.779501525879258 80,revenu_tranche1,18.64925,-74.307238,36-50 ans
2,Abricots,HT0812,-74.315712,18.61511,homme,Instz,Brunache,33.0,benbrunache@yahoo.fr,non,...,Haïtienne,0,0,environnement droit_enfants,arts_plastiques,18.64935 -74.307157 -9.001864080501367 22,revenu_tranche1,18.64935,-74.307157,26-35 ans
3,Abricots,HT0812,-74.315712,18.61511,femme,Calypso,Mosena,25.0,0,non,...,Haïtienne,Calypso Mosena,0,environnement droit_femme droit_handicapes dro...,theatre arts_plastiques,18.642691 -74.306871 0 2200,revenu_tranche1,18.642691,-74.306871,18-24 ans
4,Aquin,HT0731,-73.217885,18.281274,homme,Biltonn,Bossé,32.0,bbiltonn@yahoo.fr,non,...,Haïtienne,Biltonn Bosse,0,environnement droits_humains droit_femme droit...,litterature,0,0,,,26-35 ans


In [37]:
data,_,__= filter_and_build_dataset()


  boundary_data['long'] = boundary_data.geometry.centroid.x

  boundary_data['lat'] = boundary_data.geometry.centroid.y


KeyError: Index([''], dtype='object')