In [267]:
# Add your imports here
import pandas as pd
import numpy as np
import scipy as sp
import findspark
import os
findspark.init(os.environ['SPARK_HOME'])
from pyspark.sql import *
from pyspark.sql.functions import *
import matplotlib.pyplot as plt

In [268]:
departements = (["{:0^3}".format(i) for i in range(1, 20)] +
               ["{:0^3}".format(i) for i in range(21, 96)] +
               # Corsica
               ["2A0", "2B0"] +
               # DOM-TOM
               ["{}".format(i) for i in range(971, 975)] + ["976"])

In [269]:
# Read all the departments-related sheets of the excel files
# The headers are split across rows 19 and 20.
ircom_communes = pd.read_excel("../data/raw/ircom_2017_revenus_2016.xlsx", header=[19, 20],
                               sheet_name=departements
                              )

In [270]:
def format_sheet(sheet, sheet_name):
    ircom_communes_processed = sheet
    ircom_communes_processed.columns = ['_'.join([column for column in col if not "Unnamed" in column])
                                            .strip().strip('_') 
                                        for col in ircom_communes_processed.columns.values
                                        ]
    return ircom_communes_processed


ircom_communes_aggregated = None
for sheet_name, sheet in ircom_communes.items():
    if ircom_communes_aggregated is not None:
        df = format_sheet(sheet, sheet_name)
        ircom_communes_aggregated = pd.concat([ircom_communes_aggregated, df])
    else:
        ircom_communes_aggregated = format_sheet(sheet, sheet_name)

ircom_communes_aggregated.head()

Unnamed: 0,Dép.,Commune,Libellé de la commune,Revenu fiscal de référence par tranche (en euros),Nombre de foyers fiscaux,Revenu fiscal de référence des foyers fiscaux,Impôt net (total)*,Nombre de foyers fiscaux imposés,Revenu fiscal de référence des foyers fiscaux imposés,Traitements et salaires_Nombre de foyers concernés,Traitements et salaires_Montant,Retraites et pensions_Nombre de foyers concernés,Retraites et pensions_Montant
,10,1,L'Abergement-Clémenciat,Total,413,12210.0,692.389,219,8917.02,280,8769.07,155,3802.81
,10,2,L'Abergement-de-Varey,Total,127,3866.62,196.849,67,2776.44,93,2825.99,42,1189.4
,10,4,Ambérieu-en-Bugey,0 à 10 000,1754,7411.82,-15.782,n.c.,n.c.,849,5426.35,467,3989.3
,10,4,Ambérieu-en-Bugey,10 001 à 12 000,497,5476.77,-9.969,n.c.,n.c.,318,4075.28,191,2516.25
,10,4,Ambérieu-en-Bugey,12 001 à 15 000,894,12154.1,-23.811,58,861.66,693,10835.4,227,3352.28


In [271]:
ircom_communes_aggregated = ircom_communes_aggregated[
    ircom_communes_aggregated['Revenu fiscal de référence par tranche (en euros)'] == 'Total'
]
ircom_communes_aggregated = ircom_communes_aggregated.drop(columns=['Impôt net (total)*', 'Traitements et salaires_Nombre de foyers concernés',
       'Traitements et salaires_Montant',
       'Retraites et pensions_Nombre de foyers concernés',
       'Revenu fiscal de référence des foyers fiscaux imposés',
       'Revenu fiscal de référence par tranche (en euros)',
       'Retraites et pensions_Montant'])
ircom_communes_aggregated.head()

Unnamed: 0,Dép.,Commune,Libellé de la commune,Nombre de foyers fiscaux,Revenu fiscal de référence des foyers fiscaux,Nombre de foyers fiscaux imposés
,10,1,L'Abergement-Clémenciat,413,12210.0,219
,10,2,L'Abergement-de-Varey,127,3866.62,67
,10,4,Ambérieu-en-Bugey,8186,194722.0,3505
,10,5,Ambérieux-en-Dombes,862,25561.9,442
,10,6,Ambléon,59,1755.41,32


In [218]:
ircom_communes_aggregated = ircom_communes_aggregated[ircom_communes_aggregated['Nombre de foyers fiscaux'] != "n.c."]

In [219]:
ircom_communes_aggregated = ircom_communes_aggregated[ircom_communes_aggregated['Revenu fiscal de référence des foyers fiscaux'] != "n.c."]
ircom_communes_aggregated = ircom_communes_aggregated[ircom_communes_aggregated['Nombre de foyers fiscaux imposés'] != "n.c."]

In [220]:
ircom_communes_aggregated['Nombre de foyers fiscaux'] = ircom_communes_aggregated['Nombre de foyers fiscaux'].astype('int')
ircom_communes_aggregated['Revenu fiscal de référence des foyers fiscaux'] = ircom_communes_aggregated['Revenu fiscal de référence des foyers fiscaux'].astype('float')
ircom_communes_aggregated['Nombre de foyers fiscaux imposés'] = ircom_communes_aggregated['Nombre de foyers fiscaux imposés'].astype('int')

In [221]:
ircom_communes_aggregated['Mean reference fiscal income (in k€)'] = ircom_communes_aggregated["Revenu fiscal de référence des foyers fiscaux"] / ircom_communes_aggregated['Nombre de foyers fiscaux']

In [276]:
ircom_communes_aggregated['Dép.'] = ["{:0>3}".format(i) for i in ircom_communes_aggregated['Dép.']]
ircom_communes_aggregated.head()

Unnamed: 0,Dép.,Commune,Libellé de la commune,Nombre de foyers fiscaux,Revenu fiscal de référence des foyers fiscaux,Nombre de foyers fiscaux imposés
,10,1,L'Abergement-Clémenciat,413,12210.0,219
,10,2,L'Abergement-de-Varey,127,3866.62,67
,10,4,Ambérieu-en-Bugey,8186,194722.0,3505
,10,5,Ambérieux-en-Dombes,862,25561.9,442
,10,6,Ambléon,59,1755.41,32


In [277]:
# Helper to remove the accents
import unicodedata as ud

def remove_accents(input_str):
    nkfd_form = ud.normalize('NFKD', str(input_str))
    return u"".join([c for c in nkfd_form if not ud.combining(c)])

In [278]:
global_arrond = pd.read_csv("../data/processed/clean_food_cities_arrond.csv")
global_arrond

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0.1,Unnamed: 0,food_item_index,nutrition-score-fr_100g,nutrition-score-uk_100g,nutrition_grade_numeric,serving_size,energy_100g,fat_100g,saturated-fat_100g,proteins_100g,carbohydrates_100g,sugars_100g,fiber_100g,Median revenue euros,Total poverty rate (%),Poverty rate (-30) (%),Poverty rate (30-39) (%),Poverty rate (40-49) (%),Poverty rate (50-59) (%),Poverty rate (60-74) (%),Poverty rate (75+) (%),Poverty rate (house owners) (%),Poverty rate (tenants) (%),Share of activity revenue (%),Share of retreat pension revenue (%),Share of heritage revenue and other (%),Share of social benefits revenue (%),Share of taxes (%),City name,Department,Region,custom_arrondissement_code,city_name,city_tag_from_food_item
0,0,362362,15.0,20.0,4,,1402.0,30.0,19.00,15.0,1.30,1.30,,18563.000000,18.618075,,22.133096,22.284813,15.726548,16.949153,,6.205567,29.609475,65.9,31.2,11.1,7.1,-15.3,Belley,01,84,01001,belley,belley-ain-france
1,1,362364,15.0,20.0,4,,1435.0,31.0,21.00,15.0,1.00,1.00,,18563.000000,18.618075,,22.133096,22.284813,15.726548,16.949153,,6.205567,29.609475,65.9,31.2,11.1,7.1,-15.3,Belley,01,84,01001,belley,belley-ain-france
2,2,362386,15.0,20.0,4,,1435.0,31.0,21.00,15.0,1.00,1.00,0.0,18563.000000,18.618075,,22.133096,22.284813,15.726548,16.949153,,6.205567,29.609475,65.9,31.2,11.1,7.1,-15.3,Belley,01,84,01001,belley,belley-ain-france
3,3,362366,14.0,19.0,4,,1238.0,24.0,18.00,19.0,1.00,1.00,,18563.000000,18.618075,,22.133096,22.284813,15.726548,16.949153,,6.205567,29.609475,65.9,31.2,11.1,7.1,-15.3,Belley,01,84,01001,belley,belley-ain-france
4,4,362389,14.0,19.0,4,150 g,1243.0,24.0,17.00,19.0,1.30,1.30,0.0,18563.000000,18.618075,,22.133096,22.284813,15.726548,16.949153,,6.205567,29.609475,65.9,31.2,11.1,7.1,-15.3,Belley,01,84,01001,belley,belley-ain-france
5,5,364722,11.0,11.0,4,,985.0,17.8,8.10,7.4,11.80,1.50,0.0,16513.750000,26.494024,,35.958188,28.670923,,,,,35.472155,68.2,26.3,10.6,8.7,-13.8,Nantua,01,84,01004,nantua,nantua-ain-france
6,6,364715,11.0,11.0,4,,985.0,17.8,8.10,7.4,11.80,1.50,0.0,16513.750000,26.494024,,35.958188,28.670923,,,,,35.472155,68.2,26.3,10.6,8.7,-13.8,Nantua,01,84,01004,nantua,nantua-ain-france
7,7,364711,13.0,13.0,4,,1017.0,18.7,9.40,9.8,9.30,2.50,,16513.750000,26.494024,,35.958188,28.670923,,,,,35.472155,68.2,26.3,10.6,8.7,-13.8,Nantua,01,84,01004,nantua,nantua-ain-france
8,8,364710,13.0,13.0,4,,1017.0,18.7,9.40,9.8,9.30,2.50,,16513.750000,26.494024,,35.958188,28.670923,,,,,35.472155,68.2,26.3,10.6,8.7,-13.8,Nantua,01,84,01004,nantua,nantua-ain-france
9,9,320129,15.0,20.0,4,30 g,1510.0,33.0,23.00,16.0,1.00,0.50,0.0,20137.222222,,,,,,,,,,,,,,,Servas,01,84,01002,servas,servas-ain-france


In [279]:
global_arrond['serving_size']

0                                                      NaN
1                                                      NaN
2                                                      NaN
3                                                      NaN
4                                                    150 g
5                                                      NaN
6                                                      NaN
7                                                      NaN
8                                                      NaN
9                                                     30 g
10                                                    30 g
11                                                    30 g
12                                                    30 g
13                                                    30 g
14                                                    30 g
15                                                     NaN
16                                                     N

In [280]:
global_arrond = global_arrond.drop(columns=['Unnamed: 0'])
global_arrond

Unnamed: 0,food_item_index,nutrition-score-fr_100g,nutrition-score-uk_100g,nutrition_grade_numeric,serving_size,energy_100g,fat_100g,saturated-fat_100g,proteins_100g,carbohydrates_100g,sugars_100g,fiber_100g,Median revenue euros,Total poverty rate (%),Poverty rate (-30) (%),Poverty rate (30-39) (%),Poverty rate (40-49) (%),Poverty rate (50-59) (%),Poverty rate (60-74) (%),Poverty rate (75+) (%),Poverty rate (house owners) (%),Poverty rate (tenants) (%),Share of activity revenue (%),Share of retreat pension revenue (%),Share of heritage revenue and other (%),Share of social benefits revenue (%),Share of taxes (%),City name,Department,Region,custom_arrondissement_code,city_name,city_tag_from_food_item
0,362362,15.0,20.0,4,,1402.0,30.0,19.00,15.0,1.30,1.30,,18563.000000,18.618075,,22.133096,22.284813,15.726548,16.949153,,6.205567,29.609475,65.9,31.2,11.1,7.1,-15.3,Belley,01,84,01001,belley,belley-ain-france
1,362364,15.0,20.0,4,,1435.0,31.0,21.00,15.0,1.00,1.00,,18563.000000,18.618075,,22.133096,22.284813,15.726548,16.949153,,6.205567,29.609475,65.9,31.2,11.1,7.1,-15.3,Belley,01,84,01001,belley,belley-ain-france
2,362386,15.0,20.0,4,,1435.0,31.0,21.00,15.0,1.00,1.00,0.0,18563.000000,18.618075,,22.133096,22.284813,15.726548,16.949153,,6.205567,29.609475,65.9,31.2,11.1,7.1,-15.3,Belley,01,84,01001,belley,belley-ain-france
3,362366,14.0,19.0,4,,1238.0,24.0,18.00,19.0,1.00,1.00,,18563.000000,18.618075,,22.133096,22.284813,15.726548,16.949153,,6.205567,29.609475,65.9,31.2,11.1,7.1,-15.3,Belley,01,84,01001,belley,belley-ain-france
4,362389,14.0,19.0,4,150 g,1243.0,24.0,17.00,19.0,1.30,1.30,0.0,18563.000000,18.618075,,22.133096,22.284813,15.726548,16.949153,,6.205567,29.609475,65.9,31.2,11.1,7.1,-15.3,Belley,01,84,01001,belley,belley-ain-france
5,364722,11.0,11.0,4,,985.0,17.8,8.10,7.4,11.80,1.50,0.0,16513.750000,26.494024,,35.958188,28.670923,,,,,35.472155,68.2,26.3,10.6,8.7,-13.8,Nantua,01,84,01004,nantua,nantua-ain-france
6,364715,11.0,11.0,4,,985.0,17.8,8.10,7.4,11.80,1.50,0.0,16513.750000,26.494024,,35.958188,28.670923,,,,,35.472155,68.2,26.3,10.6,8.7,-13.8,Nantua,01,84,01004,nantua,nantua-ain-france
7,364711,13.0,13.0,4,,1017.0,18.7,9.40,9.8,9.30,2.50,,16513.750000,26.494024,,35.958188,28.670923,,,,,35.472155,68.2,26.3,10.6,8.7,-13.8,Nantua,01,84,01004,nantua,nantua-ain-france
8,364710,13.0,13.0,4,,1017.0,18.7,9.40,9.8,9.30,2.50,,16513.750000,26.494024,,35.958188,28.670923,,,,,35.472155,68.2,26.3,10.6,8.7,-13.8,Nantua,01,84,01004,nantua,nantua-ain-france
9,320129,15.0,20.0,4,30 g,1510.0,33.0,23.00,16.0,1.00,0.50,0.0,20137.222222,,,,,,,,,,,,,,,Servas,01,84,01002,servas,servas-ain-france


In [281]:
global_arrond['Department'] = ["{:0^3}".format(d) for d in global_arrond['Department']]
global_arrond

Unnamed: 0,food_item_index,nutrition-score-fr_100g,nutrition-score-uk_100g,nutrition_grade_numeric,serving_size,energy_100g,fat_100g,saturated-fat_100g,proteins_100g,carbohydrates_100g,sugars_100g,fiber_100g,Median revenue euros,Total poverty rate (%),Poverty rate (-30) (%),Poverty rate (30-39) (%),Poverty rate (40-49) (%),Poverty rate (50-59) (%),Poverty rate (60-74) (%),Poverty rate (75+) (%),Poverty rate (house owners) (%),Poverty rate (tenants) (%),Share of activity revenue (%),Share of retreat pension revenue (%),Share of heritage revenue and other (%),Share of social benefits revenue (%),Share of taxes (%),City name,Department,Region,custom_arrondissement_code,city_name,city_tag_from_food_item
0,362362,15.0,20.0,4,,1402.0,30.0,19.00,15.0,1.30,1.30,,18563.000000,18.618075,,22.133096,22.284813,15.726548,16.949153,,6.205567,29.609475,65.9,31.2,11.1,7.1,-15.3,Belley,010,84,01001,belley,belley-ain-france
1,362364,15.0,20.0,4,,1435.0,31.0,21.00,15.0,1.00,1.00,,18563.000000,18.618075,,22.133096,22.284813,15.726548,16.949153,,6.205567,29.609475,65.9,31.2,11.1,7.1,-15.3,Belley,010,84,01001,belley,belley-ain-france
2,362386,15.0,20.0,4,,1435.0,31.0,21.00,15.0,1.00,1.00,0.0,18563.000000,18.618075,,22.133096,22.284813,15.726548,16.949153,,6.205567,29.609475,65.9,31.2,11.1,7.1,-15.3,Belley,010,84,01001,belley,belley-ain-france
3,362366,14.0,19.0,4,,1238.0,24.0,18.00,19.0,1.00,1.00,,18563.000000,18.618075,,22.133096,22.284813,15.726548,16.949153,,6.205567,29.609475,65.9,31.2,11.1,7.1,-15.3,Belley,010,84,01001,belley,belley-ain-france
4,362389,14.0,19.0,4,150 g,1243.0,24.0,17.00,19.0,1.30,1.30,0.0,18563.000000,18.618075,,22.133096,22.284813,15.726548,16.949153,,6.205567,29.609475,65.9,31.2,11.1,7.1,-15.3,Belley,010,84,01001,belley,belley-ain-france
5,364722,11.0,11.0,4,,985.0,17.8,8.10,7.4,11.80,1.50,0.0,16513.750000,26.494024,,35.958188,28.670923,,,,,35.472155,68.2,26.3,10.6,8.7,-13.8,Nantua,010,84,01004,nantua,nantua-ain-france
6,364715,11.0,11.0,4,,985.0,17.8,8.10,7.4,11.80,1.50,0.0,16513.750000,26.494024,,35.958188,28.670923,,,,,35.472155,68.2,26.3,10.6,8.7,-13.8,Nantua,010,84,01004,nantua,nantua-ain-france
7,364711,13.0,13.0,4,,1017.0,18.7,9.40,9.8,9.30,2.50,,16513.750000,26.494024,,35.958188,28.670923,,,,,35.472155,68.2,26.3,10.6,8.7,-13.8,Nantua,010,84,01004,nantua,nantua-ain-france
8,364710,13.0,13.0,4,,1017.0,18.7,9.40,9.8,9.30,2.50,,16513.750000,26.494024,,35.958188,28.670923,,,,,35.472155,68.2,26.3,10.6,8.7,-13.8,Nantua,010,84,01004,nantua,nantua-ain-france
9,320129,15.0,20.0,4,30 g,1510.0,33.0,23.00,16.0,1.00,0.50,0.0,20137.222222,,,,,,,,,,,,,,,Servas,010,84,01002,servas,servas-ain-france


In [282]:
ircom_communes_aggregated['city_tag'] = ircom_communes_aggregated['Libellé de la commune'].apply(
    lambda x: remove_accents(x.lower().replace("'", "-"))
)
ircom_communes_aggregated['Dép.'] = ircom_communes_aggregated['Dép.'].astype('str')
ircom_communes_aggregated.head()

Unnamed: 0,Dép.,Commune,Libellé de la commune,Nombre de foyers fiscaux,Revenu fiscal de référence des foyers fiscaux,Nombre de foyers fiscaux imposés,city_tag
,10,1,L'Abergement-Clémenciat,413,12210.0,219,l-abergement-clemenciat
,10,2,L'Abergement-de-Varey,127,3866.62,67,l-abergement-de-varey
,10,4,Ambérieu-en-Bugey,8186,194722.0,3505,amberieu-en-bugey
,10,5,Ambérieux-en-Dombes,862,25561.9,442,amberieux-en-dombes
,10,6,Ambléon,59,1755.41,32,ambleon


In [283]:
ircom_communes_aggregated[['city_tag', 'Dép.']].drop_duplicates().sort_values(by='Dép.')

Unnamed: 0,city_tag,Dép.
,l-abergement-clemenciat,010
,priay,010
,prevessin-moens,010
,premillieu,010
,premeyzel,010
,pouillat,010
,pougny,010
,port,010
,pont-de-veyle,010
,pont-de-vaux,010


In [284]:
global_arrond[['city_name', 'Department']].drop_duplicates().sort_values(by='Department')

Unnamed: 0,city_name,Department
0,belley,010
28744,port,010
26938,saint-champ,010
26812,saint-alban,010
17552,saint-eloi,010
17551,pont-d-ain,010
14750,giron,010
14722,etrez,010
14721,blyes,010
11672,loyettes,010


In [285]:
global_arrond = pd.merge(ircom_communes_aggregated, global_arrond, 
                         left_on=['city_tag', 'Dép.'], right_on=['city_name', 'Department'])

In [286]:
pd.set_option('display.max_columns', 50)
global_arrond.head()

Unnamed: 0,Dép.,Commune,Libellé de la commune,Nombre de foyers fiscaux,Revenu fiscal de référence des foyers fiscaux,Nombre de foyers fiscaux imposés,city_tag,food_item_index,nutrition-score-fr_100g,nutrition-score-uk_100g,nutrition_grade_numeric,serving_size,energy_100g,fat_100g,saturated-fat_100g,proteins_100g,carbohydrates_100g,sugars_100g,fiber_100g,Median revenue euros,Total poverty rate (%),Poverty rate (-30) (%),Poverty rate (30-39) (%),Poverty rate (40-49) (%),Poverty rate (50-59) (%),Poverty rate (60-74) (%),Poverty rate (75+) (%),Poverty rate (house owners) (%),Poverty rate (tenants) (%),Share of activity revenue (%),Share of retreat pension revenue (%),Share of heritage revenue and other (%),Share of social benefits revenue (%),Share of taxes (%),City name,Department,Region,custom_arrondissement_code,city_name,city_tag_from_food_item
0,10,11,Apremont,206,5658.56,108,apremont,431441,16.0,16.0,4,,883.0,12.0,4.4,25.0,0.6,0.5,0.5,21885.714286,,,,,,,,,,,,,,,Apremont,10,84,1004,apremont,apremont-vendee-france
1,10,11,Apremont,206,5658.56,108,apremont,348749,15.0,15.0,4,,703.0,9.3,3.8,19.0,1.9,1.2,0.3,21885.714286,,,,,,,,,,,,,,,Apremont,10,84,1004,apremont,apremont-vendee-france
2,10,11,Apremont,206,5658.56,108,apremont,348720,16.0,16.0,4,,900.0,12.0,4.4,25.0,0.6,0.5,,21885.714286,,,,,,,,,,,,,,,Apremont,10,84,1004,apremont,apremont-vendee-france
3,10,11,Apremont,206,5658.56,108,apremont,348741,11.0,11.0,4,50 g (1 tranche),514.0,3.66,1.48,21.13,1.13,1.13,,21885.714286,,,,,,,,,,,,,,,Apremont,10,84,1004,apremont,apremont-vendee-france
4,10,11,Apremont,206,5658.56,108,apremont,348708,15.0,15.0,4,Une tranche 100 g,699.0,9.4,3.9,18.7,1.9,1.7,,21885.714286,,,,,,,,,,,,,,,Apremont,10,84,1004,apremont,apremont-vendee-france


In [287]:
global_arrond.drop(columns=["Dép.", "Commune", "Libellé de la commune", "Revenu fiscal de référence des foyers fiscaux"],
                   inplace=True)
global_arrond.rename(columns={"Nombre de foyers fiscaux": "Fiscal household number", 
                              "Nombre de foyers fiscaux imposés": "Taxed households number",
                             })

Unnamed: 0,Fiscal household number,Taxed households number,city_tag,food_item_index,nutrition-score-fr_100g,nutrition-score-uk_100g,nutrition_grade_numeric,serving_size,energy_100g,fat_100g,saturated-fat_100g,proteins_100g,carbohydrates_100g,sugars_100g,fiber_100g,Median revenue euros,Total poverty rate (%),Poverty rate (-30) (%),Poverty rate (30-39) (%),Poverty rate (40-49) (%),Poverty rate (50-59) (%),Poverty rate (60-74) (%),Poverty rate (75+) (%),Poverty rate (house owners) (%),Poverty rate (tenants) (%),Share of activity revenue (%),Share of retreat pension revenue (%),Share of heritage revenue and other (%),Share of social benefits revenue (%),Share of taxes (%),City name,Department,Region,custom_arrondissement_code,city_name,city_tag_from_food_item
0,206,108,apremont,431441,16.0,16.0,4,,883.0,12.00,4.40,25.00,0.60,0.50,0.5,21885.714286,,,,,,,,,,,,,,,Apremont,010,84,01004,apremont,apremont-vendee-france
1,206,108,apremont,348749,15.0,15.0,4,,703.0,9.30,3.80,19.00,1.90,1.20,0.3,21885.714286,,,,,,,,,,,,,,,Apremont,010,84,01004,apremont,apremont-vendee-france
2,206,108,apremont,348720,16.0,16.0,4,,900.0,12.00,4.40,25.00,0.60,0.50,,21885.714286,,,,,,,,,,,,,,,Apremont,010,84,01004,apremont,apremont-vendee-france
3,206,108,apremont,348741,11.0,11.0,4,50 g (1 tranche),514.0,3.66,1.48,21.13,1.13,1.13,,21885.714286,,,,,,,,,,,,,,,Apremont,010,84,01004,apremont,apremont-vendee-france
4,206,108,apremont,348708,15.0,15.0,4,Une tranche 100 g,699.0,9.40,3.90,18.70,1.90,1.70,,21885.714286,,,,,,,,,,,,,,,Apremont,010,84,01004,apremont,apremont-vendee-france
5,206,108,apremont,246718,16.0,16.0,4,,900.0,12.00,4.40,25.00,0.60,0.50,,21885.714286,,,,,,,,,,,,,,,Apremont,010,84,01004,apremont,apremont-vendee-france
6,206,108,apremont,370790,16.0,16.0,4,,900.0,12.00,4.40,25.00,0.60,0.50,,21885.714286,,,,,,,,,,,,,,,Apremont,010,84,01004,apremont,apremont-vendee-france
7,206,108,apremont,348764,15.0,15.0,4,,699.0,9.40,3.90,18.70,1.90,0.00,,21885.714286,,,,,,,,,,,,,,,Apremont,010,84,01004,apremont,apremont-vendee-france
8,206,108,apremont,348710,15.0,15.0,4,,699.0,9.40,3.90,18.70,1.90,1.70,,21885.714286,,,,,,,,,,,,,,,Apremont,010,84,01004,apremont,apremont-vendee-france
9,206,108,apremont,348768,16.0,16.0,4,,900.0,12.00,4.40,25.00,0.60,0.50,,21885.714286,,,,,,,,,,,,,,,Apremont,010,84,01004,apremont,apremont-vendee-france


In [288]:
global_arrond.to_csv("../data/processed/clean_food_cities_arrond_with_tax.csv")

In [289]:
global_arrond['serving_size']

0                      NaN
1                      NaN
2                      NaN
3         50 g (1 tranche)
4        Une tranche 100 g
5                      NaN
6                      NaN
7                      NaN
8                      NaN
9                      NaN
10        Une tranche 90 g
11        Une tranche 90 g
12                     NaN
13                  12,5 g
14                     NaN
15                     NaN
16                     NaN
17                     NaN
18                     NaN
19                     NaN
20                     NaN
21        50 g (1 tranche)
22       Une tranche 100 g
23                     NaN
24                     NaN
25                     NaN
26                     NaN
27                     NaN
28        Une tranche 90 g
29        Une tranche 90 g
               ...        
34646                125 g
34647                  NaN
34648                  NaN
34649              13,5 cl
34650                20 cl
34651                350 g
3

In [290]:
global_arrond.count()

Nombre de foyers fiscaux                   34676
Nombre de foyers fiscaux imposés           34676
city_tag                                   34676
food_item_index                            34676
nutrition-score-fr_100g                    32389
nutrition-score-uk_100g                    32389
nutrition_grade_numeric                    34676
serving_size                               19666
energy_100g                                34386
fat_100g                                   34501
saturated-fat_100g                         33894
proteins_100g                              34368
carbohydrates_100g                         34187
sugars_100g                                33841
fiber_100g                                 17839
Median revenue euros                       33529
Total poverty rate (%)                     14668
Poverty rate (-30) (%)                      4359
Poverty rate (30-39) (%)                    6257
Poverty rate (40-49) (%)                    7350
Poverty rate (50-59)

In [292]:
global_arrond.to_csv("../data/processed/clean_food_cities_arrond_with_tax.csv")

In [293]:
global_arrond.count()

Nombre de foyers fiscaux                   34676
Nombre de foyers fiscaux imposés           34676
city_tag                                   34676
food_item_index                            34676
nutrition-score-fr_100g                    32389
nutrition-score-uk_100g                    32389
nutrition_grade_numeric                    34676
serving_size                               19666
energy_100g                                34386
fat_100g                                   34501
saturated-fat_100g                         33894
proteins_100g                              34368
carbohydrates_100g                         34187
sugars_100g                                33841
fiber_100g                                 17839
Median revenue euros                       33529
Total poverty rate (%)                     14668
Poverty rate (-30) (%)                      4359
Poverty rate (30-39) (%)                    6257
Poverty rate (40-49) (%)                    7350
Poverty rate (50-59)