In [1]:
import os
import glob

import numpy as np
import pandas as pd

In [2]:
folder_cantonales = './CANTONALES_1988-2011-csv/'
folder_legislatives = './LEGISLATIVES_1958-2012-csv/'
folder_presidentielles = './PRESIDENTIELLES_1965-2012-csv/'
cantonales = glob.glob(folder_cantonales + '*.csv')
legislatives = glob.glob(folder_legislatives + '*.csv')
presidentielles = glob.glob(folder_presidentielles + '*.csv')

In [3]:
def splitting_cantonales(folder):
    result = pd.DataFrame(columns=['Code département', 'Département', 'Code canton', 'Canton', 'Inscrits',
                                   'Abstentions', 'Votants', 'Blancs et nuls','Exprimés','year', 'election_round'])
    for f in folder:
        _, file = os.path.split(f)
        name,_ = os.path.splitext(file)
        year = int(name[9:13])
        election_round = name[13:15]
        df = pd.read_csv(f)
        df['year'] = year
        df['election_round'] = election_round
        result = pd.concat([result, df],join='inner', ignore_index=True)
    return result

In [4]:
def splitting_legislatives(folder):
    result = pd.DataFrame(columns=['Code département','département', 'circonscription', 'Inscrits', 'Votants', 'Exprimés', 'Blancs et nuls',
                                   'Taux de participation (%)', 'year', 'election_round'])
    for f in folder:
        _, file = os.path.split(f)
        name,_ = os.path.splitext(file)
        year = int(name[9:13])
        election_round = name[13:15]
        df = pd.read_csv(f)
        df['year'] = year
        df['election_round'] = election_round
        result = pd.concat([result, df],join='inner', ignore_index=True)
    return result

In [5]:
def splitting_presidentielles(folder):
    result = pd.DataFrame(columns=['Code département','département','circonscription','Inscrits','Votants', 'Exprimés', 'Blancs et nuls','year', 'election_round'])
    for f in folder:
        _, file = os.path.split(f)
        name,_ = os.path.splitext(file)
        year = int(name[10:14])
        election_round = name[14:16]
        df = pd.read_csv(f)
        df['year'] = year
        df['election_round'] = election_round
        result = pd.concat([result, df],join='inner', ignore_index=True)
    return result

In [6]:
df_cantonales = splitting_cantonales(cantonales)
df_cantonales['election'] = 'cantonales'
df_cantonales.rename(columns={'Département':'département'}, inplace=True)
df_cantonales.sample(10)

Unnamed: 0,Code département,département,Code canton,Canton,Inscrits,Abstentions,Votants,Blancs et nuls,Exprimés,year,election_round,election
7626,50,MANCHE,6,BRÉHAL,8404,2857,5547,208,5339,2004,t2,cantonales
1726,22,COTES D'ARMOR,26,MONCONTOUR,9406,4764,4642,189,4453,2011,t1,cantonales
7652,51,MARNE,41,REIMS-8,9010,3569,5441,643,4798,2004,t2,cantonales
1126,86,VIENNE,36,POITIERS-6,8409,3555,4854,213,4641,2001,t2,cantonales
244,21,COTE D'OR,6,BEAUNE-SUD,13600,6348,7252,416,6836,2001,t2,cantonales
4495,59,NORD,12,BOURBOURG,13693,3112,10581,439,10142,2001,t1,cantonales
6981,6,ALPES MARITIMES,36,MANDELIEU-CANNES-OUEST,25236,9456,15780,1715,14065,2004,t2,cantonales
3962,32,GERS,29,VIC-FEZENSAC,4748,824,3924,185,3739,2001,t1,cantonales
3199,94,VAL DE MARNE,5,CACHAN,15781,9693,6088,126,5962,2011,t1,cantonales
5775,30,GARD,37,VAUVERT,14538,6764,7774,423,7351,2011,t2,cantonales


In [7]:
df_cantonales.year.drop_duplicates().tolist()

[2001, 2011, 2004, 2008]

In [8]:
df_legislatives = splitting_legislatives(legislatives)
df_legislatives['election']='legislative'

In [9]:
df_legislatives.sample(10)

Unnamed: 0,Code département,département,circonscription,Inscrits,Votants,Exprimés,Blancs et nuls,Taux de participation (%),year,election_round,election
616,13,BOUCHES-DU-RHONE,1,73877,40340,38873,1367,"54,6%",2012,t2,legislative
373,70,HAUTE-SAONE,1,62041,42849,41869,980,"69,07%",2002,t1,legislative
1487,81,TARN,2,79976,54631,53079,1552,"68,31%",2007,t2,legislative
900,69,RHONE,3,70342,37138,36222,916,"52,8%",2012,t2,legislative
3252,985,MAYOTTE,1,64775,31797,30330,1467,"49,09%",2007,t1,legislative
1814,54,MEURTHE-ET-MOSELLE,3,56593,32096,31148,948,"56,71%",2002,t2,legislative
7,2,AISNE,4,76158,47010,46182,828,"61,73%",2002,t1,legislative
2513,76,SEINE-MARITIME,7,89254,49187,48468,719,"55,11%",2012,t1,legislative
1325,54,MEURTHE-ET-MOSELLE,1,52930,29791,29164,627,"56,28%",2007,t2,legislative
760,42,LOIRE,2,57280,28246,27449,797,"49,31%",2012,t2,legislative


In [10]:
df_legislatives.year.drop_duplicates().tolist()

[2002, 2012, 2007]

In [11]:
df_presidentielles = splitting_presidentielles(presidentielles)
df_presidentielles['election']='presidential'

In [12]:
df_presidentielles.sample(10)

Unnamed: 0,Code département,département,circonscription,Inscrits,Votants,Exprimés,Blancs et nuls,year,election_round,election
1070,89,YONNE,2,73643,62152,59031,3121,2007,t2,presidential
880,60,OISE,5,69417,58454,55784,2670,2007,t2,presidential
618,12,AVEYRON,3,71880,63989,60952,3037,2007,t2,presidential
158,34,HERAULT,5,95567,82368,81186,1182,2007,t1,presidential
2768,82,TARN-ET-GARONNE,2,77887,60432,58063,2369,2002,t1,presidential
3162,59,NORD,10,80282,59271,58135,1136,2012,t1,presidential
2123,75,PARIS,12,71329,60266,58404,1862,2012,t2,presidential
845,57,MOSELLE,7,79660,65421,62293,3128,2007,t2,presidential
1037,81,TARN,3,59420,52553,49952,2601,2007,t2,presidential
895,62,PAS DE CALAIS,10,72349,57692,54893,2799,2007,t2,presidential


In [13]:
df_presidentielles.year.drop_duplicates().tolist()

[2007, 2002, 2012]

In [14]:
dfs = [df_cantonales, df_presidentielles, df_legislatives]

In [15]:
result = pd.concat(dfs,join='inner', ignore_index=True)

In [16]:
result

Unnamed: 0,Code département,département,Inscrits,Votants,Blancs et nuls,Exprimés,year,election_round,election
0,1,AIN,12513,6169,598,5571,2001,t2,cantonales
1,1,AIN,8358,4793,249,4544,2001,t2,cantonales
2,1,AIN,2619,1852,94,1758,2001,t2,cantonales
3,1,AIN,5830,3847,140,3707,2001,t2,cantonales
4,1,AIN,3592,2304,181,2123,2001,t2,cantonales
...,...,...,...,...,...,...,...,...,...
20258,986,WALLIS ET FUTUNA,11160,7849,65,7784,2007,t1,legislative
20259,987,POLYNESIE FRANCAISE,93813,52894,413,52481,2007,t1,legislative
20260,987,POLYNESIE FRANCAISE,73916,39222,408,38814,2007,t1,legislative
20261,988,NOUVELLE-CALEDONIE,68940,37863,808,37055,2007,t1,legislative


In [17]:
#result.to_csv('elections.csv')