# Accès aux interfaces numériques
## Taux de couverture mobile (2G, 3G, 4G ...)

In [113]:
# Donnees de mon reseau mobile: sites mobiles en France métropolitaine au 30 juin 2020
# https://www.data.gouv.fr/fr/datasets/r/77ca5457-c1fe-4450-9761-1a6a598921c0
import pandas as pd
import numpy as np
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

external_data = Path('../data/external/')
processed_data = Path('../data/processed/')
raw_data = Path('../data/raw/')
interim_data = Path('../data/interim/')

In [114]:
df = pd.read_csv(raw_data/'sites_mobiles_2020_juin.csv', delimiter=';', error_bad_lines=False, low_memory=False)
df = df.drop_duplicates()
df

Unnamed: 0,code_op,nom_op,num_site,x_lambert_93,y_lambert_93,nom_reg,nom_dep,insee_dep,nom_com,insee_com,site_2g,site_3g,site_4g,mes_4g_trim,site_ZB,site_DCC
0,20801,Orange,0012290010,872639,6570768,AUVERGNE RHONE ALPES,AIN,01,BOURG EN BRESSE,01053,1,1,1,0,0,0
1,20801,Orange,0012290011,860279,6529851,AUVERGNE RHONE ALPES,AIN,01,DAGNEUX,01142,1,1,1,0,0,0
2,20801,Orange,0012290012,852231,6532639,AUVERGNE RHONE ALPES,AIN,01,TRAMOYES,01424,1,1,1,0,0,0
3,20801,Orange,0012290014,901020,6564540,AUVERGNE RHONE ALPES,AIN,01,NANTUA,01269,1,1,1,0,0,0
4,20801,Orange,0012290016,903596,6563175,AUVERGNE RHONE ALPES,AIN,01,NEYROLLES,01274,1,1,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86439,20820,Bouygues Telecom,ZPH84201,908817,6299061,PROVENCE-ALPES-COTE D'AZUR,VAUCLUSE,84,GRAMBOIS,84052,0,1,0,0,1,0
86440,20820,Bouygues Telecom,ZPH88202,907166,6820434,GRAND EST,VOSGES,88,MARTIGNY-LES-GERBONVAUX,88290,0,1,0,0,1,0
86441,20820,Bouygues Telecom,ZPH88203,916863,6766415,GRAND EST,VOSGES,88,GRIGNONCOURT,88220,0,1,1,0,1,0
86442,20820,Bouygues Telecom,ZPH88204,919671,6785054,GRAND EST,VOSGES,88,PROVENCHERES-LES-DARNEY,88360,0,1,1,0,1,0


Pour calculer le taux de couverture mobile, on va suposer que obtenir:
- la 4G correspond à un poids de 50%
- la 3G --> poids de 30%
- la 2G --> poids de 20%

In [115]:
cols = 'nom_op nom_com insee_dep insee_com site_2g site_3g site_4g'.split()
df = df[cols]
df.head()

Unnamed: 0,nom_op,nom_com,insee_dep,insee_com,site_2g,site_3g,site_4g
0,Orange,BOURG EN BRESSE,1,1053,1,1,1
1,Orange,DAGNEUX,1,1142,1,1,1
2,Orange,TRAMOYES,1,1424,1,1,1
3,Orange,NANTUA,1,1269,1,1,1
4,Orange,NEYROLLES,1,1274,1,1,1


In [116]:
df['nom_op'].value_counts()

Orange              25141
SFR                 22267
Bouygues Telecom    21005
Free Mobile         17918
Name: nom_op, dtype: int64

In [117]:
# Il n'y a que la moitié des communes avec de la donnée
df['insee_com'].unique().shape

(17075,)

In [118]:
# 13 000 communes couvertes par Orange par ex.
df[df['nom_op'] == 'Orange']['insee_com'].unique().shape

(13371,)

In [119]:
df['site_2g'] = df['site_2g'].map({1: 0.2, 0:0})
df['site_3g'] = df['site_3g'].map({1: 0.3, 0:0})
df['site_4g'] = df['site_4g'].map({1: 0.5, 0:0})

df['COUVERTURE_MOBILE'] = df[['site_2g', 'site_3g', 'site_4g']].sum(axis=1)
df = df.rename(columns={'insee_com': 'CODE_INSEE', 'insee_dep': 'DEP'})
df = df.drop(columns=['site_2g', 'site_3g', 'site_4g'])

In [120]:
# COMMUNES

communes = pd.read_csv(raw_data/'table_insee_libcom_dep.csv').drop(columns=['Unnamed: 0'])
communes

Unnamed: 0,CODE_INSEE,LIBCOM,DEP
0,01001,L'Abergement-Clémenciat,01
1,01002,L'Abergement-de-Varey,01
2,01004,Ambérieu-en-Bugey,01
3,01005,Ambérieux-en-Dombes,01
4,01006,Ambléon,01
...,...,...,...
35005,97613,M'Tsangamouji,976
35006,97614,Ouangani,976
35007,97615,Pamandzi,976
35008,97616,Sada,976


In [122]:
# MERGE des données avec les communes + code INSEE

res = df.merge(communes, on=['CODE_INSEE', 'DEP'], how='inner')
res

Unnamed: 0,nom_op,nom_com,DEP,CODE_INSEE,COUVERTURE_MOBILE,LIBCOM
0,Orange,BOURG EN BRESSE,01,01053,1.0,Bourg-en-Bresse
1,Orange,BOURG EN BRESSE,01,01053,1.0,Bourg-en-Bresse
2,Orange,BOURG EN BRESSE,01,01053,1.0,Bourg-en-Bresse
3,Orange,BOURG EN BRESSE,01,01053,1.0,Bourg-en-Bresse
4,Orange,BOURG EN BRESSE,01,01053,1.0,Bourg-en-Bresse
...,...,...,...,...,...,...
83885,Bouygues Telecom,MISSEGRE,11,11235,0.5,Missègre
83886,Bouygues Telecom,ALZON,30,30009,0.5,Alzon
83887,Bouygues Telecom,PAILHARES,07,07170,1.0,Pailharès
83888,Bouygues Telecom,GRURY,71,71227,1.0,Grury


In [131]:
couv_mobile = res.groupby('CODE_INSEE')['COUVERTURE_MOBILE'].median().reset_index()
couv_mobile = communes.merge(couv_mobile, on=['CODE_INSEE'], how='left')
couv_mobile

Unnamed: 0,CODE_INSEE,LIBCOM,DEP,COUVERTURE_MOBILE
0,01001,L'Abergement-Clémenciat,01,
1,01002,L'Abergement-de-Varey,01,0.5
2,01004,Ambérieu-en-Bugey,01,1.0
3,01005,Ambérieux-en-Dombes,01,1.0
4,01006,Ambléon,01,0.8
...,...,...,...,...
35005,97613,M'Tsangamouji,976,
35006,97614,Ouangani,976,
35007,97615,Pamandzi,976,
35008,97616,Sada,976,


In [132]:
couv_mobile['COUVERTURE_MOBILE'].value_counts()

1.00    11122
0.80     1869
0.90     1592
0.50     1000
0.40      469
0.65      167
0.30      125
0.75       67
0.20       44
0.55       25
0.60       13
0.70        8
0.85        3
0.35        1
Name: COUVERTURE_MOBILE, dtype: int64

In [130]:
couv_mobile.dropna().shape

(16505, 4)

In [14]:
couv_mobile.to_csv(f'{interim_data}/taux_couverture_mobile.csv', index=False)

## DOM -TOM

Cette liste présente, pour chaque site, le code MCC-MNC de l’opérateur (operateur), la longitude (X) et la latitude (Y) du site, et si le site est équipé en 2G (C2G) et/ou en 3G (C3G) et/ou en 4G (C4G).

In [86]:
# Mapping entre codes postaux et code INSEE
map_zipcode = pd.read_csv(external_data/'data_gps.csv').drop(columns='Unnamed: 0')
map_zipcode['CODE_INSEE'] = map_zipcode['CODE_INSEE'].astype(str)
map_zipcode['CODE_POSTAL'] = map_zipcode['CODE_POSTAL'].astype(str)

# Ile SAINT MARTIN  -  https://www.insee.fr/fr/information/2028040
#map_zipcode = map_zipcode.append({'CODE_INSEE': ' 97801', 'CODE_POSTAL': '97150'}, ignore_index=True)
map_zipcode = map_zipcode[['CODE_INSEE', 'CODE_POSTAL']]
map_zipcode

Unnamed: 0,CODE_INSEE,CODE_POSTAL
0,68336,68210
1,68338,68230
2,68350,68420
3,68355,68640
4,68358,68230
...,...,...
35002,68322,68700
35003,68326,68780
35004,68327,68510
35005,68329,68140


In [159]:
# Reunion
df = pd.read_csv(raw_data/f'{"MAY"}_sites_mobiles_2020_T2.csv', delimiter=';', error_bad_lines=False, low_memory=False)
df

Unnamed: 0,Operateur,X,Y,C2G,C3G,C4G
0,64710,45.231287,-12.777744,1,1,1
1,64710,45.229368,-12.784827,1,1,1
2,64710,45.077230,-12.686637,1,1,1
3,64710,45.225070,-12.780101,1,1,1
4,64710,45.216505,-12.783680,1,1,1
...,...,...,...,...,...,...
144,64702,45.288861,-12.792417,1,1,1
145,64702,45.279583,-12.797583,1,1,1
146,64702,45.103333,-12.851861,1,1,1
147,64702,45.136500,-12.787833,1,1,1


In [161]:
zipcodes = df.apply(get_zipcode, axis=1, geolocator=geolocator, lon_field='X', lat_field='Y')

In [156]:
zipcodes[0].raw['address']['postcode']

'96700'

In [169]:
import geopy

geolocator = geopy.Nominatim(user_agent="geoapiExercises")

def get_zipcode(df, geolocator, lat_field, lon_field):
    location = geolocator.reverse((df[lat_field], df[lon_field]))
    return location.raw['address']['postcode']


def get_couverture_mobile(DOMTOM='MAR'):
    df = pd.read_csv(raw_data/f'{DOMTOM}_sites_mobiles_2020_T2.csv', delimiter=';', error_bad_lines=False, low_memory=False)
    zipcodes = df.apply(get_zipcode, axis=1, geolocator=geolocator, lon_field='X', lat_field='Y')
    df['CODE_POSTAL'] = zipcodes

    df['C2G'] = df['C2G'].map({1: 0.2, 0:0})
    df['C3G'] = df['C3G'].map({1: 0.3, 0:0})
    df['C4G'] = df['C4G'].map({1: 0.5, 0:0})

    df['COUVERTURE_MOBILE'] = df[['C2G', 'C3G', 'C4G']].sum(axis=1)
    df = df.drop(columns=['C2G', 'C3G', 'C4G', 'X', 'Y'])

    # Merge des données avec mapping (zipcode-insee)
    res = df.merge(map_zipcode, on='CODE_POSTAL', how='inner')
    couv_mobile = res.groupby('CODE_INSEE')['COUVERTURE_MOBILE'].median().reset_index()
    couv_mobile = communes.merge(couv_mobile, on=['CODE_INSEE'], how='left')
    
    return couv_mobile
    

In [168]:
# TODO: STM ? STB ? DOMTOM ?   +   REU, GUY marche pas niveau API geocode
#for domtom in 'GUA GUY MAR MAY REU STB STM'.split():

data = pd.DataFrame()
for domtom in "MAR ".split():
    temp = get_couverture_mobile(domtom)
    data = pd.concat([data, temp], axis=0)

KeyError: 'postcode'

In [None]:
data.to_csv(f'{interim_data}/taux_couverture_mobile_DOMTOM.csv', index=False)

#### MERGE metropole + domtom

In [15]:
domtom = pd.read_csv(raw_data/'MAR_sites_mobiles_2020_T2.csv', delimiter=';', error_bad_lines=False, low_memory=False)
domtom

Unnamed: 0,Operateur,X,Y,C2G,C3G,C4G
0,34020,-60.949451,14.691945,1,1,1
1,34020,-60.981388,14.677198,1,1,1
2,34020,-61.029451,14.778889,1,1,1
3,34020,-61.066673,14.813611,1,1,1
4,34020,-61.106951,14.753889,1,0,0
...,...,...,...,...,...,...
479,34002,-61.066900,14.518100,1,1,1
480,34002,-60.876100,14.602000,0,1,1
481,34002,-60.984100,14.765900,1,1,1
482,34002,-60.995800,14.646600,1,1,1


In [22]:
domtom['Operateur'].value_counts()

34001    199
34002    147
34020    138
Name: Operateur, dtype: int64