# Import des modules nécessaires

In [22]:
    # Pour manipuler efficacement des tables de données dans Python
    import pandas as pd

    # Pour faire des requêtes GET et POST
    import requests
    
    # Pour gérer les date
    import datetime as dt
    
    # sql
    from sqlalchemy import create_engine

In [23]:
print(pd.__version__)

0.23.0


In [24]:
print(requests.__version__)

2.18.4


# Requêtage d'une API REST

Pour lancer une requête GET, c'est ultra-simple : on utilise `requests.get(url)`.

Si vous voulez glisser des paramètres avec la requête, utilisez l'argument optionnel `params`, qui accepte un simple dictionnaire.

In [25]:
r = requests.get("https://data.angers.fr/api/records/1.0/search/",
                 params={
                     'dataset': 'horaires-theoriques-et-arrets-du-reseau-irigo-gtfs',
                     'rows': 10
                 })

In [26]:
g = requests.get("https://data.angers.fr/api/records/1.0/search/",
                 params={
                     'dataset': 'bus-tram-position-tr',
                     'rows': 200
                 })

Regardons la nature de ce qui nous est renvoyé.

In [27]:
type(g)

requests.models.Response

C'est un objet de la classe `Response`. Si vous voulez connaitre toutes les méthodes attachées, faites `help(r)`

In [28]:
help(g)

Help on Response in module requests.models object:

class Response(builtins.object)
 |  The :class:`Response <Response>` object, which contains a
 |  server's response to an HTTP request.
 |  
 |  Methods defined here:
 |  
 |  __bool__(self)
 |      Returns True if :attr:`status_code` is less than 400.
 |      
 |      This attribute checks if the status code of the response is between
 |      400 and 600 to see if there was a client error or a server error. If
 |      the status code, is between 200 and 400, this will return True. This
 |      is **not** a check to see if the response code is ``200 OK``.
 |  
 |  __enter__(self)
 |  
 |  __exit__(self, *args)
 |  
 |  __getstate__(self)
 |  
 |  __init__(self)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  __iter__(self)
 |      Allows you to use a response as an iterator.
 |  
 |  __nonzero__(self)
 |      Returns True if :attr:`status_code` is less than 400.
 |      
 |      This attribute checks if

Pour tester que tout a bien fonctionnné, on va vérifié le statut.

In [29]:
g.raise_for_status()

Si ça dit rien c'est que c'est bon :) Dans le doute, regardons le statut...

In [30]:
g.status_code

200

Statut 200 = succès !!

In [31]:
g.json()

{'nhits': 125,
 'parameters': {'dataset': ['bus-tram-position-tr'],
  'timezone': 'UTC',
  'rows': 200,
  'format': 'json'},
 'records': [{'datasetid': 'bus-tram-position-tr',
   'recordid': 'd2707eca39de09f79bcef7be9101e0af08c1a754',
   'fields': {'etat': 'HLPS',
    'type': 'OmniSTDE6',
    'ecart': 0,
    'novh': '803',
    'dest': 'PAS EN SERVICE',
    'idparcours': 268720384,
    'cap': 185,
    'nomarret': 'SORTIE DEPOT ST BART',
    'idvh': 268436259,
    'iddesserte': 268720385,
    'mnemoligne': '04',
    'y': 2279028,
    'nomligne': 'BEAUCOUZE <> ST BARTHELEMY',
    'mnemoarret': 'DEPOT SD',
    'idarret': 671375,
    'harret': '2018-06-19T07:09:46+00:00',
    'x': 386788,
    'sv': '0412',
    'coordonnees': [47.476323, -0.49280852],
    'idligne': 268435460,
    'numarret': 6562},
   'geometry': {'type': 'Point', 'coordinates': [-0.49280852, 47.476323]},
   'record_timestamp': '2018-06-19T07:16:13+00:00'},
  {'datasetid': 'bus-tram-position-tr',
   'recordid': 'a0a5b38fdf8

# Remplissage de DataFrame

On peut construire une `DataFrame` directement à partir d'un "dictionnaires de colonnes".

In [32]:
dd = g.json()['records']

In [38]:
dd[0]

{'datasetid': 'bus-tram-position-tr',
 'recordid': 'd2707eca39de09f79bcef7be9101e0af08c1a754',
 'fields': {'etat': 'HLPS',
  'type': 'OmniSTDE6',
  'ecart': 0,
  'novh': '803',
  'dest': 'PAS EN SERVICE',
  'idparcours': 268720384,
  'cap': 185,
  'nomarret': 'SORTIE DEPOT ST BART',
  'idvh': 268436259,
  'iddesserte': 268720385,
  'mnemoligne': '04',
  'y': 2279028,
  'nomligne': 'BEAUCOUZE <> ST BARTHELEMY',
  'mnemoarret': 'DEPOT SD',
  'idarret': 671375,
  'harret': '2018-06-19T07:09:46+00:00',
  'x': 386788,
  'sv': '0412',
  'coordonnees': [47.476323, -0.49280852],
  'idligne': 268435460,
  'numarret': 6562},
 'geometry': {'type': 'Point', 'coordinates': [-0.49280852, 47.476323]},
 'record_timestamp': '2018-06-19T07:16:13+00:00'}

In [39]:
#Arret
id_arret = [elem['fields']['idarret'] for elem in dd]
nom_arret = [elem['fields']['nomarret'] for elem in dd]
mne_arret = [elem['fields']['mnemoarret'] for elem in dd]

arret = pd.DataFrame({
    'id_arret': id_arret,
    'nom_arret': nom_arret,
    'mne_arret': mne_arret
})

#arret.info()

arret

Unnamed: 0,id_arret,nom_arret,mne_arret
0,671375,SORTIE DEPOT ST BART,DEPOT SD
1,671973,SORTIE BUS CTB,SDCTB
2,671375,SORTIE DEPOT ST BART,DEPOT SD
3,671963,ST BARTH COLLEGE,SBARTHEL
4,672636,PISCINE MONPLAISIR,P-MONPL
5,671973,SORTIE BUS CTB,SDCTB
6,672520,L'ATOLL - BEAUCOUZE,ATOLLBEA
7,671157,ST AUBIN LA SALLE,BARONNER
8,672083,ZI. EST,ZIEST
9,671677,PLACE LORRAINE,LOHUIT


In [40]:
#Bus
id_vh = [elem['fields']['idvh'] for elem in dd]
type_vh = [elem['fields']['type'] for elem in dd]
etat_vh = [elem['fields']['etat'] for elem in dd]

bus = pd.DataFrame({
    'id_vh': id_vh,
    'type_vh': type_vh,
    'etat_vh': etat_vh
})

#bus.info()

bus

Unnamed: 0,id_vh,type_vh,etat_vh
0,268436259,OmniSTDE6,HLPS
1,268436120,OMNIART,HLPS
2,268435899,OMNICITY,HLPS
3,268436158,AGORA123456789ABCDEFART,TDEP
4,268435879,OMNICITY,HL
5,268436137,OmniArtE6,HLPS
6,268436145,OmniArtE6,TDEP
7,268435905,OMNICITY,TDEP
8,268436270,OmniSTDE6,TDEP
9,268435909,OMNICITY,LIGN


In [41]:
#Ligne
id_ligne = [elem['fields']['idligne'] for elem in dd]
nom_ligne = [elem['fields']['nomligne'] for elem in dd]
mne_ligne = [elem['fields']['mnemoligne'] for elem in dd]

ligne = pd.DataFrame({
    'id_ligne': id_arret,
    'nom_ligne': nom_ligne,
    'mne_ligne': mne_ligne
})

#ligne.info()

ligne

Unnamed: 0,id_ligne,nom_ligne,mne_ligne
0,671375,BEAUCOUZE <> ST BARTHELEMY,04
1,671973,BELLE BEILLE <> MONPLAISIR,01
2,671375,BOUCHEMAINE <> Z I EST,06
3,671963,BEAUCOUZE <> ST BARTHELEMY,04
4,672636,Piscine MONPLAISIR,535
5,671973,BEAUCOUZE <> ST BARTHELEMY,04
6,672520,BEAUCOUZE <> ST BARTHELEMY,04
7,671157,M-MARCILLE <> ST AUBIN LA SALLE,12
8,672083,BOUCHEMAINE <> Z I EST,06
9,671677,M-MARCILLE <> ST AUBIN LA SALLE,12


In [42]:
#Trajet
id_bus = [elem['fields']['idvh'] for elem in dd]
id_ligne = [elem['fields']['idligne'] for elem in dd]
latitude = [elem['fields']['coordonnees'][0] for elem in dd]
longitude = [elem['fields']['coordonnees'][1] for elem in dd]

trajet = pd.DataFrame({
    'id_bus': id_bus,
    'id_ligne': id_ligne,
    'latitude': latitude,
    'longitude': longitude
})

#trajet.info()

trajet

Unnamed: 0,id_bus,id_ligne,latitude,longitude
0,268436259,268435460,47.476323,-0.492809
1,268436120,268435457,47.495681,-0.569286
2,268435899,268435462,47.475119,-0.492731
3,268436158,268435460,47.473040,-0.486090
4,268435879,268435515,47.475602,-0.492491
5,268436137,268435460,47.495600,-0.569680
6,268436145,268435460,47.484244,-0.626409
7,268435905,268435468,47.489367,-0.503753
8,268436270,268435462,47.474899,-0.493994
9,268435909,268435468,47.471773,-0.546648


In [43]:
#Etape
id_arret = [elem['fields']['idarret'] for elem in dd]
harret = [elem['fields']['harret'] for elem in dd]
record_timestamp = [elem['record_timestamp'] for elem in dd]
ecart = [elem['fields']['ecart'] for elem in dd]

etape = pd.DataFrame({
    'id_arret': id_arret,
    'heure_theorique': harret,
    'ecart': ecart,
    'record_timestamp': record_timestamp
})

etape['record_timestamp'] = pd.to_datetime(etape['record_timestamp'])

def transfo(row):
    return row['record_timestamp'] + dt.timedelta(seconds=row['ecart'])
etape['heure_estime'] = etape.apply(transfo, axis='columns')

#etape.info()

etape

Unnamed: 0,id_arret,heure_theorique,ecart,record_timestamp,heure_estime
0,671375,2018-06-19T07:09:46+00:00,0,2018-06-19 07:16:13,2018-06-19 07:16:13
1,671973,2018-06-19T07:45:09+00:00,0,2018-06-19 07:57:53,2018-06-19 07:57:53
2,671375,2018-06-19T07:26:00+00:00,0,2018-06-19 07:31:18,2018-06-19 07:31:18
3,671963,2018-06-19T10:38:00+00:00,0,2018-06-19 10:28:13,2018-06-19 10:28:13
4,672636,2018-06-18T22:00:00+00:00,-247,2018-06-19 10:10:16,2018-06-19 10:06:09
5,671973,2018-06-19T06:53:55+00:00,0,2018-06-19 10:21:55,2018-06-19 10:21:55
6,672520,2018-06-19T10:39:00+00:00,0,2018-06-19 10:30:15,2018-06-19 10:30:15
7,671157,2018-06-19T10:39:00+00:00,0,2018-06-19 10:32:35,2018-06-19 10:32:35
8,672083,2018-06-19T10:41:00+00:00,0,2018-06-19 10:35:35,2018-06-19 10:35:35
9,671677,2018-06-19T10:38:50+00:00,-110,2018-06-19 10:36:33,2018-06-19 10:34:43


In [44]:
#Ouverture de la connection vers la bdd
engine = create_engine("sqlite:///data.sqlite")
connection = engine.connect()

In [45]:
#Table arret
arret.to_sql('arret', connection, if_exists='replace', index=False)

In [46]:
#Table bus
bus.to_sql('bus', connection, if_exists='replace', index=False)

In [47]:
#Table ligne
ligne.to_sql('ligne', connection, if_exists='replace', index=False)

In [48]:
#Table trajet
trajet.to_sql('trajet', connection, if_exists='replace', index=False)

In [49]:
#Table etape
etape.to_sql('etape', connection, if_exists='replace', index=False)

In [50]:
#Fermeture connection
connection.close()