# Import des modules nécessaires

In [1]:
    # Pour manipuler efficacement des tables de données dans Python
    import pandas as pd

    # Pour faire des requêtes GET et POST
    import requests
    
    # Pour gérer les date
    import datetime as dt
    
    # sql
    from sqlalchemy import create_engine

In [2]:
print(pd.__version__)

0.22.0


In [3]:
print(requests.__version__)

2.18.4


# Requêtage d'une API REST

Pour lancer une requête GET, c'est ultra-simple : on utilise `requests.get(url)`.

Si vous voulez glisser des paramètres avec la requête, utilisez l'argument optionnel `params`, qui accepte un simple dictionnaire.

In [4]:
r = requests.get("https://data.angers.fr/api/records/1.0/search/",
                 params={
                     'dataset': 'horaires-theoriques-et-arrets-du-reseau-irigo-gtfs',
                     'rows': 10
                 })

In [5]:
g = requests.get("https://data.angers.fr/api/records/1.0/search/",
                 params={
                     'dataset': 'bus-tram-position-tr',
                     'rows': 200
                 })

Regardons la nature de ce qui nous est renvoyé.

In [6]:
type(g)

requests.models.Response

C'est un objet de la classe `Response`. Si vous voulez connaitre toutes les méthodes attachées, faites `help(r)`

In [7]:
help(g)

Help on Response in module requests.models object:

class Response(builtins.object)
 |  The :class:`Response <Response>` object, which contains a
 |  server's response to an HTTP request.
 |  
 |  Methods defined here:
 |  
 |  __bool__(self)
 |      Returns True if :attr:`status_code` is less than 400.
 |      
 |      This attribute checks if the status code of the response is between
 |      400 and 600 to see if there was a client error or a server error. If
 |      the status code, is between 200 and 400, this will return True. This
 |      is **not** a check to see if the response code is ``200 OK``.
 |  
 |  __enter__(self)
 |  
 |  __exit__(self, *args)
 |  
 |  __getstate__(self)
 |  
 |  __init__(self)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  __iter__(self)
 |      Allows you to use a response as an iterator.
 |  
 |  __nonzero__(self)
 |      Returns True if :attr:`status_code` is less than 400.
 |      
 |      This attribute checks if

Pour tester que tout a bien fonctionnné, on va vérifié le statut.

In [8]:
g.raise_for_status()

Si ça dit rien c'est que c'est bon :) Dans le doute, regardons le statut...

In [9]:
g.status_code

200

Statut 200 = succès !!

In [10]:
g.json()

{'nhits': 106,
 'parameters': {'dataset': ['bus-tram-position-tr'],
  'format': 'json',
  'rows': 200,
  'timezone': 'UTC'},
 'records': [{'datasetid': 'bus-tram-position-tr',
   'fields': {'cap': 155,
    'coordonnees': [47.504063, -0.58675933],
    'dest': 'CHU - HOPITAL',
    'ecart': 126,
    'etat': 'LIGN',
    'harret': '2018-06-20T09:25:33+00:00',
    'idarret': 672852,
    'iddesserte': 268939803,
    'idligne': 268435463,
    'idparcours': 268939776,
    'idvh': 268435884,
    'mnemoarret': 'ACAS-7C',
    'mnemoligne': '07',
    'nomarret': 'ACACIAS',
    'nomligne': 'HOPITAL <> MONTREUIL JUIGNE',
    'novh': '428',
    'numarret': 9581,
    'sv': '0725',
    'type': 'OMNICITY',
    'x': 379826,
    'y': 2282369},
   'geometry': {'coordinates': [-0.58675933, 47.504063], 'type': 'Point'},
   'record_timestamp': '2018-06-20T09:24:56+00:00',
   'recordid': '1fd516059c0bf544c7241ec3ee006cfe9b15987e'},
  {'datasetid': 'bus-tram-position-tr',
   'fields': {'cap': 330,
    'coordonne

# Remplissage de DataFrame

On peut construire une `DataFrame` directement à partir d'un "dictionnaires de colonnes".

In [11]:
dd = g.json()['records']

In [12]:
dd[0]

{'datasetid': 'bus-tram-position-tr',
 'fields': {'cap': 155,
  'coordonnees': [47.504063, -0.58675933],
  'dest': 'CHU - HOPITAL',
  'ecart': 126,
  'etat': 'LIGN',
  'harret': '2018-06-20T09:25:33+00:00',
  'idarret': 672852,
  'iddesserte': 268939803,
  'idligne': 268435463,
  'idparcours': 268939776,
  'idvh': 268435884,
  'mnemoarret': 'ACAS-7C',
  'mnemoligne': '07',
  'nomarret': 'ACACIAS',
  'nomligne': 'HOPITAL <> MONTREUIL JUIGNE',
  'novh': '428',
  'numarret': 9581,
  'sv': '0725',
  'type': 'OMNICITY',
  'x': 379826,
  'y': 2282369},
 'geometry': {'coordinates': [-0.58675933, 47.504063], 'type': 'Point'},
 'record_timestamp': '2018-06-20T09:24:56+00:00',
 'recordid': '1fd516059c0bf544c7241ec3ee006cfe9b15987e'}

In [13]:
#Arret
id_arret = [elem['fields']['idarret'] for elem in dd]
nom_arret = [elem['fields']['nomarret'] for elem in dd]
mne_arret = [elem['fields']['mnemoarret'] for elem in dd]

arret = pd.DataFrame({
    'id_arret': id_arret,
    'nom_arret': nom_arret,
    'mne_arret': mne_arret
})

#arret.info()

arret

Unnamed: 0,id_arret,mne_arret,nom_arret
0,672852,ACAS-7C,ACACIAS
1,672949,BICHO.-E,BICHON
2,673827,VIVIERS,VIVIERS
3,673591,PETIPA-E,PETITES PANNES
4,673245,GAUBOUR5,GAUBOURGS
5,674341,VERNEA-E,AURIOL
6,672880,ARCEAU,ARCEAU
7,673514,MONPRO-E,MONPROFIT
8,673498,MILLOT-E,SAUMUROISE
9,673027,CEZAN-E,CEZANNE


In [14]:
#Bus
id_vh = [elem['fields']['idvh'] for elem in dd]
type_vh = [elem['fields']['type'] for elem in dd]
etat_vh = [elem['fields']['etat'] for elem in dd]

bus = pd.DataFrame({
    'id_vh': id_vh,
    'type_vh': type_vh,
    'etat_vh': etat_vh
})

#bus.info()

bus

Unnamed: 0,etat_vh,id_vh,type_vh
0,LIGN,268435884,OMNICITY
1,LIGN,268435899,OMNICITY
2,LIGN,268435910,OMNICITY
3,LIGN,268435913,OMNICITY
4,LIGN,268435987,GX 327
5,LIGN,268435994,GX 327
6,LIGN,268436057,MAN City
7,LIGN,268436070,MAN City
8,LIGN,268436107,OMNIART
9,LIGN,268436110,OMNIART


In [15]:
#Ligne
id_ligne = [elem['fields']['idligne'] for elem in dd]
nom_ligne = [elem['fields']['nomligne'] for elem in dd]
mne_ligne = [elem['fields']['mnemoligne'] for elem in dd]

ligne = pd.DataFrame({
    'id_ligne': id_arret,
    'nom_ligne': nom_ligne,
    'mne_ligne': mne_ligne
})

#ligne.info()

ligne

Unnamed: 0,id_ligne,mne_ligne,nom_ligne
0,672852,07,HOPITAL <> MONTREUIL JUIGNE
1,672949,08,PONTS CE <> AQUAVITA H. RECULEE
2,673827,08,PONTS CE <> AQUAVITA H. RECULEE
3,673591,05,CIRCULAIRE VERNEAU GARE EUROPE
4,673245,05,CIRCULAIRE VERNEAU GARE EUROPE
5,674341,05,CIRCULAIRE VERNEAU GARE EUROPE
6,672880,04,BEAUCOUZE <> ST BARTHELEMY
7,673514,01,BELLE BEILLE <> MONPLAISIR
8,673498,02,ST SYLVAIN BANCHAIS <>TRELAZE
9,673027,04,BEAUCOUZE <> ST BARTHELEMY


In [16]:
#Trajet
id_bus = [elem['fields']['idvh'] for elem in dd]
id_ligne = [elem['fields']['idligne'] for elem in dd]
latitude = [elem['fields']['coordonnees'][0] for elem in dd]
longitude = [elem['fields']['coordonnees'][1] for elem in dd]

trajet = pd.DataFrame({
    'id_bus': id_bus,
    'id_ligne': id_ligne,
    'latitude': latitude,
    'longitude': longitude
})

#trajet.info()

trajet

Unnamed: 0,id_bus,id_ligne,latitude,longitude
0,268435884,268435463,47.504063,-0.586759
1,268435899,268435464,47.478688,-0.564973
2,268435910,268435464,47.451116,-0.533999
3,268435913,268435461,47.487846,-0.572337
4,268435987,268435461,47.451319,-0.571670
5,268435994,268435461,47.491421,-0.565109
6,268436057,268435460,47.470633,-0.518206
7,268436070,268435457,47.474476,-0.565288
8,268436107,268435458,47.456899,-0.531975
9,268436110,268435460,47.468693,-0.492695


In [17]:
#Etape
id_arret = [elem['fields']['idarret'] for elem in dd]
harret = [elem['fields']['harret'] for elem in dd]
record_timestamp = [elem['record_timestamp'] for elem in dd]
ecart = [elem['fields']['ecart'] for elem in dd]

etape = pd.DataFrame({
    'id_arret': id_arret,
    'heure_theorique': harret,
    'ecart': ecart,
    'record_timestamp': record_timestamp
})

etape['record_timestamp'] = pd.to_datetime(etape['record_timestamp'])

def transfo(row):
    return row['record_timestamp'] + dt.timedelta(seconds=row['ecart'])
etape['heure_estime'] = etape.apply(transfo, axis='columns')

#etape.info()

etape

Unnamed: 0,ecart,heure_theorique,id_arret,record_timestamp,heure_estime
0,126,2018-06-20T09:25:33+00:00,672852,2018-06-20 09:24:56,2018-06-20 09:27:02
1,27,2018-06-20T09:25:02+00:00,672949,2018-06-20 09:24:56,2018-06-20 09:25:23
2,-103,2018-06-20T09:25:06+00:00,673827,2018-06-20 09:24:56,2018-06-20 09:23:13
3,-15,2018-06-20T09:25:16+00:00,673591,2018-06-20 09:24:56,2018-06-20 09:24:41
4,43,2018-06-20T09:25:42+00:00,673245,2018-06-20 09:24:56,2018-06-20 09:25:39
5,-151,2018-06-20T09:25:07+00:00,674341,2018-06-20 09:24:56,2018-06-20 09:22:25
6,2,2018-06-20T09:25:10+00:00,672880,2018-06-20 09:24:56,2018-06-20 09:24:58
7,235,2018-06-20T09:25:13+00:00,673514,2018-06-20 09:24:56,2018-06-20 09:28:51
8,251,2018-06-20T09:24:51+00:00,673498,2018-06-20 09:24:56,2018-06-20 09:29:07
9,15,2018-06-20T09:25:05+00:00,673027,2018-06-20 09:24:56,2018-06-20 09:25:11


# test

In [18]:
def test(df):
    try:
        result = True
        for c in df.columns:
            if ('id_ligne' or 'id_arret' or 'id_vh')  in c:
                if not df[c].isnull().all():
                    result = False
            if 'latitude' in c:
                if not (df[c] > 45).all():
                    result = False
            if 'longitude' in c:
                if not(df[c] < 1).all():
                    result = False
        assert result 
    except Exception as e:
        print(e)

In [19]:

list = [arret, bus, ligne, etape, trajet]

for l in list:
    test(l)





In [20]:
#Ouverture de la connection vers la bdd
engine = create_engine("sqlite:///data.sqlite")
connection = engine.connect()

In [21]:
#Table arret
arret.to_sql('arret', connection, if_exists='replace', index=False)

In [22]:
#Table bus
bus.to_sql('bus', connection, if_exists='replace', index=False)

In [23]:
#Table ligne
ligne.to_sql('ligne', connection, if_exists='replace', index=False)

In [24]:
#Table trajet
trajet.to_sql('trajet', connection, if_exists='replace', index=False)

In [25]:
#Table etape
etape.to_sql('etape', connection, if_exists='replace', index=False)

In [26]:
#Fermeture connection
connection.close()