In [1]:
import pandas as pd
import json
from urllib import request
import numpy as np

In [2]:
ORIGINAL_URL = "https://www.sistemas.dftrans.df.gov.br/service/gps/operacoes"

In [3]:
req = request.Request(ORIGINAL_URL, method='GET')
print('Requesting on %s' % ORIGINAL_URL)

response = request.urlopen(req)
body = response.read().decode('utf-8')
body = json.loads(body)

#df = pd.DataFrame.from_records(body['Dados'], columns=body['Campos'])

Requesting on https://www.sistemas.dftrans.df.gov.br/service/gps/operacoes


In [4]:
df = pd.DataFrame()
for operadora in body:
    df_temp = pd.DataFrame.from_records(operadora['veiculos'])
    df = pd.concat([df,df_temp], ignore_index=True)

In [5]:
print(df.shape)
df.head()

(2640, 9)


Unnamed: 0,codigoImei,direcao,horario,linha,localizacao,numero,sentido,valid,velocidade
0,7316166,244.0,1597824667000,,"{'latitude': -15.92161, 'longitude': -48.10853}",335029,VOLTA,True,"{'unidade': 'KM_POR_HORA', 'valor': 0}"
1,7516188,64.0,1600491046000,,"{'latitude': -15.89958, 'longitude': -48.12305}",333581,VOLTA,True,"{'unidade': 'KM_POR_HORA', 'valor': 0}"
2,7119230,5.0,1600969642000,,"{'latitude': -15.92148, 'longitude': -48.10834}",332780,IDA,True,"{'unidade': 'KM_POR_HORA', 'valor': 0}"
3,7477164,253.0,1601678032000,,"{'latitude': -15.89945, 'longitude': -48.12276}",334146,IDA,True,"{'unidade': 'KM_POR_HORA', 'valor': 0}"
4,7238112,158.0,1602410128000,807.1,"{'latitude': -15.91111, 'longitude': -48.05508}",334561,IDA,True,"{'unidade': 'KM_POR_HORA', 'valor': 0}"


In [6]:
def process_data(df):
    #df = df.apply(convert_lat_long, axis=1)
    df_lat_long = df.localizacao.apply(pd.Series)
    df = df.merge(df_lat_long, on=df.index)
    
    # limpa campos com string vazia -> ""
    df['GPS_Latitude'] = df['latitude']
    df['GPS_Longitude'] = df['longitude']

    df['GPS_Latitude'].replace('', np.nan, inplace=True)
    df['GPS_Longitude'].replace('', np.nan, inplace=True)
    df = df.dropna(subset=['GPS_Latitude', 'GPS_Longitude'])

    df.GPS_Latitude = df.GPS_Latitude.astype(float)
    df.GPS_Longitude = df.GPS_Longitude.astype(float)
    df.drop(columns=['latitude','longitude','localizacao'], inplace=True)

    return df

def apply_filters(df):
    
    linhas = ['0.195', '147.5', '147.6', '180.1', '180.2', '181.2', '181.4', '8002','106.2','0.147','2207','2209']
    
    df = df[df.linha.isin(linhas)]
    
    return df

In [7]:
df = process_data(df)

In [8]:
df = apply_filters(df)

In [9]:
df

Unnamed: 0,key_0,codigoImei,direcao,horario,linha,numero,sentido,valid,velocidade,GPS_Latitude,GPS_Longitude
1026,1026,,6.01,1602940982000,2207.0,223972,IDA,True,,-15.879971,-47.959644
1042,1042,,201.27,1602940952000,180.1,229881,IDA,True,,-15.794376,-47.882142
1048,1048,,246.66,1602941038000,0.147,226866,IDA,True,,-15.856625,-47.861613
1049,1049,,219.07,1602941010000,2207.0,229440,IDA,True,,-15.860217,-47.935478
1066,1066,,106.54,1602941002000,180.1,229873,VOLTA,True,,-15.886475,-47.812531
1091,1091,,305.79,1602941022000,180.1,229822,IDA,True,,-15.842214,-47.821616
1099,1099,,35.72,1602940990000,180.1,227463,VOLTA,True,,-15.906501,-47.770969
1142,1142,,167.16,1602941018000,2207.0,227927,IDA,True,,-15.744724,-47.897268
1158,1158,,288.14,1602940978000,180.1,229857,IDA,True,,-15.801101,-47.856323
1169,1169,,8.28,1602940944000,2207.0,229288,IDA,True,,-15.879979,-47.959899
