In [1]:
from operator import index
import string
from time import strftime
from tokenize import String
from turtle import clear
from unicodedata import name
from urllib import response
from numpy import append
from pytz import HOUR
import requests
import pandas as pd
import json
import pyarrow as pa
import pyarrow.parquet as pq
from dotenv import load_dotenv, find_dotenv
import os
import pathlib as Path
import glob
from datetime import datetime, timezone, timedelta as dt
from aws import Aws

ModuleNotFoundError: No module named 'aws'

In [None]:
# One HTTP session shared by every request in this notebook (auth() and _get() both use it).
session = requests.Session()
# Base URL of the SPTrans Olho Vivo API; endpoint paths are appended to it directly.
url = 'http://api.olhovivo.sptrans.com.br/v2.1/'

In [None]:
def auth() -> "tuple[str, int]":

    '''
        Authenticate the shared ``session`` with the SPTrans Olho Vivo API using a user token.

        The token is read from the ``TOKEN_API_OLHOVIVO`` environment variable and sent
        in the query string of a POST request to the ``Login/Autenticar`` endpoint.
        If the variable is unset, no ``token`` parameter is sent and the API's
        failure response is returned unchanged.

        :return: A 2-tuple ``(text, status_code)``: the response body as text
                 ('true' when authenticated, 'false' otherwise) and the HTTP
                 status code (200, 400, 500, etc.).
        :rtype: tuple[str, int]
    '''

    token = os.getenv("TOKEN_API_OLHOVIVO")

    # Let requests build the query string so the token is URL-encoded
    # instead of being spliced into the URL by hand.
    resp = session.post(url + 'Login/Autenticar', params={'token': token})

    return resp.text, resp.status_code

In [None]:
# Authenticate the shared session; the cell displays the (text, status_code) tuple returned by auth().
auth()

('true', 200)

In [None]:
def _remove_duplicates(list_df : list) -> list:
    
    '''
        This function is responsible for retrieving data from the Olho Vivo API by providing the API URL along with the route (endpoint).
        The function should return a [Json] object with the content of the requested endpoint.

        :param list_df: Parameter that contains a list of items from which possible duplicate values will be removed. For example [8000, 3000, 1010]
        :type list_df: list
    '''

    return list(set(list_df))

In [None]:
def _get(endpoint : str) -> "dict | list":

    '''
        Send a GET request to the Olho Vivo API through the shared ``session`` and return the decoded JSON body.

        :param endpoint: Route appended directly to the API base URL (which already ends with '/'), for example 'Posicao'.
        :type endpoint: str

        :return: The response body parsed from JSON.
        :rtype: dict | list

        :raises requests.HTTPError: If the API answers with a 4xx or 5xx status code.
    '''

    response = session.get(url + endpoint)

    # Fail loudly on an HTTP error instead of handing an error payload
    # to callers as if it were data.
    response.raise_for_status()

    return response.json()

In [None]:
def get_bus_position() -> pd.DataFrame:

    '''
        Fetch the 'Posicao' endpoint and flatten the response into one row per located vehicle.

        Fields of the API response, as described by the original author of this notebook
        (nested fields become dotted column names such as ``l.c`` and ``l.vs.p``):

            hr (str): Reference time at which the information was generated.

            l: List of located lines, where each item has:

                c (str): Full line sign.
                cl (str): Line identifier code.
                sl (int): Direction of operation; 1 means from Main Terminal to Secondary Terminal, 2 means from Secondary Terminal to Main Terminal.
                lt0 (str): Destination sign of the line.
                lt1 (str): Origin sign of the line.
                qv (int): Number of vehicles located.

            vs: List of located vehicles of a line, where each item has:

                p (int): Vehicle prefix.
                a (bool): Whether the vehicle is (true) or is not (false) accessible to people with disabilities.
                ta (str): Universal time (UTC) at which the location was captured, in ISO 8601 format.
                py (double): Latitude of the vehicle's location.
                px (double): Longitude of the vehicle's location.

        Three partition columns are appended from the current date:
        ``ano_part`` (year), ``mes_part`` (month) and ``dia_part`` (day), all as strings.

        :return: A pandas DataFrame that contains characteristic data of buses and their geolocational positions.
        :rtype: pd.DataFrame
    '''

    bus_position = _get('Posicao')

    df_bus_position = pd.DataFrame(bus_position)
    # First pass: flatten each line record into 'l.*' columns, then explode the
    # per-line vehicle list so every vehicle gets its own row.
    df_bus_position = pd.json_normalize(json.loads(df_bus_position.to_json(orient='records'))).explode('l.vs')
    # Second pass: flatten the exploded vehicle dicts into 'l.vs.*' columns.
    df_bus_position = pd.json_normalize(json.loads(df_bus_position.to_json(orient='records')))

    # Read the clock once so the three partition columns always describe the
    # same date, even if the call straddles midnight.
    today = pd.to_datetime("today")
    df_bus_position['ano_part'] = today.strftime("%Y")
    df_bus_position['mes_part'] = today.strftime("%m")
    df_bus_position['dia_part'] = today.strftime("%d")

    return df_bus_position

In [None]:
# Fetch the current bus positions and display the resulting DataFrame.
# NOTE(review): this displays the full frame; consider .head() to keep the output small.
get_bus_position()

Unnamed: 0,hr,l.c,l.cl,l.sl,l.lt0,l.lt1,l.qv,l.vs.p,l.vs.a,l.vs.ta,l.vs.py,l.vs.px,l.vs.sv,l.vs.is,ano_part,mes_part,dia_part
0,20:22,407N-10,2169,1,METRÔ PENHA,TERM. CID. TIRADENTES,5,48211,True,2023-06-17T23:21:39Z,-23.586445,-46.414822,,,2023,06,17
1,20:22,407N-10,2169,1,METRÔ PENHA,TERM. CID. TIRADENTES,5,48447,True,2023-06-17T23:22:28Z,-23.591106,-46.476055,,,2023,06,17
2,20:22,407N-10,2169,1,METRÔ PENHA,TERM. CID. TIRADENTES,5,48795,True,2023-06-17T23:22:11Z,-23.553479,-46.518090,,,2023,06,17
3,20:22,407N-10,2169,1,METRÔ PENHA,TERM. CID. TIRADENTES,5,48702,True,2023-06-17T23:21:59Z,-23.589333,-46.415752,,,2023,06,17
4,20:22,407N-10,2169,1,METRÔ PENHA,TERM. CID. TIRADENTES,5,48798,True,2023-06-17T23:22:33Z,-23.566218,-46.506886,,,2023,06,17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6260,20:22,1767-10,628,1,METRÔ TUCURUVI,PQ. EDU CHAVES,2,22821,True,2023-06-17T23:22:24Z,-23.473826,-46.566147,,,2023,06,17
6261,20:22,1757-10,548,1,METRÔ SANTANA,CONJ. DOS BANCÁRIOS,1,22521,True,2023-06-17T23:22:08Z,-23.465631,-46.632089,,,2023,06,17
6262,20:22,6042-21,34543,2,CEU CAPÃO REDONDO,TERM. CAPELINHA,1,78546,True,2023-06-17T23:22:04Z,-23.654073,-46.762842,,,2023,06,17
6263,20:22,1720-21,1937,1,METRÔ TUCURUVI,VILA SABRINA,1,26013,True,2023-06-17T23:21:06Z,-23.482037,-46.574519,,,2023,06,17


In [None]:
# NOTE(review): this binds the Aws class itself (no parentheses, so no instance is created),
# and the `aws` name is not used by the upload cell below, which calls Aws.write_s3 directly.
# Confirm whether an instance (Aws()) was intended.
aws = Aws

In [None]:
# Upload destination: the bucket name comes from the BUCKET_NAME environment variable.
bucket_name = os.getenv('BUCKET_NAME')
folder_path = '/raw/bus_position/'
file_name = 'bus_position'

# NOTE(review): get_bus_position() already returns a DataFrame, so the from_dict wrapper looks redundant; confirm.
df = pd.DataFrame.from_dict(get_bus_position())
# NOTE(review): csv_string and json_data_bytes are not used anywhere else in this cell
# (the bytes hold CSV text despite the "json" name); confirm whether write_s3 was meant to receive them.
csv_string = df.to_csv(index=False)
json_data_bytes = csv_string.encode('utf-8')
file_format = '.csv'

# Hand the DataFrame and target location to the Aws helper imported from the `aws` module.
# Presumably it serializes `df` according to `file_format`; verify against its implementation.
Aws.write_s3(bucket_name, folder_path, file_name, df, file_format)

ConnectionError: ('Connection aborted.', ConnectionResetError(10054, 'Foi forçado o cancelamento de uma conexão existente pelo host remoto', None, 10054, None))