# Imports

In [None]:
import json
from datetime import datetime
from dateutil.relativedelta import relativedelta
import os
import requests
import boto3
from typing import List, Dict

# Aim

The aim of this AWS Lambda function is to collect taxi trip and weather data on a daily basis.

# Steps

DONE 0. configure memory (1000) and runtime (5min) <br />
DONE 1. get one day's (T - 2 months) taxi data <br />
DONE 2. get one day's (T - 2 months) weather data <br />
DONE 3. upload to S3 raw_data/to_process/taxi_data or raw_data/to_process/weather_data <br />
DONE 4. organize functions <br />
DONE 5. type hints <br />
DONE 6. docstrings (automatisation with ChatGPT) <br />
DONE 7. create triggers <br />

# Functions Defined

In [None]:
def get_taxi_data(formatted_datetime: str) -> Dict: # Dict is the return data format

    """
    Retrieves taxi data for a given date.

    Parameters:
    - formatted_datetime (str): containing the date in format "YYYY-MM-DD".

    Returns:
    - taxi_data_upload (dict): the decoded JSON body of the response, containing
      the trips whose trip_start_timestamp falls on the given date.
      NOTE(review): the Socrata endpoint presumably answers with a JSON array
      (a list of trip records) rather than a single dict - confirm and adjust
      the type hint if so.

    Raises:
    - requests.HTTPError: if the API answers with a 4xx/5xx status.
    - requests.Timeout: if the API does not respond in time.
    """

    # get taxi data
    taxi_url = "https://data.cityofchicago.org/resource/ajtu-isnz.json"

    # Passing the query through `params` lets requests URL-encode the spaces,
    # quotes and comparison operators instead of sending them raw.
    params = {
        "$where": (
            f"trip_start_timestamp >= '{formatted_datetime}T00:00:00' "
            f"AND trip_start_timestamp <= '{formatted_datetime}T23:59:59'"
        ),
        # At most 30000 rows are requested; busier days would be truncated.
        "$limit": 30000,
    }

    # Without a timeout a stalled connection would block until the Lambda
    # itself is killed.
    response_taxi = requests.get(taxi_url, params=params, timeout=60)

    # Fail loudly on an HTTP error so an error payload is never handed back
    # (and later uploaded) as if it were trip data.
    response_taxi.raise_for_status()
    taxi_data_upload = response_taxi.json()

    return taxi_data_upload

In [None]:
def get_weather_data(formatted_datetime: str) -> Dict:

    """
    Retrieves hourly weather data for a given date.

    Parameters:
    - formatted_datetime (str): containing the date in format "YYYY-MM-DD".
      It is used as both the start and the end date of the query.

    Returns:
    - weather_data_upload (dict): the decoded JSON body of the response for
      latitude 41.85 / longitude -87.65, with the hourly variables
      temperature_2m, wind_speed_10m, rain and precipitation requested.

    Raises:
    - requests.HTTPError: if the API answers with a 4xx/5xx status.
    - requests.Timeout: if the API does not respond in time.
    """

    # get weather data
    weather_url = "https://archive-api.open-meteo.com/v1/era5"

    params = {
        'latitude': 41.85,
        'longitude': -87.65,
        'start_date': formatted_datetime,
        'end_date': formatted_datetime,
        'hourly': 'temperature_2m,wind_speed_10m,rain,precipitation'
    }

    # Without a timeout a stalled connection would block until the Lambda
    # itself is killed.
    response_weather = requests.get(weather_url, params=params, timeout=60)

    # Fail loudly on an HTTP error so an error payload is never handed back
    # (and later uploaded) as if it were weather data.
    response_weather.raise_for_status()
    weather_data_upload = response_weather.json()

    return weather_data_upload

In [None]:
def upload_to_s3(data: Dict, foldername: str, filename: str) -> None:

    """
    Uploads data as JSON to the AWS S3 bucket: cubix-chicago-taxi-zh.

    The object is written under raw_data/to_process/<foldername>/<filename>.

    Parameters:
    - data (dict): containing the data to be uploaded; it is serialised with
      json.dumps, so it must be JSON-serialisable.
    - foldername (str): containing the target folder name.
    - filename (str): containing the filename of the file uploaded.

    Returns:
    - None.
    """

    client = boto3.client("s3")

    # Each argument is built exactly once; in particular the payload is
    # serialised a single time rather than once for an unused local and
    # again for the call.
    client.put_object(
        Bucket="cubix-chicago-taxi-zh",
        Key=f"raw_data/to_process/{foldername}/{filename}",
        Body=json.dumps(data),
    )

# Main Function

In [None]:
# MAIN
def lambda_handler(event, context):

    """
    Entry point: downloads taxi and weather data for one day and uploads both to S3.

    The day processed is the current local date minus two calendar months,
    formatted as "YYYY-MM-DD".

    Parameters:
    - event: not used by this function.
    - context: not used by this function.

    Returns:
    - None.
    """

    # The day to collect: today shifted back by two months.
    target_day = (datetime.now() - relativedelta(months=2)).strftime("%Y-%m-%d")

    # Taxi trips: download, then store under the taxi_data folder.
    taxi_payload = get_taxi_data(target_day)
    print("Taxi data downloaded.")
    upload_to_s3(taxi_payload, "taxi_data", f"taxi_raw_{target_day}.json")
    print("Taxi data uploaded.")

    # Weather: download, then store under the weather_data folder.
    weather_payload = get_weather_data(target_day)
    print("Weather data downloaded.")
    upload_to_s3(weather_payload, "weather_data", f"weather_raw_{target_day}.json")
    print("Weather data uploaded.")

    print("The script ran successfully.")