# Chicago taxi extract

## Section initial

### Import necessary modules

In [None]:
from datetime import datetime
import json
import os
from typing import Dict, List, Union

import boto3
from dateutil.relativedelta import relativedelta
import requests

### Define functions

In [None]:
def get_taxi_data(formatted_datetime: str) -> List:
    """
    Retrieves taxi data from the Chicago Taxi Data API for a specific date (YYYY-MM-DD).

    Args:
        formatted_datetime (str): The date in the format 'YYYY-MM-DD'.

    Returns:
        List: A list of taxi data records for the specified date.

    Raises:
        requests.exceptions.HTTPError: If the API answers with a 4xx/5xx status.
        requests.exceptions.Timeout: If the server does not respond in time.
    """
    taxi_url = "https://data.cityofchicago.org/resource/ajtu-isnz.json"

    # Pass the query through `params` so requests URL-encodes the spaces,
    # quotes and comparison operators instead of relying on a hand-built URL.
    params = {
        "$where": (
            f"trip_start_timestamp >= '{formatted_datetime}T00:00:00' "
            f"AND trip_start_timestamp <= '{formatted_datetime}T23:59:59'"
        ),
        # The request caps the result at 30000 rows; rows beyond that are not fetched.
        "$limit": 30000,
    }

    # A timeout keeps a stalled connection from hanging the whole run.
    response_taxi = requests.get(taxi_url, params=params, timeout=30)

    # Fail loudly on an error status; otherwise the API's error payload would
    # be returned (and later stored) as if it were trip records.
    response_taxi.raise_for_status()

    return response_taxi.json()

In [None]:
def get_weather_data(formatted_datetime: str) -> Dict:
    """
    Retrieves weather data from the Open-Meteo API for a specific date (YYYY-MM-DD).

    The request asks for hourly temperature, wind speed, rain and precipitation
    at latitude 41.85, longitude -87.65.

    Args:
        formatted_datetime (str): The date in the format 'YYYY-MM-DD'. It is used
            as both the start and the end date, so a single day is requested.

    Returns:
        Dict: The decoded JSON response of the API for the specified date.

    Raises:
        requests.exceptions.HTTPError: If the API answers with a 4xx/5xx status.
        requests.exceptions.Timeout: If the server does not respond in time.
    """
    weather_url = "https://archive-api.open-meteo.com/v1/era5"

    params = {
        "latitude": 41.85,
        "longitude": -87.65,
        "start_date": formatted_datetime,
        "end_date": formatted_datetime,
        "hourly": "temperature_2m,wind_speed_10m,rain,precipitation",
    }

    # A timeout keeps a stalled connection from hanging the whole run.
    response_weather = requests.get(weather_url, params=params, timeout=30)

    # Fail loudly on an error status; otherwise the API's error payload would
    # be returned (and later stored) as if it were weather data.
    response_weather.raise_for_status()

    return response_weather.json()

In [None]:
def upload_to_s3(
    data: Union[Dict, List],
    folder_name: str,
    file_name: str,
    bucket_name: str = 'cubix-chicago-taxi-ld',
) -> None:
    """
    Uploads data to an S3 bucket as a JSON document.

    The object is written under the key
    'raw_data/to_processed/<folder_name>/<file_name>'.

    Args:
        data (Union[Dict, List]): The JSON-serializable data to be uploaded
            (callers in this file pass both a list and a dict).
        folder_name (str): The name of the folder in the S3 bucket.
        file_name (str): The name of the file to be uploaded.
        bucket_name (str): The target S3 bucket. Defaults to the bucket this
            function previously had hard-coded.

    Returns:
        None
    """
    client = boto3.client('s3')
    client.put_object(
        Bucket=bucket_name,
        Key=f'raw_data/to_processed/{folder_name}/{file_name}',
        Body=json.dumps(data),
    )

## Section main

In [None]:
def lambda_handler(event, context):
    """
    Fetches taxi and weather data for the day two months before now and
    uploads each result to S3.

    Args:
        event: Not used by this function.
        context: Not used by this function.

    Returns:
        None
    """
    # The target day is "now" shifted back by two months, as 'YYYY-MM-DD'.
    target_day = (datetime.now() - relativedelta(months=2)).strftime("%Y-%m-%d")

    # Both API calls happen before any upload starts.
    taxi_records = get_taxi_data(target_day)
    weather_records = get_weather_data(target_day)

    # (label for the log line, S3 folder, object name, payload)
    pending_uploads = (
        ('Taxi', 'taxi_data', f'taxi_raw_{target_day}.json', taxi_records),
        ('Weather', 'weather_data', f'weather_raw_{target_day}.json', weather_records),
    )

    for label, folder, object_name, payload in pending_uploads:
        upload_to_s3(data=payload, folder_name=folder, file_name=object_name)
        print(f'{label} data has been uploaded.')