# In [None]:
import json
import boto3
import pandas as pd
from io import StringIO


def taxi_trips_transformations(taxi_trips: pd.DataFrame) -> pd.DataFrame:
    """Clean and reshape the raw daily taxi trips.

    The census-tract and centroid-location columns are dropped, the
    community-area columns get an ``_id`` suffix, and a
    ``datetime_for_weather`` column is added that holds
    ``trip_start_timestamp`` floored to the full hour.

    The input dataframe is not modified; a new dataframe is returned.

    Parameters:
    -----------
    taxi_trips : pd.DataFrame
        The dataframe holding the daily taxi trips.

    Returns:
    --------
    pd.DataFrame
        The cleaned, transformed dataframe holding the taxi trips.

    Raises:
    -------
    TypeError
        If ``taxi_trips`` is not a pandas DataFrame.
    KeyError
        If a column that has to be dropped, or ``trip_start_timestamp``,
        is missing from ``taxi_trips``.
    """
    if not isinstance(taxi_trips, pd.DataFrame):
        raise TypeError("taxi_trips is not a valid pandas datarfame.")

    # ``drop`` without ``inplace`` returns a new frame, so the caller's data
    # stays exactly as it was passed in.
    transformed = taxi_trips.drop(
        columns=[
            "pickup_census_tract",
            "dropoff_census_tract",
            "pickup_centroid_location",
            "dropoff_centroid_location",
        ]
    )

    # Diagnostic summary of the remaining columns, printed to stdout.
    transformed.info()

    transformed = transformed.rename(
        columns={
            "pickup_community_area": "pickup_community_area_id",
            "dropoff_community_area": "dropoff_community_area_id",
        }
    )

    # Lowercase "h" is the hourly alias; uppercase "H" is deprecated since
    # pandas 2.2.
    transformed["datetime_for_weather"] = pd.to_datetime(
        transformed["trip_start_timestamp"]
    ).dt.floor("h")

    return transformed
    

def update_master(taxi_trips: pd.DataFrame, master: pd.DataFrame, id_column: str, value_column: str) -> pd.DataFrame:
    """Extend the master dataframe with values it does not contain yet.

    Every distinct, non-missing value of ``taxi_trips[value_column]`` that is
    absent from ``master[value_column]`` is appended exactly once, in order
    of first appearance, and receives the next free id after the current
    maximum of ``master[id_column]``.

    Parameters:
    -----------
    taxi_trips : pd.DataFrame
        Dataframe holding the daily taxi trips.
    master : pd.DataFrame
        Dataframe holding the master data. It is not modified.
    id_column : str
        The id column of the master dataframe.
    value_column : str
        Name of the column holding the values, present in both dataframes.

    Returns:
    --------
    pd.DataFrame
        A new dataframe with a fresh 0..n-1 index: the master rows followed
        by one row per new value. If there is nothing new, it holds just the
        master rows.
    """
    # Deduplicate first: a value that occurs in many trips must yield a
    # single master row. Missing values are skipped because they never
    # compare equal to anything and would be re-added on every run.
    candidates = taxi_trips[value_column].dropna().drop_duplicates()
    new_values = candidates[~candidates.isin(master[value_column])].tolist()

    if not new_values:
        # Same shape of result as the concat below: new object, reset index.
        return master.reset_index(drop=True)

    # ``max()`` of an empty id column is NaN; start numbering at 1 then.
    current_max = master[id_column].max()
    max_id = 0 if pd.isna(current_max) else int(current_max)

    new_values_df = pd.DataFrame({
        id_column: range(max_id + 1, max_id + len(new_values) + 1),
        value_column: new_values,
    })

    return pd.concat([master, new_values_df], ignore_index=True)
    

def transform_weather_data(weather_data) -> pd.DataFrame:
    """Turn the hourly part of a weather API response into a dataframe.

    Parameters
    ----------
    weather_data : JSON
        The parsed daily weather response from the Open Meteo API. Its
        ``"hourly"`` entry must provide ``time``, ``temperature_2m``,
        ``wind_speed_10m``, ``rain`` and ``precipitation``.

    Returns
    -------
    pd.DataFrame
        One row per hourly reading with the columns ``datetime`` (parsed
        with ``pd.to_datetime``), ``temperature``, ``wind_speed``, ``rain``
        and ``precipitation``.
    """
    hourly = weather_data["hourly"]

    # (output column, key in the hourly payload), in output column order.
    column_sources = (
        ("datetime", "time"),
        ("temperature", "temperature_2m"),
        ("wind_speed", "wind_speed_10m"),
        ("rain", "rain"),
        ("precipitation", "precipitation"),
    )

    readings = pd.DataFrame({column: hourly[source] for column, source in column_sources})

    # Replace the raw time strings with parsed timestamps, keeping position.
    return readings.assign(datetime=pd.to_datetime(readings["datetime"]))
    
    
def read_csv_from_s3(bucket: str, path: str, filename: str) -> pd.DataFrame:
    """Download a csv file from an S3 bucket and parse it.

    Parameters:
    -----------
    bucket : str
        The bucket where the file is stored.
    path : str
        The folder prefix of the file inside the bucket. A missing trailing
        ``/`` is added; an empty string means the bucket root.
    filename : str
        Name of the file.

    Returns:
    --------
    pd.DataFrame
        A dataframe of the downloaded file.
    """
    s3 = boto3.client("s3")

    # Without the separator "folder" + "file.csv" would silently become the
    # wrong key "folderfile.csv".
    if path and not path.endswith("/"):
        path = f"{path}/"
    key = f"{path}{filename}"

    response = s3.get_object(Bucket=bucket, Key=key)
    csv_text = response["Body"].read().decode("utf-8")

    return pd.read_csv(StringIO(csv_text))
    

def _list_json_keys(s3, bucket: str, prefix: str) -> list:
    """Return the keys under ``prefix`` in ``bucket`` that end with ``.json``."""
    # NOTE: a single list_objects call returns one page of results only.
    listing = s3.list_objects(Bucket=bucket, Prefix=prefix)

    # "Contents" is absent from the response when nothing matches the prefix.
    # Folder placeholder keys end with "/" and are filtered out by the
    # suffix check as well.
    return [
        entry["Key"]
        for entry in listing.get("Contents", [])
        if entry["Key"].endswith(".json")
    ]


def _load_json_from_s3(s3, bucket: str, key: str):
    """Download the object at ``key`` and parse its body as JSON."""
    response = s3.get_object(Bucket=bucket, Key=key)
    return json.loads(response["Body"].read())


def lambda_handler(event, context):
    """Read the master files and transform the raw taxi and weather files.

    The payment-type and company master csv files are read from the bucket,
    then every ``.json`` object under the raw taxi and raw weather prefixes
    is downloaded and passed through ``taxi_trips_transformations`` or
    ``transform_weather_data`` respectively.

    Parameters:
    -----------
    event
        The Lambda invocation event. Not used.
    context
        The Lambda runtime context. Not used.

    Returns:
    --------
    None
    """
    s3 = boto3.client("s3")
    bucket = "cubix-chicago-taxi-goga"

    raw_weather_folder = "raw_data/to_processed/weather_data/"
    raw_taxi_trips_folder = "raw_data/to_processed/taxi_data/"

    payment_type_master_folder = "transformed_data/payment_type/"
    company_type_master_folder = "transformed_data/company/"

    payment_type_master_filename = "payment_type_master.csv"
    company_master_filename = "company_master.csv"

    payment_type_master = read_csv_from_s3(
        bucket=bucket,
        path=payment_type_master_folder,
        filename=payment_type_master_filename,
    )
    company_master = read_csv_from_s3(
        bucket=bucket,
        path=company_type_master_folder,
        filename=company_master_filename,
    )

    # Taxi data transformation and loading
    for taxi_trip_key in _list_json_keys(s3, bucket, raw_taxi_trips_folder):
        taxi_trip_data_json = _load_json_from_s3(s3, bucket, taxi_trip_key)

        taxi_trip_data_raw = pd.DataFrame(taxi_trip_data_json)
        taxi_trips = taxi_trips_transformations(taxi_trip_data_raw)

        # TODO: extend company_master and payment_type_master with
        # update_master(...) and load the transformed trips.

    # Weather data transformation and loading
    for weather_key in _list_json_keys(s3, bucket, raw_weather_folder):
        weather_data_json = _load_json_from_s3(s3, bucket, weather_key)

        weather_data = transform_weather_data(weather_data_json)

        # TODO: load the transformed weather data.

    