# In [119]:
import pandas as pd
import numpy as np
from pathlib import Path
import hvplot.pandas
import requests
import json
from scipy.interpolate import interp1d
import matplotlib.pyplot as plt

#import make_model_count as mm

# Column names shared by the data frames built in this file.
# Recall counts (renamed from the CSV column RECALL_NUMBER_NUM).
col_recall_number = "RecallCount"
# Recall date string (renamed from the CSV column RECALL_DATE_DTE).
col_date = "Date"
# Month key: characters 5:7 of the recall date, or the weather record's "month".
col_month = "Month"
# Values taken from the weather records' "min_temp" / "max_temp" fields.
col_min_temp = "MinTemp"
col_max_temp = "MaxTemp"


def get_df(csv_path="./resources/vrdb_60days_daily.csv", year="2022"):
    """Return the month-over-month percent change in vehicle recall counts.

    The recall CSV is reduced to the recall number and recall date columns,
    recalls are counted per date, restricted to ``year``, summed per month
    and finally converted to percent changes so the series can share a
    chart with the temperature changes (the raw value ranges differ).

    Args:
        csv_path: Location of the recall CSV. It must contain the columns
            ``RECALL_NUMBER_NUM`` and ``RECALL_DATE_DTE``.
        year: Year to keep; dates are matched on their leading characters,
            so they are expected to start with the year (e.g. ``2022-01-31``).

    Returns:
        A DataFrame indexed by the two-character month string with a single
        recall-count column holding ``pct_change()`` values; the first row
        is therefore NaN.
    """
    # NOTE(review): this path uses "./resources" while get_weather_df uses
    # "./Resources"; one of them fails on a case-sensitive file system.
    vehicle_df = pd.read_csv(csv_path)
    vehicle_df = vehicle_df[["RECALL_NUMBER_NUM", "RECALL_DATE_DTE"]].rename(
        columns={
            "RECALL_NUMBER_NUM": col_recall_number,
            "RECALL_DATE_DTE": col_date,
        }
    )

    # Recalls per date; groupby already returns the groups sorted by date.
    daily_df = vehicle_df.groupby(col_date)[[col_recall_number]].count()
    daily_df = daily_df.reset_index()

    # Match the year at the start of the date rather than anywhere in it,
    # and copy so that adding the month column does not write into a slice.
    dates = daily_df[col_date].astype(str)
    daily_df = daily_df[dates.str.startswith(str(year))].copy()
    daily_df[col_month] = daily_df[col_date].astype(str).str[5:7]

    # Sum only the count column: summing the whole frame would also
    # "sum" (concatenate) the date strings on recent pandas versions and
    # make pct_change() fail on the resulting text column.
    monthly_df = daily_df.groupby(col_month)[[col_recall_number]].sum()

    return monthly_df.pct_change()


def get_weather_df(json_path="./Resources/weather_data.json"):
    """Return the row-over-row percent change of min and max temperature.

    The JSON file must hold a top-level ``"data"`` list whose records each
    provide ``"min_temp"`` and ``"max_temp"``.
    NOTE(review): the file is presumably a saved response of the Weatherbit
    "normals" endpoint with one record per month - confirm. The old
    commented-out request was removed because it embedded an API key in the
    source; rotate that key if it is still active.

    Args:
        json_path: Location of the weather JSON file.

    Returns:
        A DataFrame with a default integer index and the min/max temperature
        columns holding ``pct_change()`` values; the first row is NaN.
    """
    # The context manager guarantees the file handle is closed.
    with open(json_path, encoding="utf-8") as weather_file:
        records = json.load(weather_file)["data"]

    weather_df = pd.DataFrame(
        {
            col_min_temp: [record["min_temp"] for record in records],
            col_max_temp: [record["max_temp"] for record in records],
        }
    )

    # Only the temperature columns are returned, so the month values are not
    # loaded at all instead of being percent-changed and then dropped.
    return weather_df.pct_change()


def recalls_temperature_df():
    """Combine the temperature and recall percent changes into one frame.

    The frame returned by get_weather_df() is copied and the recall-count
    column of get_df() is attached to it by position (``.values``), so both
    frames must have the same number of rows.

    Returns:
        The combined DataFrame with the temperature columns, the recall
        column and an extra ``"tttt"`` column.
    """
    merged_df = get_weather_df().copy()
    recall_changes = get_df()[col_recall_number]
    merged_df[col_recall_number] = recall_changes.values

    # NOTE(review): "tttt" is a cubic interpolation of the min-temperature
    # column evaluated at the very same index positions it was built from.
    # It looks like leftover debugging output - confirm before relying on it.
    positions = merged_df.index.values
    min_temp_spline = interp1d(positions, merged_df[col_min_temp], kind='cubic')
    merged_df["tttt"] = min_temp_spline(positions)

    return merged_df


def recalls_temperature():
    """Plot smoothed min-temperature and recall changes as a line chart.

    Takes the combined frame from recalls_temperature_df(), skips its first
    row, fits a cubic spline through each plotted column and evaluates the
    splines on 500 evenly spaced points.

    Returns:
        The hvplot line chart with one line per plotted column.
    """
    comb_df = recalls_temperature_df()

    # The first row is skipped: the source frames are built with
    # pct_change(), which leaves NaN in their first row.
    valid_df = comb_df.iloc[1:]
    x = valid_df.index.to_numpy()
    x_axis = np.linspace(x.min(), x.max(), 500)

    # Only the columns that are actually plotted are interpolated.
    smoothed = {
        column: interp1d(x, valid_df[column].to_numpy(), kind="cubic")(x_axis)
        for column in (col_min_temp, col_recall_number)
    }

    # Index the curves by the positions they were evaluated at, so the
    # x-axis shows row positions of the combined frame instead of the
    # sample numbers 0..499.
    cubic_df = pd.DataFrame(smoothed, index=x_axis)

    # NOTE(review): the plotted values are percent changes, not absolute
    # recall counts - consider adjusting the y-axis label.
    return cubic_df.hvplot.line(
        width=1024+512, height=512+256,
        xlabel="Month", ylabel="Number of recalls",
        title="Vehicle recalls vs temperature",
        fontsize={'title': '35pt', 'ylabel': '25px',
                  'xlabel': '25px', 'ticks': 20},

    )


# Build the chart. Presumably this is the last expression of a notebook cell,
# so its return value is what gets rendered - keep it as a bare expression.
recalls_temperature()
