# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error
#from google.colab import files

# Load the cleaned data frame and index it by date.
#uploaded = files.upload()
# Uncomment the line above (and the google.colab import at the top of the
# file) if you are using Google Colab.
file_name = 'Name_of_Cleaned_File.csv'
df = (
    pd.read_csv(file_name)
    # Replace the raw 'Date' column with parsed datetimes, keeping its position.
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .set_index('Date')
)

#defining the model
def train_and_predict_arima(train_data, predict_days=3, order=(1, 1, 1),
                            column='Actual_Temperature'):
    """Fit an ARIMA model on one column of ``train_data`` and forecast ahead.

    Args:
        train_data: Table supporting ``train_data[column]``; the selected
            column is the series the model is fitted on.
        predict_days: Number of out-of-sample steps to forecast.
        order: ``(p, d, q)`` order handed to ``ARIMA``. Defaults to
            ``(1, 1, 1)``, the value that used to be hard-coded here.
        column: Name of the column to model. Defaults to
            ``'Actual_Temperature'``, the column that used to be hard-coded.

    Returns:
        Whatever ``results.forecast(steps=predict_days)`` returns for the
        fitted model, i.e. the forecast for the ``predict_days`` steps that
        follow the end of ``train_data``.
    """
    model = ARIMA(train_data[column], order=order)
    results = model.fit()
    return results.forecast(steps=predict_days)

#defining the mean absolute error function
def calculate_mae(actual, predicted):
    """Return the mean absolute error between ``actual`` and ``predicted``.

    Thin wrapper around ``sklearn.metrics.mean_absolute_error``; both
    arguments are forwarded unchanged, in that order.
    """
    return mean_absolute_error(actual, predicted)

# Train one model on the first 3 days and one on the first 27 days; each model
# forecasts the 3 days that immediately follow its own training window.
forecast_days = 3
required_rows = 27 + forecast_days
if len(df) < required_rows:
    # Without rows 27-29 the 27-day model has nothing to be scored against.
    raise ValueError(
        f"Need at least {required_rows} rows to evaluate the 27-day model, "
        f"got {len(df)}"
    )

predictions_3 = train_and_predict_arima(df.iloc[:3], forecast_days)
predictions_27 = train_and_predict_arima(df.iloc[:27], forecast_days)

# Actual values for each model's own forecast horizon.
# The 27-day model forecasts rows 27-29, so it has to be scored against those
# rows. Scoring it against rows 3-5 would compare forecasts and actuals for
# different dates (and rows 3-5 are part of its training data).
actual_next_3 = df.iloc[3:6]['Actual_Temperature']
actual_after_27 = df.iloc[27:30]['Actual_Temperature']

# Overall error of each model across its 3 forecast days.
mae_3 = calculate_mae(actual_next_3, predictions_3)
mae_27 = calculate_mae(actual_after_27, predictions_27)

# Weather-app errors per lag column.
# NOTE(review): the row windows 3-5, 2-4 and 1-3 are kept exactly as they were;
# confirm they match how the CSV aligns each Day1/Day2/Day3 forecast with the
# date it targets.
mae_app = [
    calculate_mae(df.iloc[3:6]['Actual_Temperature'], df.iloc[3:6]['Predicted_Temperature_Day1']),
    calculate_mae(df.iloc[2:5]['Actual_Temperature'], df.iloc[2:5]['Predicted_Temperature_Day2']),
    calculate_mae(df.iloc[1:4]['Actual_Temperature'], df.iloc[1:4]['Predicted_Temperature_Day3'])
]

# Per-lag absolute error of each ARIMA model: entry i is the error of the
# (i + 1)-step-ahead forecast, computed on a single-element slice.
mae_model_3 = [
    calculate_mae(actual_next_3.iloc[i:i + 1], predictions_3.iloc[i:i + 1])
    for i in range(forecast_days)
]

mae_model_27 = [
    calculate_mae(actual_after_27.iloc[i:i + 1], predictions_27.iloc[i:i + 1])
    for i in range(forecast_days)
]


# Plotting: one figure holding two vertically stacked bar charts.
fig, (ax_size, ax_lag) = plt.subplots(2, 1, figsize=(12, 10))

# Top panel: overall MAE of the model trained on 3 days vs. the one trained
# on 27 days.
ax_size.bar(['3 Days', '27 Days'], [mae_3, mae_27], color=['blue', 'orange'])
ax_size.set_title('Mean Errors for Different Training Data Sizes')
ax_size.set_ylabel('Mean Absolute Errors')

# Bottom panel: lag comparison, three bars side by side per lag.
x = np.arange(3)
width = 0.25

ax_lag.bar(x - width, mae_app, width, label='Weather App', color='blue')
ax_lag.bar(x, mae_model_3, width, label='ARIMA - 3 days', color='orange')
ax_lag.bar(x + width, mae_model_27, width, label='ARIMA - 27 days', color='yellow')

ax_lag.set_xlabel('Prediction Lag')
ax_lag.set_ylabel('Mean Absolute Error')
ax_lag.set_title('Lag Comparison')
# Centre one tick under each group of bars.
ax_lag.set_xticks(x)
ax_lag.set_xticklabels(['Lag 1', 'Lag 2', 'Lag 3'])
ax_lag.legend()

fig.tight_layout()
plt.show()

# Report the numerical results: the summary errors first, then the raw
# forecasts of each model. Every item is emitted on its own line.
print(
    f"Mean Error (3 days training): {mae_3}",
    f"Mean Error (27 days training): {mae_27}",
    f"App Lag Errors: {mae_app}",
    f"ARIMA (3 days) Lag Errors: {mae_model_3}",
    f"ARIMA (27 days) Lag Errors: {mae_model_27}",
    sep="\n",
)
print(
    "Predictions for the next 3 days (using first 3 days data):",
    predictions_3,
    sep="\n",
)
print(
    "\nPredictions for the next 3 days (using first 27 days data):",
    predictions_27,
    sep="\n",
)