### Modelling to Predict the temperture using seasonal trends

In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

df = pd.read_csv('../data/df_global.csv')

df['dt'] = pd.to_datetime(df['dt'])
df['Year'] = df['dt'].dt.year
df['Month'] = df['dt'].dt.month
df['Day'] = df['dt'].dt.day

features = ['Year', 'Month', 'Day']
target = 'LandAndOceanAverageTemperature'

df = df.dropna(subset=[target])

X = df[features]
y = df[target]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
from sklearn.svm import SVR

svr_model = SVR(kernel='rbf', C=100, gamma=0.1)
svr_model.fit(X_train, y_train)

y_pred_svr = svr_model.predict(X_test)

mae_svr = mean_absolute_error(y_test, y_pred_svr)
rmse_svr = np.sqrt(mean_squared_error(y_test, y_pred_svr))
print(f"SVR MAE: {mae_svr}")
print(f"SVR RMSE: {rmse_svr}")

SVR MAE: 0.0942397550816539
SVR RMSE: 0.12280206175696354


In [8]:
import matplotlib.pyplot as plt

plt.figure(figsize=(12, 6))

sorted_indices = np.argsort(X_test['Year'])
X_test_sorted = X_test.iloc[sorted_indices]
y_test_sorted = y_test.iloc[sorted_indices]
y_pred_svr_sorted = y_pred_svr[sorted_indices]

plt.plot(y_test_sorted.index, y_test_sorted, label="Actual Temperatures", color="blue", linewidth=2)
plt.plot(y_test_sorted.index, y_pred_svr_sorted, label="Predicted Temperatures (SVR)", color="red", linestyle="--", linewidth=2)

plt.xlabel("Test Data Index")
plt.ylabel("Temperature (°C)")
plt.title("Actual vs Predicted Temperatures (SVR)")
plt.legend()
plt.grid(True)

plot_filepath = "../images/svr_temperature_predictions.png"
plt.savefig(plot_filepath, dpi=300)
plt.close()
print(f"Plot saved as {plot_filepath}")

Plot saved as ../images/svr_temperature_predictions.png


### Saving the model

In [11]:
import pickle

model_filepath = "../pages/models/svr_temperature_model.pkl"
with open(model_filepath, "wb") as file:
    pickle.dump(svr_model, file)
print(f"Model saved as {model_filepath}")

Model saved as ../pages/models/svr_temperature_model.pkl


## Other Experiments

In [41]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA

df = pd.read_csv('../data/df_global.csv')
df['dt'] = pd.to_datetime(df['dt'])
df.set_index('dt', inplace=True)

y = df['LandAndOceanAverageTemperature'].dropna()

model = ARIMA(y, order=(5, 1, 0))
model_fit = model.fit()

forecast = model_fit.forecast(steps=240)  

future_index = pd.date_range(start=y.index[-1], periods=240, freq='M')

forecast_df = pd.DataFrame({'Date': future_index, 'Prediction': forecast})

forecast_df['Date'] = forecast_df['Date'].dt.normalize()

print(forecast_df.head(10))  

forecast_df.to_csv('../pages/models/forecasted_temperatures.csv', index=False)

year = 2024
month = 12

forecast_value = forecast_df.loc[
    (forecast_df['Date'].dt.year == year) & 
    (forecast_df['Date'].dt.month == month), 
    'Prediction'
].values

if len(forecast_value) > 0:
    print(f"Predicted Temperature for {year}-{month:02d}: {forecast_value[0]:.2f}°C")
else:
    print(f"Date for {year}-{month:02d} not found in forecast.")

                 Date  Prediction
2013-09-01 2013-08-31   16.965890
2013-10-01 2013-09-30   16.246837
2013-11-01 2013-10-31   15.481653
2013-12-01 2013-11-30   14.902552
2014-01-01 2013-12-31   14.620066
2014-02-01 2014-01-31   14.748052
2014-03-01 2014-02-28   15.219209
2014-04-01 2014-03-31   15.911292
2014-05-01 2014-04-30   16.622218
2014-06-01 2014-05-31   17.170198
Predicted Temperature for 2024-12: 15.72°C


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  future_index = pd.date_range(start=y.index[-1], periods=240, freq='M')
