# Theta — CO2 forecast (fitted with statsmodels Holt-Winters `ExponentialSmoothing`)

In [41]:
from pathlib import Path

import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error
from statsmodels.tsa.holtwinters import ExponentialSmoothing

In [43]:
# Read the CO2 series from CSV (adjust the path if the data lives elsewhere),
# then, in one chain:
#   1. parse the 'Date' column into datetimes,
#   2. order the rows chronologically,
#   3. promote 'Date' to the index.
df = (
    pd.read_csv('data/interpolated_co2.csv')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .sort_values('Date')
    .set_index('Date')
)

# Show the first rows as a sanity check
df.head()


Unnamed: 0_level_0,CO2
Date,Unnamed: 1_level_1
1974-05-19,333.46
1974-05-20,333.64
1974-05-21,333.5
1974-05-22,333.21
1974-05-23,333.05


In [44]:
# Chronological hold-out split: rows dated on or before the cutoff are used for
# training, rows dated strictly after it are kept for evaluation.
split_date = '2024-12-31'
is_train = df.index <= split_date
is_test = df.index > split_date

train_data = df.loc[is_train]
test_data = df.loc[is_test]

# Show the first rows of each partition
train_data.head(), test_data.head()

(               CO2
 Date              
 1974-05-19  333.46
 1974-05-20  333.64
 1974-05-21  333.50
 1974-05-22  333.21
 1974-05-23  333.05,
                CO2
 Date              
 2025-01-01  425.94
 2025-01-02  426.27
 2025-01-03  426.13
 2025-01-04  426.03
 2025-01-05  426.41)

In [45]:
# Initialize the Holt-Winters Exponential Smoothing model.
#
# Attach an explicit daily frequency to the training index. Without it,
# statsmodels has to infer the frequency itself and emits a warning from
# `_init_dates` when the model is constructed.
train_series = train_data['CO2'].asfreq('D')

# asfreq inserts NaN rows for any calendar day absent from the index; fail early
# with a clear message rather than fitting on a series with holes.
if train_series.isna().any():
    raise ValueError(
        "Training CO2 series contains missing values after aligning to a daily "
        "frequency; fill or interpolate them before fitting."
    )

# NOTE(review): the series is daily, so seasonal_periods=12 models a 12-day
# cycle rather than a yearly one — confirm this is intended. It is left
# unchanged here because altering it changes the fitted model and its cost.
# NOTE(review): the notebook and its output file are labelled "Theta", but the
# model fitted here is additive-trend / additive-seasonal Holt-Winters —
# confirm which model is intended.
model = ExponentialSmoothing(train_series, trend='add', seasonal='add', seasonal_periods=12)

# Fit the model
fitted_model = model.fit()

  self._init_dates(dates, freq)


In [46]:
# Forecast window: 2025-01-01 through 2025-04-13, passed to predict() as the
# start and end of the requested range.
forecast_start_date, forecast_end_date = '2025-01-01', '2025-04-13'

forecast = fitted_model.predict(
    start=forecast_start_date,
    end=forecast_end_date,
)

# Show the first forecast values
forecast.head()

2025-01-01    426.706268
2025-01-02    426.714227
2025-01-03    426.737784
2025-01-04    426.742857
2025-01-05    426.724425
Freq: D, dtype: float64

In [47]:
# Line up observed and forecast CO2 values on the forecast dates.
forecast_dates = forecast.index

# .loc with a list of labels raises KeyError if any forecast date is missing
# from test_data, so the hold-out set must cover the whole forecast window.
actual_co2 = test_data['CO2'].loc[forecast_dates]

predictions_df = pd.DataFrame(
    {'Date': forecast_dates, 'Actual': actual_co2, 'Predicted': forecast}
)

# Show the first rows of the comparison table
predictions_df.head()

Unnamed: 0,Date,Actual,Predicted
2025-01-01,2025-01-01,425.94,426.706268
2025-01-02,2025-01-02,426.27,426.714227
2025-01-03,2025-01-03,426.13,426.737784
2025-01-04,2025-01-04,426.03,426.742857
2025-01-05,2025-01-05,426.41,426.724425


In [49]:
# Score the forecast against the observed values.
actual_values = predictions_df['Actual']
predicted_values = predictions_df['Predicted']

# Root-mean-squared error and mean absolute error (in the units of the series)
rmse = np.sqrt(mean_squared_error(actual_values, predicted_values))
mae = mean_absolute_error(actual_values, predicted_values)

# "Accuracy" here is 100 * (1 - mean of |actual - predicted| / actual)
relative_errors = (actual_values - predicted_values).abs() / actual_values
accuracy = 100 * (1 - relative_errors.mean())

# Report the three metrics
print(f'RMSE: {rmse}')
print(f'MAE: {mae}')
print(f'Accuracy: {accuracy}%')

RMSE: 1.166283007037824
MAE: 0.8517337815681384
Accuracy: 99.80118997059755%


In [50]:
# Save the predictions to a CSV file in the Predictions folder.
# The path is defined once so the file written and the message printed
# cannot drift apart.
output_path = Path('Predictions/theta_predictions.csv')

# to_csv does not create missing directories; make sure the target folder
# exists so the notebook also runs on a fresh checkout.
output_path.parent.mkdir(parents=True, exist_ok=True)

# index=False: the dates are already stored in the 'Date' column.
predictions_df.to_csv(output_path, index=False)

# as_posix() keeps forward slashes in the message on every platform.
print(f"Predictions saved in {output_path.as_posix()}")

Predictions saved in Predictions/theta_predictions.csv
