In [None]:
# Imports
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Read the price history from CSV (the placeholder must be replaced with a real path)
data = pd.read_csv(filepath_or_buffer='INSERT FILE PATH HERE')

# Preview the first five rows
data.head(n=5)


In [None]:
# Parse the 'Dates' column into datetimes before it is used as the x-axis
data['Dates'] = pd.to_datetime(data['Dates'])

# Price history: blue line with a circular marker on every observation
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(data['Dates'], data['Prices'], marker='o', linestyle='-', color='b')
ax.set_title('Natural Gas Prices Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.grid(True)
fig.tight_layout()
plt.show()



In [None]:
# NOTE(review): this install cell sits after the cell that imports statsmodels,
# so on a fresh environment it has to be run before those imports can succeed.
%pip install statsmodels


In [None]:
# Split the price series into trend, seasonal and residual parts using a
# 12-observation cycle (i.e. yearly seasonality, assuming the data is monthly)
decomposition = seasonal_decompose(data['Prices'], period=12)

# The raw series followed by each extracted component, with matching panel labels
labels = ['Original', 'Trend', 'Seasonal', 'Residual']
components = [data['Prices'], decomposition.trend, decomposition.seasonal, decomposition.resid]

# One stacked panel per series, all sharing the date column as x-values
fig, axes = plt.subplots(nrows=4, ncols=1, figsize=(14, 10))
for ax, label, series in zip(axes, labels, components):
    ax.plot(data['Dates'], series, label=label)
    ax.legend(loc='upper left')
    ax.set_title(f"{label} Component")
    ax.grid(True)

fig.tight_layout()
plt.show()


In [None]:
# Test stationarity of the series
def is_stationary(timeseries, alpha=0.05):
    """Check stationarity using the Augmented Dickey-Fuller test.

    Parameters
    ----------
    timeseries : array-like
        Observations to test. Missing values (such as the leading NaN left
        behind by ``.diff()``) are dropped before the test is run.
    alpha : float, default 0.05
        Significance level the ADF p-value is compared against.

    Returns
    -------
    bool
        True when the p-value is at or below ``alpha`` (the unit-root null
        hypothesis is rejected, so the series is treated as stationary),
        False otherwise.
    """
    # Drop NaNs up front so callers do not have to remember to do it themselves
    observations = pd.Series(timeseries).dropna()
    dftest = adfuller(observations, autolag='AIC')
    p_value = dftest[1]  # second element of the adfuller result is the p-value
    # Cast so callers always get a builtin bool rather than a numpy scalar
    return bool(p_value <= alpha)

# Store the first-differenced prices as a new column, then run the ADF check
# on both the raw series and the differenced one (minus its leading NaN)
data['First Difference'] = data['Prices'].diff()

is_stationary_original = is_stationary(data['Prices'])
is_stationary_first_diff = is_stationary(data['First Difference'].dropna())

# Show both verdicts as the cell output
(is_stationary_original, is_stationary_first_diff)



In [None]:
# Autocorrelation (left) and partial autocorrelation (right) of the differenced
# series, up to lag 20, to help choose the AR and MA orders
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(14, 4))
acf_ax, pacf_ax = axes

plot_acf(data['First Difference'].dropna(), lags=20, ax=acf_ax)
acf_ax.set_title('ACF for First Differenced Series')

plot_pacf(data['First Difference'].dropna(), lags=20, ax=pacf_ax)
pacf_ax.set_title('PACF for First Differenced Series')

fig.tight_layout()
plt.show()



In [None]:
# Specify a SARIMA model on the price series: non-seasonal order (1, 1, 1)
# and seasonal order (1, 1, 1) with a period of 12 observations
model = SARIMAX(
    data['Prices'],
    order=(1, 1, 1),
    seasonal_order=(1, 1, 1, 12),
)

# Estimate the parameters and display the fit summary as the cell output
results = model.fit(disp=-1)
results.summary()


In [None]:
# Attach the model's in-sample fitted values to the frame as a new column
# NOTE(review): with first and seasonal differencing the earliest fitted values
# may be unreliable start-up artefacts -- confirm before reading the left edge.
data['Fitted'] = results.fittedvalues

# Overlay observed prices (solid blue) and fitted values (dashed red)
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(data['Dates'], data['Prices'], label='Actual', color='blue')
ax.plot(data['Dates'], data['Fitted'], label='Fitted', color='red', linestyle='--')
ax.set_title('Actual vs Fitted Natural Gas Prices')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()


In [None]:
# Forecast for the next year (12 steps, assuming monthly observations)
horizon = 12
forecast = results.get_forecast(steps=horizon)

# Month-end timestamps that follow the last observed date. An explicit MonthEnd
# offset is equivalent to the 'M' alias, which newer pandas releases deprecate.
# NOTE(review): if the last observed date is not itself a month end, the first
# forecast stamp lands in that same month -- confirm the dates are month-end.
forecast_index = pd.date_range(
    data['Dates'].iloc[-1] + pd.Timedelta(days=1),
    periods=horizon,
    freq=pd.offsets.MonthEnd(),
)
forecast_series = pd.Series(forecast.predicted_mean.values, index=forecast_index)

# Compute the confidence interval once; its columns are (lower, upper) in that
# order, so positional access does not depend on the modelled series' name.
conf_int = forecast.conf_int()
lower_bound = conf_int.iloc[:, 0].to_numpy()
upper_bound = conf_int.iloc[:, 1].to_numpy()

plt.figure(figsize=(14, 6))
plt.plot(data['Dates'], data['Prices'], label='Historical', color='blue')
plt.plot(forecast_index, forecast_series, label='Forecast', color='green', linestyle='--')
plt.fill_between(forecast_index, lower_bound, upper_bound, color='green', alpha=0.1)
plt.title('Natural Gas Prices Forecast')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
