In [1]:
# Check time stamps
# Inspect data and visualize the time stamps of the data

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
import pandas as pd

# Load the CSV, parsing the DATE column as datetimes and using it as the index.
tdf = pd.read_csv(
    "data/Electric_Production.csv",
    index_col="DATE",
    parse_dates=["DATE"],
)

In [None]:
# Preview the first rows. A bare last expression gets the notebook's rich
# DataFrame rendering, whereas print() only emits plain text.
tdf.head()

In [None]:
# Count the missing values in each column
tdf.isna().sum()

In [None]:
# Line plot of the time series against its datetime index
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(tdf.index, tdf["IPG2211A2N"], color="blue", label="Electric Production")
ax.set_title("Electric Production Time Series Data")
ax.set_xlabel("Date")
ax.set_ylabel("Electric Production")
ax.grid(True)
ax.legend()
plt.show()

In [None]:
# Count index entries that repeat an earlier timestamp; 0 means all timestamps are unique.
tdf.index.duplicated().sum()  # check for duplicate timestamps

In [None]:
# EDA: additive seasonal decomposition
from statsmodels.tsa.seasonal import seasonal_decompose

# Decompose the series using a period of 12 observations
result = seasonal_decompose(
    tdf["IPG2211A2N"],
    period=12,
    model="additive",
)

# Draw the decomposition panels
result.plot()
plt.show()

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

# Additive decomposition; only the residual component is used below
result = seasonal_decompose(tdf["IPG2211A2N"], period=12, model="additive")
residual = result.resid

# Residual series over time
residual.plot(figsize=(10, 6))
plt.title("residuals After Decomposition")
plt.show()

# Flag residuals whose magnitude exceeds three standard deviations
threshold = 3 * residual.std()
outliers = residual[residual.abs() > threshold]
print(outliers)

In [None]:
# Stationarity check: Augmented Dickey-Fuller test on the raw series
from statsmodels.tsa.stattools import adfuller, kpss

result_adfuller = adfuller(tdf["IPG2211A2N"])
adf_stat, adf_pvalue = result_adfuller[:2]
print("ADF Statistic:", adf_stat)
print("p-value:", adf_pvalue)


In [None]:
# KPSS test on the same raw series
result_kpss = kpss(tdf["IPG2211A2N"])
kpss_stat, kpss_pvalue = result_kpss[:2]
print("KPSS Statistic:", kpss_stat)
print("p-value:", kpss_pvalue)

## Converting the non-stationary data to a stationary series

In [None]:
# Show the first rows of tdf.
tdf.head()

In [None]:
# Autocorrelation of the raw series for the first 50 lags
# (no differencing is applied in this cell).
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

plot_acf(tdf["IPG2211A2N"], lags=50)
# plot_acf returns the Figure; without plt.show() the notebook would also
# render that return value and draw the chart twice.
plt.show()

In [None]:
# Variance check: 12-observation rolling standard deviation next to the series
rolling_std = tdf["IPG2211A2N"].rolling(window=12).std()

fig, ax = plt.subplots()
ax.plot(tdf["IPG2211A2N"], label='Original')
ax.plot(rolling_std, color='red', label='Rolling Std')
ax.legend()
ax.set_title("Rolling Standard Deviation")
plt.show()


In [None]:
# ARCH test on the raw series (missing values dropped)
from statsmodels.stats.diagnostic import het_arch

arch_test = het_arch(tdf["IPG2211A2N"].dropna())
arch_pvalue = arch_test[1]
print("P-value: ", arch_pvalue)

In [15]:
# Work on an independent copy so later changes to tdf_complement do not alter tdf.
tdf_complement = tdf.copy()

In [16]:
# Log-transform the series. The input is read from the untouched tdf rather
# than from tdf_complement itself, so re-running this cell cannot stack a
# second log on top of the first.
tdf_complement["IPG2211A2N"] = np.log(tdf["IPG2211A2N"])

In [None]:
# Rolling standard deviation of the log-transformed series. The labels say
# so explicitly, because this column no longer holds the original values.
rolling_std = tdf_complement["IPG2211A2N"].rolling(window=12).std()
plt.plot(tdf_complement["IPG2211A2N"], label='Log-transformed')
plt.plot(rolling_std, label='Rolling Std', color='red')
plt.legend()
plt.title("Rolling Standard Deviation (log-transformed series)")
plt.show()

In [None]:
# ARCH test on the transformed IPG2211A2N column of tdf_complement
arch_test = het_arch(tdf_complement["IPG2211A2N"].dropna())
arch_pvalue = arch_test[1]
print("P-value: ", arch_pvalue)

In [19]:
from scipy import stats

# Fit Box-Cox on the raw series from tdf. The IPG2211A2N column of
# tdf_complement has already been log-transformed, and Box-Cox on top of a
# log would be a double transformation (log is the lambda = 0 case of Box-Cox).
# fitted_lambda is the lambda estimated by scipy.
tdf_complement["boxcox"], fitted_lambda = stats.boxcox(tdf["IPG2211A2N"])

In [None]:
# ARCH test on the Box-Cox column
arch_test = het_arch(tdf_complement["boxcox"].dropna())
arch_pvalue = arch_test[1]
print("P-value: ", arch_pvalue)

In [None]:
# Show the first rows of tdf_complement.
tdf_complement.head()

In [None]:
from arch import arch_model

# Box-Cox column with any missing values removed
series = tdf_complement["boxcox"].dropna()

# Specify a GARCH(1,1) model, fit it with disp="off", and print the summary
model = arch_model(series, p=1, q=1, vol='Garch')
res = model.fit(disp="off")
print(res.summary())

In [None]:
def test_stationarity(timeseries):
    # Perform Augmented Dickey-Fuller test
    adf_result = adfuller(timeseries)
    print('ADF Statistic:', adf_result[0])
    print('p-value:', adf_result[1])
    if adf_result[1] <= 0.05:
        print("The time series is stationary")
    else:
        print("The time series is not stationary")

# Run the ADF check on the undifferenced series, with missing values dropped.
test_stationarity(tdf["IPG2211A2N"].dropna())

In [None]:
# Remove the trend with a first difference (each value minus the previous one),
# then re-run the stationarity check without the leading NaN.
tdf["diff"] = tdf["IPG2211A2N"].diff(periods=1)
test_stationarity(tdf["diff"].dropna())

In [35]:
# Keep every column of tdf except the raw series. drop() already returns a new
# frame, so no prior copy is needed and tdf itself is left unchanged.
# NOTE: this rebinds tdf_complement, replacing the earlier log/Box-Cox frame.
tdf_complement = tdf.drop(columns="IPG2211A2N")

In [None]:
# Show the first rows of tdf_complement.
tdf_complement.head()

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

# Additive decomposition of the differenced series (NaNs removed), period of 12
result = seasonal_decompose(
    tdf_complement["diff"].dropna(),
    period=12,
    model="additive",
)

# Draw the decomposition panels
result.plot()
plt.show()

In [None]:
# Count the missing values in each column of tdf_complement.
tdf_complement.isnull().sum()

In [None]:
from statsmodels.tsa.seasonal import STL

# Use the same 12-observation seasonal period as the seasonal_decompose calls
# in this notebook (presumably monthly data with a yearly cycle — confirm).
# The previous value of 397 is not a seasonal cycle length; it looks like the
# number of rows in the dataset — TODO confirm.
stl = STL(tdf_complement["diff"].dropna(), period=12)
result = stl.fit()

# Draw the STL component panels
result.plot()
plt.show()

In [None]:
from statsmodels.graphics.tsaplots import plot_acf

# Autocorrelation of the differenced series out to lag 100
diff_series = tdf_complement["diff"].dropna()
plot_acf(diff_series, lags=100)
plt.show()

In [None]:
# Count the missing values in each column of tdf_complement.
tdf_complement.isnull().sum()

In [None]:
# Show the first rows of tdf_complement.
tdf_complement.head()

In [None]:
# diff() leaves a NaN in the first row, and a single NaN in the input makes
# every FFT output NaN (an empty plot). Drop missing values before centering.
diff_values = tdf_complement["diff"].dropna()
fft_vals = np.fft.fft(diff_values - diff_values.mean())
plt.plot(np.abs(fft_vals))
plt.title("FFT Magnitudes")
plt.show()

In [57]:
# Using the Fourier transform to check seasonality

# Step 1: take the differenced series without NaNs and remove its mean
signal = tdf_complement["diff"].dropna().to_numpy()
signal_centered = signal - signal.mean()

# Step 2: apply the FFT and compute the matching frequency bins
n_samples = len(signal_centered)
fft_vals = np.fft.fft(signal_centered)
fft_freq = np.fft.fftfreq(n_samples)

# Step 3: keep only the first half of the bins (the non-negative frequencies)
half = n_samples // 2
positive_freqs = fft_freq[:half]
magnitude = np.abs(fft_vals[:half])

In [None]:
# Step 4: plot magnitude against frequency
fig, ax = plt.subplots(figsize=(12, 4))
ax.plot(positive_freqs, magnitude)
ax.set_title("Fourier Transform - Frequency Domain")
ax.set_xlabel("Frequency")
ax.set_ylabel("Magnitude")
ax.grid(True)
plt.show()