In [1]:
from sklearn.impute import SimpleImputer
import pandas as pd

# Load the merged dataset
data = pd.read_csv('D:/KAIM/Week 12/KAIM WEEK12/Merged_Economic_Brent_Data.csv')

# Define the imputer (e.g., fill NaNs with the mean of each column)
imputer = SimpleImputer(strategy='mean')  # You can change 'mean' to 'median' or 'most_frequent'

# Fit and transform the dataset (excluding non-numeric columns)
numeric_columns = data.select_dtypes(include=['float64', 'int64']).columns  # Select numeric columns only
data[numeric_columns] = imputer.fit_transform(data[numeric_columns])

# Check if missing values are handled
print("Missing values after imputation:")
print(data.isnull().sum())


Missing values after imputation:
Date                     0
Price                    0
GDP_Growth               0
Inflation_Rate           0
Unemployment_Rate        0
USD_EUR_Exchange_Rate    0
dtype: int64


In [2]:
# Impute GDP_Growth with mean
data['GDP_Growth'].fillna(data['GDP_Growth'].mean(), inplace=True)

# Impute Inflation_Rate with median
data['Inflation_Rate'].fillna(data['Inflation_Rate'].median(), inplace=True)

# Impute Unemployment_Rate with mode
data['Unemployment_Rate'].fillna(data['Unemployment_Rate'].mode()[0], inplace=True)

# Check if missing values are handled
print("Missing values after column-wise imputation:")
print(data.isnull().sum())


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['GDP_Growth'].fillna(data['GDP_Growth'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['Inflation_Rate'].fillna(data['Inflation_Rate'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the 

Missing values after column-wise imputation:
Date                     0
Price                    0
GDP_Growth               0
Inflation_Rate           0
Unemployment_Rate        0
USD_EUR_Exchange_Rate    0
dtype: int64


In [3]:
# Save the imputed dataset to a new file
data.to_csv('D:/KAIM/Week 12/KAIM WEEK12/Imputed_Economic_Brent_Data.csv', index=False)
print("Imputed dataset saved successfully!")


Imputed dataset saved successfully!


In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the data
data = pd.read_csv('D:/KAIM/Week 12/KAIM WEEK12/Imputed_Economic_Brent_Data.csv')

# Features (independent variables) and target (dependent variable)
X = data[['GDP_Growth', 'Inflation_Rate', 'Unemployment_Rate', 'USD_EUR_Exchange_Rate']]  # Replace with relevant features
y = data['Price']  # Target variable (Brent oil prices)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [5]:
from statsmodels.tsa.arima.model import ARIMA

# Prepare time series data (use 'Date' as index)
data['Date'] = pd.to_datetime(data['Date'])
data.set_index('Date', inplace=True)

# Fit an ARIMA model (adjust p, d, q for better performance)
arima_model = ARIMA(data['Price'], order=(5, 1, 0))  # Example: (p=5, d=1, q=0)
arima_model_fit = arima_model.fit()

# Forecast future prices
forecast = arima_model_fit.forecast(steps=10)  # Forecast the next 10 time points
print("ARIMA Forecast:", forecast)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


ARIMA Forecast: 6724    74.638145
6725    74.618738
6726    74.662504
6727    74.664369
6728    74.659828
6729    74.658058
6730    74.659085
6731    74.659404
6732    74.659310
6733    74.659225
Name: predicted_mean, dtype: float64


  return get_prediction_index(
  return get_prediction_index(


In [8]:
from statsmodels.tsa.api import VAR

# Prepare the data for VAR (ensure no missing values and set Date as index)
var_data = data[['Price', 'GDP_Growth', 'Inflation_Rate', 'Unemployment_Rate']].dropna()
var_model = VAR(var_data)

# Fit the model
fitted_model = var_model.fit(maxlags=5)  # Choose an appropriate lag value (use criteria like AIC/BIC)
print(fitted_model.summary())

# Forecast future values
forecast = fitted_model.forecast(var_data.values[-fitted_model.k_ar:], steps=10)
print("VAR Forecast:", forecast)


  self._init_dates(dates, freq)


  Summary of Regression Results   
Model:                         VAR
Method:                        OLS
Date:           Mon, 10, Mar, 2025
Time:                     19:36:41
--------------------------------------------------------------------
No. of Equations:         4.00000    BIC:                    15.8187
Nobs:                     6719.00    HQIC:                   15.7630
Log likelihood:          -90908.3    FPE:                6.80771e+06
AIC:                      15.7336    Det(Omega_mle):     6.72326e+06
--------------------------------------------------------------------
Results for equation Price
                          coefficient       std. error           t-stat            prob
---------------------------------------------------------------------------------------
const                       -3.115366         1.042157           -2.989           0.003
L1.Price                     1.028952         0.012219           84.211           0.000
L1.GDP_Growth                0.0

In [9]:
from statsmodels.tsa.regime_switching.markov_regression import MarkovRegression

# Fit a Markov Switching Model with 2 regimes (low and high volatility)
ms_model = MarkovRegression(data['Price'], k_regimes=2, trend='c', switching_variance=True)
ms_results = ms_model.fit()

print(ms_results.summary())

# Regime probabilities
regime_probs = ms_results.smoothed_marginal_probabilities
print("Regime Probabilities:", regime_probs)


  self._init_dates(dates, freq)


                        Markov Switching Model Results                        
Dep. Variable:                  Price   No. Observations:                 6724
Model:               MarkovRegression   Log Likelihood              -28356.907
Date:                Mon, 10 Mar 2025   AIC                          56725.815
Time:                        19:37:49   BIC                          56766.695
Sample:                             0   HQIC                         56739.930
                               - 6724                                         
Covariance Type:               approx                                         
                             Regime 0 parameters                              
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         33.9115      0.246    137.764      0.000      33.429      34.394
sigma2       137.0006      4.005     34.209      0.0

In [11]:
import joblib

# Save the fitted VAR model
joblib.dump(fitted_model, 'VAR_model.pkl')
print("VAR model saved successfully as 'VAR_model.pkl'.")


VAR model saved successfully as 'VAR_model.pkl'.


In [12]:
# Load the saved VAR model
loaded_var_model = joblib.load('VAR_model.pkl')
print("VAR model loaded successfully.")

# Use the loaded model for forecasting
forecast_var = loaded_var_model.forecast(var_data.values[-loaded_var_model.k_ar:], steps=10)
print("VAR Forecast:", forecast_var)


VAR model loaded successfully.
VAR Forecast: [[7.46312881e+01 1.40626989e+04 2.01752170e+02 5.76341689e+00]
 [7.46092893e+01 1.40439796e+04 2.01783851e+02 5.76752541e+00]
 [7.46458180e+01 1.40364740e+04 2.01661966e+02 5.76329654e+00]
 [7.46385659e+01 1.40418351e+04 2.01773214e+02 5.76107230e+00]
 [7.46280519e+01 1.40447370e+04 2.01820091e+02 5.76178774e+00]
 [7.46200194e+01 1.40462789e+04 2.01871906e+02 5.76214791e+00]
 [7.46147288e+01 1.40451229e+04 2.01886486e+02 5.76215671e+00]
 [7.46084485e+01 1.40457073e+04 2.01903549e+02 5.76185096e+00]
 [7.46019799e+01 1.40466622e+04 2.01939287e+02 5.76170725e+00]
 [7.45956616e+01 1.40473932e+04 2.01963687e+02 5.76168996e+00]]


In [13]:
import joblib

# Save the ARIMA model
joblib.dump(arima_model_fit, 'ARIMA_model.pkl')
print("ARIMA model saved successfully as 'ARIMA_model.pkl'.")


ARIMA model saved successfully as 'ARIMA_model.pkl'.


In [14]:
# Load the saved ARIMA model
loaded_arima_model = joblib.load('ARIMA_model.pkl')
print("ARIMA model loaded successfully.")

# Use the loaded model for forecasting
forecast_arima = loaded_arima_model.forecast(steps=10)
print("ARIMA Forecast:", forecast_arima)


ARIMA model loaded successfully.
ARIMA Forecast: 6724    74.638145
6725    74.618738
6726    74.662504
6727    74.664369
6728    74.659828
6729    74.658058
6730    74.659085
6731    74.659404
6732    74.659310
6733    74.659225
Name: predicted_mean, dtype: float64


  return get_prediction_index(
  return get_prediction_index(
