In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.stats.diagnostic import het_breuschpagan
from statsmodels.stats.stattools import durbin_watson

In [2]:
data = pd.read_csv(r"D:\AJNIFM MBA Finance\3rd Term\Econometrics and DataAnalytics\Project\Data.csv")

In [3]:
data.columns

Index(['Year', 'GDP Q1', 'GDP Q2', 'GDP Q3', 'GDP Q4', 'ER Q1', 'ER Q2',
       'ER Q3', 'ER Q4', 'IR Q1', 'IR Q2', 'IR Q3', 'IR Q4', 'RR Q1', 'RR Q2',
       'RR Q3', 'RR Q4'],
      dtype='object')

In [4]:
data.isna().sum()

Year      0
GDP Q1    0
GDP Q2    0
GDP Q3    0
GDP Q4    0
ER Q1     0
ER Q2     0
ER Q3     0
ER Q4     0
IR Q1     0
IR Q2     0
IR Q3     0
IR Q4     0
RR Q1     0
RR Q2     0
RR Q3     0
RR Q4     0
dtype: int64

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [6]:
data_melted = data.melt(id_vars=["Year"], 
                        value_vars=["GDP Q1", "GDP Q2", "GDP Q3", "GDP Q4"], 
                        var_name="Quarter", 
                        value_name="GDP")

In [7]:
er_melted = data.melt(id_vars=["Year"], 
                      value_vars=["ER Q1", "ER Q2", "ER Q3", "ER Q4"], 
                      var_name="Quarter", 
                      value_name="Exchange Rate")

ir_melted = data.melt(id_vars=["Year"], 
                      value_vars=["IR Q1", "IR Q2", "IR Q3", "IR Q4"], 
                      var_name="Quarter", 
                      value_name="Inflation Rate")

rr_melted = data.melt(id_vars=["Year"], 
                      value_vars=["RR Q1", "RR Q2", "RR Q3", "RR Q4"], 
                      var_name="Quarter", 
                      value_name="Repo Rate")

In [8]:
X_columns = ["Exchange Rate", "Inflation Rate", "Repo Rate"]
X = pd.DataFrame({
    "Exchange Rate": er_melted["Exchange Rate"],
    "Inflation Rate": ir_melted["Inflation Rate"],
    "Repo Rate": rr_melted["Repo Rate"]
}).reset_index(drop=True)

# y variable is already the melted GDP data
y = data_melted["GDP"]

In [9]:
X = sm.add_constant(X)

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [11]:
model = LinearRegression()
model.fit(X_train, y_train)

In [12]:
y_pred = model.predict(X_test)

In [13]:
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

Mean Squared Error: 29.380493914601082


In [14]:
# --- 1. Check Multicollinearity (VIF) ---
vif_data = pd.DataFrame()
vif_data["Feature"] = X.columns
vif_data["VIF"] = [variance_inflation_factor(X.values, i) for i in range(X.shape[1])]
print("Variance Inflation Factor (VIF):\n", vif_data)

Variance Inflation Factor (VIF):
           Feature        VIF
0           const  41.774525
1   Exchange Rate   1.027098
2  Inflation Rate   1.034689
3       Repo Rate   1.014303


In [22]:
# --- 2. Residual Analysis ---
residuals = y_train - y_pred
fitted_vals = model.fittedvalues

plt.figure(figsize=(8, 5))
sns.residplot(x=fitted_vals, y=residuals, lowess=True, line_kws={'color': 'red'})
plt.axhline(0, linestyle="dashed", color="black")
plt.title("Residuals vs. Fitted Values")
plt.xlabel("Fitted Values")
plt.ylabel("Residuals")
plt.show()

ValueError: operands could not be broadcast together with shapes (32,) (8,) 

In [17]:
# --- 3. Check Heteroscedasticity (Breusch-Pagan Test) ---
bp_test = het_breuschpagan(residuals, X)
labels = ["LM Statistic", "p-value", "F-Statistic", "F p-value"]
print("Breusch-Pagan Test for Heteroscedasticity:\n", dict(zip(labels, bp_test)))

NameError: name 'residuals' is not defined

In [18]:
# --- 4. Check Autocorrelation (Durbin-Watson Test) ---
dw_test = durbin_watson(residuals)
print("Durbin-Watson Statistic:", dw_test)

NameError: name 'residuals' is not defined

In [19]:
# --- 5. Outliers and Leverage Points (Cook's Distance) ---
influence = model.get_influence()
cooks_d = influence.cooks_distance[0]

plt.figure(figsize=(8, 5))
plt.stem(cooks_d, markerfmt=",", use_line_collection=True)
plt.axhline(4/X.shape[0], linestyle="dashed", color="red")  # Threshold for Cook's D
plt.title("Cook's Distance for Outliers")
plt.xlabel("Observation Index")
plt.ylabel("Cook's Distance")
plt.show()

AttributeError: 'LinearRegression' object has no attribute 'get_influence'

In [20]:
# --- 6. Summary of Model ---
print(model.summary())

AttributeError: 'LinearRegression' object has no attribute 'summary'

In [15]:
def predict_gdp():
    print("Enter the following values for GDP prediction:")
    
    exchange_rate = float(input("Exchange Rate: "))
    inflation_rate = float(input("Inflation Rate: "))
    repo_rate = float(input("Repo Rate: "))
    
    user_input = pd.DataFrame({
        "Exchange Rate": [exchange_rate],
        "Inflation Rate": [inflation_rate],
        "Repo Rate": [repo_rate]
    })
    
    predicted_gdp = model.predict(user_input)
    print(f"\nPredicted GDP: {predicted_gdp[0]:.2f}")


In [18]:
predict_gdp()

Enter the following values for GDP prediction:


Exchange Rate:  2.6
Inflation Rate:  5.5
Repo Rate:  6.5



Predicted GDP: 4.80
