In [None]:
# Model 1

In [None]:
#Importing the Excel file into Python

import pandas as pd
import numpy as np
import statsmodels.api as sm

# Absolute location of the NIFTY 50 clean-data Excel workbook
# (machine-specific path; adjust it when running on another machine)
excel_path = "G:/My Drive/Ireland - Jasleen/Trinity/Coursework/Dissertation/Drafts/Analysis and Findings/Country Wise Analysis/NIFTY 50 CleanData.xlsx"

# Read the workbook and discard every row whose 'Return' value is missing
data = pd.read_excel(excel_path).dropna(subset=['Return'])

# Preview the first 20 rows of the dataframe
print(data.head(20))

In [None]:
# Sample window for the overall model (both end points are inclusive)
start_date = '2016-05-02'
end_date = '2024-05-10'

# Keep only the observations whose 'Date' lies inside the window
in_window = data['Date'].between(start_date, end_date)
data = data.loc[in_window]

# Show the filtered dataframe
print(data)

In [None]:
# Remove observations without a valid date, then label every remaining row
# with the name of its weekday
data = data.dropna(subset=['Date'])
data = data.assign(Day_of_Week=data['Date'].dt.day_name())

# One-hot encode the weekday label, keeping one dummy column per weekday present
data_with_dummies = pd.get_dummies(data, columns=['Day_of_Week'], drop_first=False)

# Cast every boolean column to integer (0 and 1)
bool_columns = [
    name for name in data_with_dummies.columns
    if data_with_dummies[name].dtype == 'bool'
]
data_with_dummies = data_with_dummies.astype({name: int for name in bool_columns})


# Preview the encoded dataframe
data_with_dummies.head()

In [None]:
#OVERALL MODEL

import statsmodels.api as sm

# Weekday dummy regressors, listed in the column order used by the regression
weekday_dummy_columns = [
    'Day_of_Week_Monday',
    'Day_of_Week_Thursday',
    'Day_of_Week_Tuesday',
    'Day_of_Week_Wednesday',
    'Day_of_Week_Friday',
]
X_1 = data_with_dummies[weekday_dummy_columns]

# The constant term is left out (the add_constant call below stays disabled)
# X_1 = sm.add_constant(X_1)

# Dependent variable: the 'Return' column
Y_1 = data_with_dummies['Return']

# Optional dtype check
# print(data_with_dummies.dtypes)



In [None]:
# ANOVA model: one-way ANOVA of 'Return' on the weekday
#
# The weekday enters the formula as a single categorical factor. Passing the
# full block of weekday dummies (X_1) next to the intercept that the formula
# interface adds automatically makes the design matrix rank-deficient whenever
# the dummies sum to one; anova_lm then counts one degree of freedom too many
# for the weekday effect, which distorts the F-test.

from statsmodels.formula.api import ols

# Weekday label rebuilt from 'Date' (the one-hot encoding replaced the original column)
anova_data = data_with_dummies.assign(
    Day_of_Week=data_with_dummies['Date'].dt.day_name()
)

# Create the ANOVA model; C(...) yields k-1 contrasts for k weekdays
ANOVAmodel = ols('Return ~ C(Day_of_Week)', data=anova_data).fit()

# Perform ANOVA (anova_lm defaults to the sequential, type I table)
anova_table = sm.stats.anova_lm(ANOVAmodel)

# Print the ANOVA table
print(anova_table)

In [None]:
# Estimate the weekday regression by ordinary least squares
ols_specification = sm.OLS(Y_1, X_1)
model1 = ols_specification.fit()

# Report the full regression table
print(model1.summary())

In [None]:
# Checking heteroscedasticity and autocorrelation

from statsmodels.stats.diagnostic import het_breuschpagan, acorr_breusch_godfrey, het_arch

# Auxiliary regressors for the Breusch-Pagan test. The test requires an
# explicit constant column, but the weekday dummies in the model may already
# add up to one. Stacking a constant on top of them then gives a rank-deficient
# matrix, and the LM p-value (chi-square with "number of columns - 1" degrees
# of freedom) would use one degree of freedom too many. In that case the first
# dummy is swapped for the constant, which spans the same column space.
model_exog = model1.model.exog
exog_with_constant = sm.add_constant(model_exog)
if np.linalg.matrix_rank(exog_with_constant) < exog_with_constant.shape[1]:
    exog_with_constant = sm.add_constant(model_exog[:, 1:])

# Perform the Breusch-Pagan test with the constant-augmented regressors
bp_test = het_breuschpagan(model1.resid, exog_with_constant)
bp_labels = ['Lagrange multiplier statistic', 'p-value', 'f-value', 'f p-value']
print("Breusch-Pagan test:", dict(zip(bp_labels, bp_test)))

# Breusch-Godfrey test for autocorrelation (one lag)
bg_test = acorr_breusch_godfrey(model1, nlags=1)
bg_labels = ['Lagrange multiplier statistic', 'p-value', 'f-value', 'f p-value']
print("Breusch-Godfrey test:", dict(zip(bg_labels, bg_test)))



In [None]:
# Re-estimate the covariance of model1 under two robust estimators
# Newey-West (HAC) standard errors with one lag - corrects for autocorrelation
nw_summary = model1.get_robustcov_results(cov_type='HAC', maxlags=1)
# HC1 standard errors - addressing heteroscedasticity
robust_summary = model1.get_robustcov_results(cov_type='HC1')

# Print both regression tables, Newey-West first
for robust_fit in (nw_summary, robust_summary):
    print(robust_fit.summary())

In [None]:
# Discard rows without a date and (re)attach the weekday name taken from 'Date'
data_with_dummies = data_with_dummies.dropna(subset=['Date'])
data_with_dummies = data_with_dummies.assign(
    Day_of_Week=data_with_dummies['Date'].dt.day_name()
)

# Group the observations by weekday
grouped_data = data_with_dummies.groupby('Day_of_Week')

In [None]:
# Per-weekday descriptive statistics of 'Return'
returns_by_day = grouped_data['Return']
mean, std, skewness = (
    returns_by_day.mean(),
    returns_by_day.std(),
    returns_by_day.skew(),
)

# Collect the three statistics side by side, one row per weekday
summary_df = pd.concat(
    [mean, std, skewness],
    axis=1,
    keys=['Mean', 'Standard Deviation', 'Skewness'],
)

# Display the summary dataframe
print(summary_df)

In [None]:
# PRE-COVID-19 - ANOVA AND OLS

from statsmodels.formula.api import ols

# Sample window for the pre-COVID sub-period (both end points are inclusive)
start_date = '2016-05-02'
end_date = '2019-12-31'

# Filter the dataframe based on the date range
precovid_data = data_with_dummies.loc[(data_with_dummies['Date'] >= start_date) & (data_with_dummies['Date'] <= end_date)]

# Show the filtered sample
print(precovid_data)

# Independent variables: weekday dummies (no constant term is added)
X_2 = precovid_data[['Day_of_Week_Monday',
                     'Day_of_Week_Thursday', 'Day_of_Week_Tuesday',
                     'Day_of_Week_Wednesday', 'Day_of_Week_Friday']]

# Dependent variable (Returns)
Y_2 = precovid_data['Return']

# One-way ANOVA of 'Return' on the weekday.
# The weekday enters as a single categorical factor. Passing the full block of
# weekday dummies (X_2) next to the intercept that the formula interface adds
# automatically makes the design rank-deficient whenever the dummies sum to
# one; anova_lm then counts one degree of freedom too many for the weekday
# effect, which distorts the F-test.
anova_data = precovid_data.assign(
    Day_of_Week=precovid_data['Date'].dt.day_name()
)
ANOVAmodel = ols('Return ~ C(Day_of_Week)', data=anova_data).fit()

# Perform ANOVA (anova_lm defaults to the sequential, type I table)
anova_table = sm.stats.anova_lm(ANOVAmodel)

# Print the ANOVA table
print(anova_table)

# Perform OLS regression of returns on the weekday dummies
model2 = sm.OLS(Y_2, X_2).fit()

# Print the summary of the regression
print(model2.summary())


In [None]:
from statsmodels.stats.diagnostic import het_breuschpagan, acorr_breusch_godfrey, het_arch

# Auxiliary regressors for the Breusch-Pagan test. The test requires an
# explicit constant column, but the weekday dummies in the model may already
# add up to one. Stacking a constant on top of them then gives a rank-deficient
# matrix, and the LM p-value (chi-square with "number of columns - 1" degrees
# of freedom) would use one degree of freedom too many. In that case the first
# dummy is swapped for the constant, which spans the same column space.
model_exog = model2.model.exog
exog_with_constant = sm.add_constant(model_exog)
if np.linalg.matrix_rank(exog_with_constant) < exog_with_constant.shape[1]:
    exog_with_constant = sm.add_constant(model_exog[:, 1:])

# Perform the Breusch-Pagan test with the constant-augmented regressors
bp_test = het_breuschpagan(model2.resid, exog_with_constant)
bp_labels = ['Lagrange multiplier statistic', 'p-value', 'f-value', 'f p-value']
print("Breusch-Pagan test:", dict(zip(bp_labels, bp_test)))

# Breusch-Godfrey test for autocorrelation (one lag)
bg_test = acorr_breusch_godfrey(model2, nlags=1)
bg_labels = ['Lagrange multiplier statistic', 'p-value', 'f-value', 'f p-value']
print("Breusch-Godfrey test:", dict(zip(bg_labels, bg_test)))



In [None]:
# Re-estimate the covariance of model2 under two robust estimators
# Newey-West (HAC) standard errors with one lag - corrects for autocorrelation
nw_summary = model2.get_robustcov_results(cov_type='HAC', maxlags=1)
# HC1 standard errors - addressing heteroscedasticity
robust_summary = model2.get_robustcov_results(cov_type='HC1')

# Print both regression tables, Newey-West first
for robust_fit in (nw_summary, robust_summary):
    print(robust_fit.summary())

In [None]:
# DURING-COVID-19

from statsmodels.formula.api import ols

# Sample window for the during-COVID sub-period (both end points are inclusive)
start_date = '2020-01-01'
end_date = '2021-12-31'

# Filter the dataframe based on the date range
duringcovid_data = data_with_dummies.loc[(data_with_dummies['Date'] >= start_date) & (data_with_dummies['Date'] <= end_date)]

# Show the filtered sample
print(duringcovid_data)

# Independent variables: weekday dummies (no constant term is added)
X_3 = duringcovid_data[['Day_of_Week_Monday',
                        'Day_of_Week_Thursday', 'Day_of_Week_Tuesday',
                        'Day_of_Week_Wednesday', 'Day_of_Week_Friday']]

# Dependent variable (Returns)
Y_3 = duringcovid_data['Return']

# One-way ANOVA of 'Return' on the weekday.
# The weekday enters as a single categorical factor. Passing the full block of
# weekday dummies (X_3) next to the intercept that the formula interface adds
# automatically makes the design rank-deficient whenever the dummies sum to
# one; anova_lm then counts one degree of freedom too many for the weekday
# effect, which distorts the F-test.
anova_data = duringcovid_data.assign(
    Day_of_Week=duringcovid_data['Date'].dt.day_name()
)
ANOVAmodel = ols('Return ~ C(Day_of_Week)', data=anova_data).fit()

# Perform ANOVA (anova_lm defaults to the sequential, type I table)
anova_table = sm.stats.anova_lm(ANOVAmodel)

# Print the ANOVA table
print(anova_table)

# Perform OLS regression of returns on the weekday dummies
model3 = sm.OLS(Y_3, X_3).fit()

# Print the summary of the regression
print(model3.summary())


In [None]:
from statsmodels.stats.diagnostic import het_breuschpagan, acorr_breusch_godfrey, het_arch

# Auxiliary regressors for the Breusch-Pagan test. The test requires an
# explicit constant column, but the weekday dummies in the model may already
# add up to one. Stacking a constant on top of them then gives a rank-deficient
# matrix, and the LM p-value (chi-square with "number of columns - 1" degrees
# of freedom) would use one degree of freedom too many. In that case the first
# dummy is swapped for the constant, which spans the same column space.
model_exog = model3.model.exog
exog_with_constant = sm.add_constant(model_exog)
if np.linalg.matrix_rank(exog_with_constant) < exog_with_constant.shape[1]:
    exog_with_constant = sm.add_constant(model_exog[:, 1:])

# Perform the Breusch-Pagan test with the constant-augmented regressors
bp_test = het_breuschpagan(model3.resid, exog_with_constant)
bp_labels = ['Lagrange multiplier statistic', 'p-value', 'f-value', 'f p-value']
print("Breusch-Pagan test:", dict(zip(bp_labels, bp_test)))

# Breusch-Godfrey test for autocorrelation (one lag)
bg_test = acorr_breusch_godfrey(model3, nlags=1)
bg_labels = ['Lagrange multiplier statistic', 'p-value', 'f-value', 'f p-value']
print("Breusch-Godfrey test:", dict(zip(bg_labels, bg_test)))



In [None]:
# Re-estimate the covariance of model3 under two robust estimators
# Newey-West (HAC) standard errors with one lag - corrects for autocorrelation
nw_summary = model3.get_robustcov_results(cov_type='HAC', maxlags=1)
# HC1 standard errors - addressing heteroscedasticity
robust_summary = model3.get_robustcov_results(cov_type='HC1')

# Print both regression tables, Newey-West first
for robust_fit in (nw_summary, robust_summary):
    print(robust_fit.summary())

In [None]:
# POST-COVID-19

from statsmodels.formula.api import ols

# Sample window for the post-COVID sub-period (both end points are inclusive)
start_date = '2022-01-01'
end_date = '2024-05-10'

# Filter the dataframe based on the date range
postcovid_data = data_with_dummies.loc[(data_with_dummies['Date'] >= start_date) & (data_with_dummies['Date'] <= end_date)]

# Show the filtered sample
print(postcovid_data)

# Independent variables: weekday dummies (no constant term is added)
X_4 = postcovid_data[['Day_of_Week_Monday',
                      'Day_of_Week_Thursday', 'Day_of_Week_Tuesday',
                      'Day_of_Week_Wednesday', 'Day_of_Week_Friday']]

# Dependent variable (Returns)
Y_4 = postcovid_data['Return']

# One-way ANOVA of 'Return' on the weekday.
# The weekday enters as a single categorical factor. Passing the full block of
# weekday dummies (X_4) next to the intercept that the formula interface adds
# automatically makes the design rank-deficient whenever the dummies sum to
# one; anova_lm then counts one degree of freedom too many for the weekday
# effect, which distorts the F-test.
anova_data = postcovid_data.assign(
    Day_of_Week=postcovid_data['Date'].dt.day_name()
)
ANOVAmodel = ols('Return ~ C(Day_of_Week)', data=anova_data).fit()

# Perform ANOVA (anova_lm defaults to the sequential, type I table)
anova_table = sm.stats.anova_lm(ANOVAmodel)

# Print the ANOVA table
print(anova_table)

# Perform OLS regression of returns on the weekday dummies
model4 = sm.OLS(Y_4, X_4).fit()

# Print the summary of the regression
print(model4.summary())


In [None]:
from statsmodels.stats.diagnostic import het_breuschpagan, acorr_breusch_godfrey, het_arch

# Auxiliary regressors for the Breusch-Pagan test. The test requires an
# explicit constant column, but the weekday dummies in the model may already
# add up to one. Stacking a constant on top of them then gives a rank-deficient
# matrix, and the LM p-value (chi-square with "number of columns - 1" degrees
# of freedom) would use one degree of freedom too many. In that case the first
# dummy is swapped for the constant, which spans the same column space.
model_exog = model4.model.exog
exog_with_constant = sm.add_constant(model_exog)
if np.linalg.matrix_rank(exog_with_constant) < exog_with_constant.shape[1]:
    exog_with_constant = sm.add_constant(model_exog[:, 1:])

# Perform the Breusch-Pagan test with the constant-augmented regressors
bp_test = het_breuschpagan(model4.resid, exog_with_constant)
bp_labels = ['Lagrange multiplier statistic', 'p-value', 'f-value', 'f p-value']
print("Breusch-Pagan test:", dict(zip(bp_labels, bp_test)))

# Breusch-Godfrey test for autocorrelation (one lag)
bg_test = acorr_breusch_godfrey(model4, nlags=1)
bg_labels = ['Lagrange multiplier statistic', 'p-value', 'f-value', 'f p-value']
print("Breusch-Godfrey test:", dict(zip(bg_labels, bg_test)))


In [None]:
# Re-estimate the covariance of model4 under two robust estimators
# Newey-West (HAC) standard errors with one lag - corrects for autocorrelation
nw_summary = model4.get_robustcov_results(cov_type='HAC', maxlags=1)
# HC1 standard errors - addressing heteroscedasticity
robust_summary = model4.get_robustcov_results(cov_type='HC1')

# Print both regression tables, Newey-West first
for robust_fit in (nw_summary, robust_summary):
    print(robust_fit.summary())