In [None]:
#Importing the Excel file into Python

import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.diagnostic import het_breuschpagan, acorr_breusch_godfrey, het_arch

# Location of the cleaned NIFTY 50 workbook. This is an Excel file (read with
# pd.read_excel), not a CSV. Edit this one constant to point the notebook at a
# different copy of the data.
DATA_PATH = "G:/My Drive/Ireland - Jasleen/Trinity/Coursework/Dissertation/Drafts/Analysis and Findings/Country Wise Analysis/NIFTY 50 CleanData.xlsx"

data = pd.read_excel(DATA_PATH)

# Fail early, with a readable message, if the columns used by the later cells
# ('Date' for filtering/weekday extraction, 'Return' as the dependent variable)
# are not present in the workbook.
missing_columns = {'Date', 'Return'} - set(data.columns)
if missing_columns:
    raise KeyError(f"Workbook is missing required column(s): {sorted(missing_columns)}")

# Remove rows with missing returns
data = data.dropna(subset=['Return'])

# Display the first 20 rows of the dataframe
print(data.head(20))

In [None]:
# Sample window for the analysis (both end points are inclusive)
start_date = '2016-05-02'
end_date = '2024-05-10'

# Keep only the rows whose 'Date' falls inside the window
on_or_after_start = data['Date'] >= start_date
on_or_before_end = data['Date'] <= end_date
data = data.loc[on_or_after_start & on_or_before_end]

# Show the filtered dataframe
print(data)

In [None]:
# Drop rows with NaT in 'Date' and label every remaining row with its weekday
# name. .assign() returns a new dataframe, so the new column is never written
# into a frame derived from an earlier filter (the pattern that triggers pandas'
# SettingWithCopyWarning). `data` still ends up carrying 'Day_of_Week'.
data = (
    data
    .dropna(subset=['Date'])
    .assign(Day_of_Week=lambda frame: frame['Date'].dt.day_name())
)

# One-hot encode the weekday: one 'Day_of_Week_<name>' column per weekday present
# in the data (no category is dropped here).
data_with_dummies = pd.get_dummies(data, columns=['Day_of_Week'], drop_first=False)

# Cast every boolean column to 0/1 integers in a single call so the dummies can
# be used directly as numeric regressors.
bool_columns = data_with_dummies.select_dtypes(include='bool').columns
data_with_dummies = data_with_dummies.astype({column: int for column in bool_columns})

# Display the resulting dataframe
data_with_dummies.head()

In [None]:
# Rank all rows from the largest 'Return' down to the smallest
sorted_df = data_with_dummies.sort_values('Return', ascending=False)

# Preview the ten highest-return rows
print(sorted_df.iloc[:10])

In [None]:
# Row positions that split the return-ranked data into 20% / 60% / 20% bands.
# int() truncates, so the cut-offs are rounded down to whole rows.

total_rows = sorted_df.shape[0]
top_20_percent_index = int(total_rows * 0.2)
bottom_20_percent_index = int(total_rows * 0.8)

In [None]:
# Split the return-ranked rows into three bands, then put each band back into
# chronological order.
#
# Why re-sort: the later cells run a Breusch-Godfrey serial-correlation test and
# Newey-West (HAC) standard errors on models fitted to these subsets. Both treat
# the row order as the time order. Left in return-rank order, "lag 1" would mean
# "the next-largest return", which makes the residuals trend by construction.
# The set of rows in each band, and therefore the OLS coefficients, is unchanged.
# A stable sort keeps the existing relative order of rows sharing the same date.
#
# NOTE(review): even in date order the rows of a band are not consecutive
# trading days, so lag-1 here means "previous observation in the band" - confirm
# this is the intended reading of the autocorrelation diagnostics.

# Top 20% (highest returns)
top_20_df = sorted_df.head(top_20_percent_index).sort_values('Date', kind='mergesort')

# Bottom 20% (lowest returns)
bottom_20_df = sorted_df.tail(total_rows - bottom_20_percent_index).sort_values('Date', kind='mergesort')

# Middle 60%
middle_60_df = sorted_df.iloc[top_20_percent_index:bottom_20_percent_index].sort_values('Date', kind='mergesort')

In [None]:
# Top 20% Stats

# Independent variables: the weekday dummy columns. No intercept is added to the
# OLS fit below, so each coefficient is the mean 'Return' of that weekday within
# this subset and the summary reports an uncentered R-squared.
X_6 = top_20_df[['Day_of_Week_Monday',
                 'Day_of_Week_Thursday', 'Day_of_Week_Tuesday',
                 'Day_of_Week_Wednesday', 'Day_of_Week_Friday']]

# Dependent variable (Returns)
Y_6 = top_20_df['Return']

# One-way ANOVA of Return on weekday.
# The weekday is passed as a single categorical factor rebuilt from 'Date'.
# Feeding the full dummy matrix to a formula (which always adds an intercept)
# makes the design rank-deficient whenever the dummies sum to one, and anova_lm
# then counts one degree of freedom too many for the weekday term. C(...) lets
# the formula drop a reference level itself, so the term gets (levels - 1) df.
# NOTE(review): every weekday name present in this subset becomes a level, so
# any session outside Monday-Friday would form its own group here even though
# it has no dummy in X_6 - confirm against the data.
ANOVAmodel = ols(
    'Return ~ C(Weekday)',
    data=top_20_df.assign(Weekday=top_20_df['Date'].dt.day_name()),
).fit()

# Perform ANOVA and print the table
anova_table = sm.stats.anova_lm(ANOVAmodel)
print(anova_table)

# OLS regression of Return on the weekday dummies (no intercept, see above)
model6 = sm.OLS(Y_6, X_6).fit()

# Print the summary of the regression
print(model6.summary())

In [None]:
# Diagnostics for model6: Breusch-Pagan (heteroscedasticity) and
# Breusch-Godfrey (serial correlation at lag 1).

# Design matrix for the Breusch-Pagan auxiliary regression. The test needs an
# explicit constant column and takes its LM degrees of freedom from the number
# of columns minus one, so the matrix must not contain redundant columns.
model_exog = model6.model.exog
exog_with_constant = sm.add_constant(model_exog, has_constant='skip')

# If a constant was appended but did not raise the rank, the regressors already
# span the constant (e.g. a complete set of dummies that sum to one). In that
# case use the constant in place of the first regressor, so the column count
# matches the true rank and the LM p-value uses the right degrees of freedom.
# NOTE(review): this assumes the first regressor is part of that linear
# dependency, which holds for a complete dummy set - confirm for other designs.
constant_was_added = exog_with_constant.shape[1] > model_exog.shape[1]
if constant_was_added and (
    np.linalg.matrix_rank(exog_with_constant) == np.linalg.matrix_rank(model_exog)
):
    exog_with_constant = np.column_stack(
        (np.ones(model_exog.shape[0]), model_exog[:, 1:])
    )

# Breusch-Pagan test on the residuals of model6
bp_test = het_breuschpagan(model6.resid, exog_with_constant)
bp_labels = ['Lagrange multiplier statistic', 'p-value', 'f-value', 'f p-value']
print("Breusch-Pagan test:", dict(zip(bp_labels, bp_test)))

# Breusch-Godfrey test for autocorrelation (one lag); it uses the row order of
# the fitted data as the time order.
bg_test = acorr_breusch_godfrey(model6, nlags=1)
bg_labels = ['Lagrange multiplier statistic', 'p-value', 'f-value', 'f p-value']
print("Breusch-Godfrey test:", dict(zip(bg_labels, bg_test)))


In [None]:
# Re-compute the coefficient covariance of model6 under two robust schemes and
# print the full regression summary for each.

# Newey-West (HAC) covariance with one lag, to allow for autocorrelation
nw_summary = model6.get_robustcov_results('HAC', maxlags=1)
print(nw_summary.summary())

# HC1 covariance, to allow for heteroscedasticity
robust_summary = model6.get_robustcov_results('HC1')
print(robust_summary.summary())

In [None]:
# Middle 60% Stats

# Independent variables: the weekday dummy columns. No intercept is added to the
# OLS fit below, so each coefficient is the mean 'Return' of that weekday within
# this subset and the summary reports an uncentered R-squared.
X_7 = middle_60_df[['Day_of_Week_Monday',
                    'Day_of_Week_Thursday', 'Day_of_Week_Tuesday',
                    'Day_of_Week_Wednesday', 'Day_of_Week_Friday']]

# Dependent variable (Returns)
Y_7 = middle_60_df['Return']

# One-way ANOVA of Return on weekday.
# The weekday is passed as a single categorical factor rebuilt from 'Date'.
# Feeding the full dummy matrix to a formula (which always adds an intercept)
# makes the design rank-deficient whenever the dummies sum to one, and anova_lm
# then counts one degree of freedom too many for the weekday term. C(...) lets
# the formula drop a reference level itself, so the term gets (levels - 1) df.
# NOTE(review): every weekday name present in this subset becomes a level, so
# any session outside Monday-Friday would form its own group here even though
# it has no dummy in X_7 - confirm against the data.
ANOVAmodel = ols(
    'Return ~ C(Weekday)',
    data=middle_60_df.assign(Weekday=middle_60_df['Date'].dt.day_name()),
).fit()

# Perform ANOVA and print the table
anova_table = sm.stats.anova_lm(ANOVAmodel)
print(anova_table)

# OLS regression of Return on the weekday dummies (no intercept, see above)
model7 = sm.OLS(Y_7, X_7).fit()

# Print the summary of the regression
print(model7.summary())


In [None]:
# Diagnostics for model7: Breusch-Pagan (heteroscedasticity) and
# Breusch-Godfrey (serial correlation at lag 1).

# Design matrix for the Breusch-Pagan auxiliary regression. The test needs an
# explicit constant column and takes its LM degrees of freedom from the number
# of columns minus one, so the matrix must not contain redundant columns.
model_exog = model7.model.exog
exog_with_constant = sm.add_constant(model_exog, has_constant='skip')

# If a constant was appended but did not raise the rank, the regressors already
# span the constant (e.g. a complete set of dummies that sum to one). In that
# case use the constant in place of the first regressor, so the column count
# matches the true rank and the LM p-value uses the right degrees of freedom.
# NOTE(review): this assumes the first regressor is part of that linear
# dependency, which holds for a complete dummy set - confirm for other designs.
constant_was_added = exog_with_constant.shape[1] > model_exog.shape[1]
if constant_was_added and (
    np.linalg.matrix_rank(exog_with_constant) == np.linalg.matrix_rank(model_exog)
):
    exog_with_constant = np.column_stack(
        (np.ones(model_exog.shape[0]), model_exog[:, 1:])
    )

# Breusch-Pagan test on the residuals of model7
bp_test = het_breuschpagan(model7.resid, exog_with_constant)
bp_labels = ['Lagrange multiplier statistic', 'p-value', 'f-value', 'f p-value']
print("Breusch-Pagan test:", dict(zip(bp_labels, bp_test)))

# Breusch-Godfrey test for autocorrelation (one lag); it uses the row order of
# the fitted data as the time order.
bg_test = acorr_breusch_godfrey(model7, nlags=1)
bg_labels = ['Lagrange multiplier statistic', 'p-value', 'f-value', 'f p-value']
print("Breusch-Godfrey test:", dict(zip(bg_labels, bg_test)))


In [None]:
# Re-compute the coefficient covariance of model7 under two robust schemes and
# print the full regression summary for each.

# Newey-West (HAC) covariance with one lag, to allow for autocorrelation
nw_summary = model7.get_robustcov_results('HAC', maxlags=1)
print(nw_summary.summary())

# HC1 covariance, to allow for heteroscedasticity
robust_summary = model7.get_robustcov_results('HC1')
print(robust_summary.summary())

In [None]:
# Bottom 20% Stats

# Independent variables: the weekday dummy columns. No intercept is added to the
# OLS fit below, so each coefficient is the mean 'Return' of that weekday within
# this subset and the summary reports an uncentered R-squared.
X_8 = bottom_20_df[['Day_of_Week_Monday',
                    'Day_of_Week_Thursday', 'Day_of_Week_Tuesday',
                    'Day_of_Week_Wednesday', 'Day_of_Week_Friday']]

# Dependent variable (Returns)
Y_8 = bottom_20_df['Return']

# One-way ANOVA of Return on weekday.
# The weekday is passed as a single categorical factor rebuilt from 'Date'.
# Feeding the full dummy matrix to a formula (which always adds an intercept)
# makes the design rank-deficient whenever the dummies sum to one, and anova_lm
# then counts one degree of freedom too many for the weekday term. C(...) lets
# the formula drop a reference level itself, so the term gets (levels - 1) df.
# NOTE(review): every weekday name present in this subset becomes a level, so
# any session outside Monday-Friday would form its own group here even though
# it has no dummy in X_8 - confirm against the data.
ANOVAmodel = ols(
    'Return ~ C(Weekday)',
    data=bottom_20_df.assign(Weekday=bottom_20_df['Date'].dt.day_name()),
).fit()

# Perform ANOVA and print the table
anova_table = sm.stats.anova_lm(ANOVAmodel)
print(anova_table)

# OLS regression of Return on the weekday dummies (no intercept, see above)
model8 = sm.OLS(Y_8, X_8).fit()

# Print the summary of the regression
print(model8.summary())

In [None]:
# Diagnostics for model8: Breusch-Pagan (heteroscedasticity) and
# Breusch-Godfrey (serial correlation at lag 1).

# Design matrix for the Breusch-Pagan auxiliary regression. The test needs an
# explicit constant column and takes its LM degrees of freedom from the number
# of columns minus one, so the matrix must not contain redundant columns.
model_exog = model8.model.exog
exog_with_constant = sm.add_constant(model_exog, has_constant='skip')

# If a constant was appended but did not raise the rank, the regressors already
# span the constant (e.g. a complete set of dummies that sum to one). In that
# case use the constant in place of the first regressor, so the column count
# matches the true rank and the LM p-value uses the right degrees of freedom.
# NOTE(review): this assumes the first regressor is part of that linear
# dependency, which holds for a complete dummy set - confirm for other designs.
constant_was_added = exog_with_constant.shape[1] > model_exog.shape[1]
if constant_was_added and (
    np.linalg.matrix_rank(exog_with_constant) == np.linalg.matrix_rank(model_exog)
):
    exog_with_constant = np.column_stack(
        (np.ones(model_exog.shape[0]), model_exog[:, 1:])
    )

# Breusch-Pagan test on the residuals of model8
bp_test = het_breuschpagan(model8.resid, exog_with_constant)
bp_labels = ['Lagrange multiplier statistic', 'p-value', 'f-value', 'f p-value']
print("Breusch-Pagan test:", dict(zip(bp_labels, bp_test)))

# Breusch-Godfrey test for autocorrelation (one lag); it uses the row order of
# the fitted data as the time order.
bg_test = acorr_breusch_godfrey(model8, nlags=1)
bg_labels = ['Lagrange multiplier statistic', 'p-value', 'f-value', 'f p-value']
print("Breusch-Godfrey test:", dict(zip(bg_labels, bg_test)))


In [None]:
# Re-compute the coefficient covariance of model8 under two robust schemes and
# print the full regression summary for each.

# Newey-West (HAC) covariance with one lag, to allow for autocorrelation
nw_summary = model8.get_robustcov_results('HAC', maxlags=1)
print(nw_summary.summary())

# HC1 covariance, to allow for heteroscedasticity
robust_summary = model8.get_robustcov_results('HC1')
print(robust_summary.summary())