In [2]:
import numpy as np
import pandas as pd
from scipy.stats import ttest_ind, f, f_oneway, chi2_contingency, ttest_1samp

In [3]:
def get_data_from_sheets(excel_file):
    return {sheet_name: excel_file.parse(sheet_name) 
          for sheet_name in excel_file.sheet_names}

In [4]:
index_file_path = '../Data/Index_Data.xlsx'

index_data = pd.ExcelFile(index_file_path)

index_sheets = get_data_from_sheets(index_data)



MF_file_path = '../Data/Mutual_Funds_Data.xlsx'

MF_data = pd.ExcelFile(MF_file_path)

MF_sheets = get_data_from_sheets(MF_data)



GB_file_path = '../Data/Gold_Bond_Data.xlsx'

GB_data = pd.ExcelFile(GB_file_path)

GB_sheets = get_data_from_sheets(GB_data)

In [6]:
len(index_sheets['NSEI'])

4263

In [7]:
index_sheets['NSEI'].isnull().sum().sum()

1309

## 1. Two Sample T-test

### Null Hypothesis: The mean closing price of `NSEI` is equal to `BSESN`.
### Alternative Hypothesis: The mean closing price of `NSEI` is not equal to `BSESN`.

In [197]:
NSEI = index_sheets['NSEI']
BSESN = index_sheets['BSESN']

In [198]:
print(NSEI['Close'].mean())
print(BSESN['Close'].mean())

10058.400535247383
23081.563106449936


In [199]:
t_stat, test1_p_val = ttest_ind(NSEI['Close'], BSESN['Close'])
print(f"T-Statistic: {t_stat}, p-value: {test1_p_val}")

T-Statistic: -41.708434171845624, p-value: 0.0


In [200]:
def test_hypothesis(p_val):
    if p_val < 0.05:
        return "Reject the null hypothesis."
    else:
        return "Fail to reject the null hypothesis."

In [201]:
test_hypothesis(test1_p_val)

'Reject the null hypothesis.'

#### Here, we have rejected the null hypothesis.
#### The mean closing price of `NSEI` is not equal to `BSESN`.

## 2. F-Test

### Null Hypothesis: The variances of returns of `CNXIT` and `CNXAUTO` are equal.
### Alternative Hypothesis: The variances of returns of `CNXIT` and `CNXAUTO` are not equal.

In [202]:
CNXIT = index_sheets['CNXIT']
CNXAUTO = index_sheets['CNXAUTO']

In [203]:
CNXIT.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Dividends',
       'Stock Splits', 'Percent change in price', 'Percent change in volume'],
      dtype='object')

In [204]:
CNXIT_variance = np.var(CNXIT['Percent change in price'], ddof=1)
CNXAUTO_variance = np.var(CNXAUTO['Percent change in price'], ddof=1)

f_value = CNXIT_variance / CNXAUTO_variance

df1 = len(CNXIT['Percent change in price']) - 1
df2 = len(CNXAUTO['Percent change in price']) - 1

p_left = f.cdf(f_value, df1, df2) # cumulative distribution function (CDF)
p_right = f.sf(f_value, df1, df2) # Survival function = 1 - cdf
test2_p_val = 2 * min(p_left, p_right)

print('Degree of freedom 1:',df1)
print('Degree of freedom 2:',df2)
print("F-statistic:", f_value)
print("p_left:", p_left, "p_right:", p_right)
print("p-value:", test2_p_val)

Degree of freedom 1: 3933
Degree of freedom 2: 3285
F-statistic: 95.59777839094477
p_left: 0.9999999999999999 p_right: 0.0
p-value: 0.0


In [205]:
test_hypothesis(test2_p_val)

'Reject the null hypothesis.'

#### Here, we fail to reject the null hypothesis.

#### We don't have enough evidence to prove that The variances of returns of `CNXIT` and `CNXAUTO` are not equal

## 3. One-way ANOVA

### Null Hypothesis: The mean returns of all mutual funds are the same.
### Alternative Hypothesis: At least one mutual fund has a different mean return.

In [206]:
MF_mean_return = [df['Percent change in price'].dropna().mean() * 100 for df in MF_sheets.values()]
MF_mean_return

[0.060463159249797525,
 0.45537662198918805,
 0.05625228038695368,
 0.06721214091199408,
 0.07837813127213038,
 0.05425440269989836,
 0.053091239265748026,
 0.03980665090643376,
 0.049481134074111845,
 0.029237889760361204,
 0.03162655856975783,
 0.030434033572371375,
 0.023289205645453715,
 0.05856234610891682,
 0.055558805363787794,
 0.05608753081190579,
 2.57033268641501,
 0.011023274388856598]

In [207]:
f_stat, test3_p_val = f_oneway(
    *[df['Percent change in price'].dropna() for df in MF_sheets.values()]
)
print(f"F-Statistic: {f_stat}, p-value: {test3_p_val}")

F-Statistic: 0.4364619233562838, p-value: 0.9774192046172121


In [208]:
test_hypothesis(test3_p_val)

'Fail to reject the null hypothesis.'

#### Here. we fail to reject the null hypothesis.

#### Because of not having sufficient evidences, we are not able to prove that At least one mutual fund has a different mean return.

## 4. Chi-Square Test

### Null Hypothesis: Gold bond returns are independent of market conditions (e.g., bearish or bullish).
### Alternative Hypothesis: Gold bond returns are not independent of market conditions.

In [209]:
GoldBond = GB_sheets['Gold Bond']

In [210]:
GoldBond['Gold_Return_Category'] = GoldBond['Percent change in price'].apply(
    lambda x: 'Positive' if x > 0 else 'Negative'
)

NSEI['Market_Condition'] = NSEI['Percent change in price'].apply(
    lambda x: 'Bullish' if x > 0.005 else ('Bearish' if x < -0.005 else 'Neutral')
)

In [211]:
GoldBond.head()

Unnamed: 0,Date,series,OPEN,HIGH,LOW,PREV. CLOSE,ltp,close,vwap,52W H,52W L,VOLUME,VALUE,No of trades,Percent change in price,Percent change in volume,Gold_Return_Category
0,2019-12-30,GB,3700.0,3700.0,3700.0,3599.5,3700.0,3700.0,3700.0,3840.0,2725.0,6,22200.0,1,,,Negative
1,2019-12-23,GB,3750.0,3750.0,3599.0,3589.0,3599.0,3599.5,3616.33,3840.0,2655.29,45,162735.0,5,-0.027162,6.5,Negative
2,2019-12-17,GB,3590.0,3590.0,3589.0,3580.05,3589.0,3589.0,3589.4,3840.0,2655.29,25,89735.0,3,-0.002917,-0.444444,Negative
3,2019-12-16,GB,3580.05,3580.05,3580.05,3580.0,3580.05,3580.05,3580.05,3840.0,2655.29,1,3580.05,1,-0.002494,-0.96,Negative
4,2019-12-12,GB,3562.0,3580.0,3562.0,3585.0,3580.0,3580.0,3565.0,3840.0,2655.29,6,21390.0,2,-1.4e-05,5.0,Negative


In [212]:
GoldBond.tail()

Unnamed: 0,Date,series,OPEN,HIGH,LOW,PREV. CLOSE,ltp,close,vwap,52W H,52W L,VOLUME,VALUE,No of trades,Percent change in price,Percent change in volume,Gold_Return_Category
678,2024-01-15,GB,6240.01,6400.0,6240.01,6220.0,6400.0,6400.0,6293.34,6400.0,5360.0,6,37760.04,2,0.024,-0.943925,Positive
679,2024-01-11,GB,6220.0,6220.0,6220.0,6275.0,6220.0,6220.0,6220.0,6340.0,5360.0,5,31100.0,1,-0.028125,-0.166667,Negative
680,2024-01-09,GB,6275.0,6275.0,6275.0,6275.0,6275.0,6275.0,6275.0,6340.0,5360.0,20,125500.0,2,0.008842,3.0,Positive
681,2024-01-08,GB,6275.0,6275.0,6275.0,6340.0,6275.0,6275.0,6275.0,6340.0,5360.0,4,25100.0,2,0.0,-0.8,Negative
682,2024-01-05,GB,6340.0,6340.0,6340.0,6340.0,6340.0,6340.0,6340.0,6340.0,5300.0,9,57060.0,2,0.010359,1.25,Positive


In [213]:
NSEI.tail()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,Percent change in price,Percent change in volume,Market_Condition
4217,2024-12-02 00:00:00+05:30,24140.849609,24301.699219,24008.650391,24276.050781,220400,0,0,0.006007,-0.218717,Bullish
4218,2024-12-03 00:00:00+05:30,24367.5,24481.349609,24280.0,24457.150391,339500,0,0,0.00746,0.540381,Bullish
4219,2024-12-04 00:00:00+05:30,24488.75,24573.199219,24366.300781,24467.449219,348000,0,0,0.000421,0.025037,Neutral
4220,2024-12-05 00:00:00+05:30,24539.150391,24857.75,24295.550781,24708.400391,361500,0,0,0.009848,0.038793,Bullish
4221,2024-12-06 00:00:00+05:30,24729.449219,24751.050781,24620.5,24677.800781,0,0,0,-0.001238,-1.0,Neutral


In [214]:
NSEI['Date'] = pd.to_datetime(NSEI['Date'])
NSEI['Date'] = NSEI['Date'].dt.tz_localize(None)

In [215]:
merged_data = pd.merge(GoldBond[['Date', 'Gold_Return_Category']],
                       NSEI[['Date', 'Market_Condition']],
                       on='Date')

contingency_table = pd.crosstab(merged_data['Gold_Return_Category'], 
                                merged_data['Market_Condition'])
print(contingency_table)

Market_Condition      Bearish  Bullish  Neutral
Gold_Return_Category                           
Negative                  101      139      195
Positive                   50       77      117


In [216]:
chi2, test4_p_val, dof, expected = chi2_contingency(contingency_table)
print(f"Chi-Square Statistic: {chi2}, p-value: {test4_p_val}")

Chi-Square Statistic: 0.8621403265313069, p-value: 0.6498133161509081


In [217]:
test_hypothesis(test4_p_val)

'Fail to reject the null hypothesis.'

#### From p value of the test, we get to know that we fail to reject the null hypothesis.

#### We don't have sufficient evidences to justify that Gold bond returns are not independent of market conditions.