In [1]:
import pandas as pd 
import yfinance as yfin 
import matplotlib.pyplot as plt 
import os
import numpy as np



In [2]:
from scipy.stats import kruskal
from scipy.stats import mannwhitneyu
from statsmodels.stats.multitest import multipletests

In [3]:
# individual stock string
mco_str = 'MCO'  # conservative
msft_str = 'MSFT'        # some volatile
amd_str = 'AMD'          # more volatile

In [4]:
# etf traders
usmv_str = 'USMV'        # conservative
invesco_str = 'QQQ'
arkk_str = 'ARKK'      # more volatile

In [5]:
# Individual tickers
mco_ticker = yfin.Ticker(mco_str)
msft_ticker = yfin.Ticker(msft_str)
amd_ticker = yfin.Ticker(amd_str)

In [6]:
# ETF traders tickers
usmv_ticker = yfin.Ticker(usmv_str)
invesco_ticker = yfin.Ticker(invesco_str)
arkk_ticker = yfin.Ticker(arkk_str)

In [7]:
start_date = '2021-03-11'
end_date = '2024-01-24'
one_day = '1d'

In [8]:
individual_data = []
individual_tickers = [mco_ticker, msft_ticker, amd_ticker]

for tick in individual_tickers:
    tick_data = tick.history(start=start_date, end=end_date, period=one_day)
    individual_data.append(tick_data)

In [9]:
etf_data = []
etf_tickers = [usmv_ticker, invesco_ticker, arkk_ticker]

for etick in etf_tickers:
    etick_data = etick.history(start=start_date, end=end_date, period=one_day)
    etf_data.append(etick_data)


#### need 658 rows becuase SVOL has only 658

In [10]:
# calculate return for individual tickers
for tdata in individual_data:
    tdata['ireturns'] = np.log(tdata['Close'] / tdata['Close'].shift(1))
    tdata['ivolatility'] = np.abs(tdata['ireturns'])
    tdata = tdata.dropna()


In [11]:
# calculate return for individual tickers
for edata in etf_data:
    edata['ereturns'] = np.log(edata['Close'] / edata['Close'].shift(1))
    edata['evolatility'] = np.abs(edata['ereturns'])
    edata = edata.dropna()


In [12]:
# we only have 678 days of data in svol, slice all data based on the size of sval
ndays = 678

In [13]:
idata = [individual_data[0].iloc[:ndays,7].dropna().values, individual_data[1].iloc[:ndays,7].dropna().values, individual_data[2].iloc[:ndays,7].dropna().values]

In [14]:
edata = [etf_data[0].dropna().iloc[:ndays,8].values, etf_data[1].dropna().iloc[:ndays,8].values, etf_data[2].dropna().iloc[:ndays,8].values]

In [15]:
len(individual_data[2].dropna().iloc[:ndays,7].values)

678

## RETURN

In [16]:
# Perform Kruskal-Wallis test across the three groups
irkruskal_stat, irp_value = kruskal(idata[0], idata[1], idata[2])
print('Individuals Returns Kruskal-Wallis test:')
irkruskal_stat, irp_value

Individuals Returns Kruskal-Wallis test:


(np.float64(0.10875072480731338), np.float64(0.9470765428457893))

In [17]:
# Perform Kruskal-Wallis test across the three groups
erkruskal_stat, erp_value = kruskal(edata[0], edata[1], edata[2])
print('ETF Kruskal-Wallis test:')
erkruskal_stat, erp_value

ETF Kruskal-Wallis test:


(np.float64(4.0442746286618965), np.float64(0.1323722414303266))

## VOLATILITY

In [18]:
idata_vol = [individual_data[0].iloc[:,-1].dropna().values, individual_data[1].iloc[:,-1].dropna().values, individual_data[2].iloc[:,-1].dropna().values]

In [19]:
edata_vol = [etf_data[0].iloc[:,-1].dropna().values, etf_data[1].iloc[:,-1].dropna().values, etf_data[2].iloc[:,-1].dropna().values]

In [20]:
# Perform Kruskal-Wallis test across the three groups
ivkruskal_stat, ivp_value = kruskal(idata_vol[0], idata_vol[1], idata_vol[2])
print('Volatility Kruskal-Wallis test:')
ivkruskal_stat, ivp_value

Volatility Kruskal-Wallis test:


(np.float64(166.4249499672184), np.float64(7.265764028649003e-37))

In [21]:
# Perform Kruskal-Wallis test across the three groups
evkruskal_stat, evp_value = kruskal(edata_vol[0], edata_vol[1], edata_vol[2])
print('Volatility Kruskal-Wallis test:')
evkruskal_stat, evp_value

Volatility Kruskal-Wallis test:


(np.float64(515.9386724558755), np.float64(9.232956213514823e-113))

## MANN WHITNEY U TEST

In [22]:
# Perform pairwise Mann-Whitney U tests
irp_values = []
irp_values.append(mannwhitneyu(idata[0], idata[1]).pvalue)
irp_values.append(mannwhitneyu(idata[0], idata[2]).pvalue)
irp_values.append(mannwhitneyu(idata[1], idata[2]).pvalue)

# Apply Bonferroni correction
irp_corrected_p_values = multipletests(irp_values, method='bonferroni')[1]

# Print corrected p-values
print("Individuals Returns Corrected p-values:", irp_corrected_p_values)

Individuals Returns Corrected p-values: [1. 1. 1.]


In [23]:
# Perform pairwise Mann-Whitney U tests
ivp_values = []
ivp_values.append(mannwhitneyu(idata_vol[0], idata_vol[1]).pvalue)
ivp_values.append(mannwhitneyu(idata_vol[0], idata_vol[2]).pvalue)
ivp_values.append(mannwhitneyu(idata_vol[1], idata_vol[2]).pvalue)

# Apply Bonferroni correction
ivp_corrected_p_values = multipletests(ivp_values, method='bonferroni')[1]

# Print corrected p-values
print("Individuals Volatility Corrected p-values:", ivp_corrected_p_values)

Individuals Volatility Corrected p-values: [5.70377112e-01 7.07382511e-31 9.62493553e-26]


In [24]:
# Perform pairwise Mann-Whitney U tests
erp_values = []
erp_values.append(mannwhitneyu(edata[0], edata[1]).pvalue)
erp_values.append(mannwhitneyu(edata[0], edata[2]).pvalue)
erp_values.append(mannwhitneyu(edata[1], edata[2]).pvalue)

# Apply Bonferroni correction
erp_corrected_p_values = multipletests(erp_values, method='bonferroni')[1]

# Print corrected p-values
print("ETF Returns Corrected p-values:", erp_corrected_p_values)

ETF Returns Corrected p-values: [1.         0.24764583 0.26698784]


In [25]:
# Perform pairwise Mann-Whitney U tests
evp_values = []
evp_values.append(mannwhitneyu(edata_vol[0], edata_vol[1]).pvalue)
evp_values.append(mannwhitneyu(edata_vol[0], edata_vol[2]).pvalue)
evp_values.append(mannwhitneyu(edata_vol[1], edata_vol[2]).pvalue)

# Apply Bonferroni correction
evp_corrected_p_values = multipletests(evp_values, method='bonferroni')[1]

# Print corrected p-values
print("ETF Volatility Corrected p-values:", evp_corrected_p_values)

ETF Volatility Corrected p-values: [1.26101870e-022 1.77246409e-103 5.52017328e-047]
