# Risk Assesement

In [27]:
%matplotlib inline

In [28]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import StandardScaler
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import kpss
from statsmodels.tsa.seasonal import STL
from scipy.stats import ks_2samp
from Portfolio import selected_tickers

import math
import yfinance as yf
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
import datetime as time

In [29]:
stock_data_june = pd.read_csv("stock_market_june2025.csv")
stock_data_53 = pd.read_csv("stock_data_july_2025.csv")

In [30]:
pd.set_option('display.max_rows', 100) 
stock_data_june['Ticker'].value_counts().gt(2).sum()

0

In [31]:
# Checking tickers that appear more than once
stock_data_53['Ticker'].value_counts().gt(1).sum()
# Checking tickers that appear more than twice
stock_data_53['Ticker'].value_counts().gt(2).sum()

# Filtering
valid_tickers = stock_data_53['Ticker'].value_counts()
valid_tickers = valid_tickers[valid_tickers > 2].index
sorted_stock_data = stock_data_53[stock_data_53['Ticker'].isin(valid_tickers)]

In [32]:
len(sorted_stock_data)

4346

In [33]:
# Sorting the df
sorted_stock_data["Date"] = pd.to_datetime(sorted_stock_data["Date"], dayfirst=True, format="mixed")
sorted_stock_data = sorted_stock_data.sort_values(['Ticker', 'Date'])


### Percentage Change

In [34]:
# Calculating percentage change
sorted_stock_data['Return'] = sorted_stock_data.groupby('Ticker')['Close Price'].pct_change()
# sorted_stock_data

### Volatility

In [35]:
# Volatility
volatility = sorted_stock_data.groupby('Ticker')['Return'].std()
volatility.dropna(inplace=True)
volatility.to_csv('volatility-by-ticker.csv')
# volatility

### Value At Risk

In [36]:
# Value at Risk
Var_95_by_ticker = sorted_stock_data.groupby('Ticker')['Return'].apply(
    lambda x: np.percentile(x.dropna(), 5)
)
Var_95_by_ticker.to_csv('VaR-by-ticker.csv')
Var_95_by_ticker

Ticker
AAPL    -0.133907
ABBV    -0.057638
ABT     -0.074918
ADBE    -0.077749
ADP     -0.052902
AMD     -0.073338
AMGN    -0.047398
AMT     -0.067257
AMZN    -0.072653
AVGO    -0.044499
BAC     -0.088677
BKNG    -0.034992
BLK     -0.048122
BMY     -0.066549
BRK.B   -0.053669
C       -0.071408
CAT     -0.050349
CB      -0.064526
COST    -0.046691
CRM     -0.057804
CVX     -0.074692
DIS     -0.073564
DUK     -0.073431
ELV     -0.046449
GE      -0.083137
GILD    -0.076943
GOOGL   -0.080879
GS      -0.064442
HD      -0.053391
HON     -0.056123
IBM     -0.080025
INTC    -0.095005
INTU    -0.077564
ISRG    -0.067975
JNJ     -0.053087
JPM     -0.090388
KO      -0.062069
LLY     -0.045748
LMT     -0.050913
LOW     -0.047739
MA      -0.056240
MDLZ    -0.071827
MDT     -0.080827
META    -0.065789
MMC     -0.079160
MRK     -0.046618
MS      -0.058834
MSFT    -0.085808
NEE     -0.089459
NFLX    -0.051712
NKE     -0.080938
NOW     -0.044399
NVDA    -0.064698
ORCL    -0.072944
PEP     -0.098196
PFE

### Conditional VaR

In [37]:
# Conditional VaR
CVaR_95_by_Ticker = sorted_stock_data.groupby('Ticker')['Return'].apply(
    lambda x: x[x <= np.percentile(x.dropna(), 5)].mean()
)
CVaR_95_by_Ticker.to_csv('CVaR-by-Ticker.csv')

### Drawdown

In [38]:
# Cumulative Return Calculation
sorted_stock_data['CumulativeReturn'] = (1 + sorted_stock_data['Return']).groupby(sorted_stock_data['Ticker']).cumprod()


In [39]:
# Running Maximum Calcualtion
sorted_stock_data['RunningMax'] = sorted_stock_data.groupby('Ticker')['CumulativeReturn'].cummax()

In [40]:
# Drawdown Calculation
sorted_stock_data['Drawdown'] = sorted_stock_data['CumulativeReturn'] / sorted_stock_data['RunningMax'] - 1
max_drawdown = sorted_stock_data.groupby('Ticker')['Drawdown'].min()
sorted_stock_data.to_csv('data-with-drawdown.csv')

In [41]:
stock_data = stock_data_june

In [42]:
stock_data_53['Date']= pd.to_datetime(stock_data_53['Date'])
stock_data_june['Date']= pd.to_datetime(stock_data_june['Date'], format='%d-%m-%Y')

filtered = stock_data_53[stock_data_53['Date'] <= '2025-06-30']
stock_data_june.reset_index(drop=True).equals(filtered.reset_index(drop=True))

False

In [43]:
stock_data_june['Ticker'].value_counts()

Ticker
OUF    2
XVD    2
CLL    2
GFF    2
TXP    2
      ..
JKX    1
OUV    1
KOM    1
LJX    1
UAA    1
Name: count, Length: 1691, dtype: int64

In [44]:
filtered['Ticker'].value_counts()

Ticker
AAPL     30
REGN     30
PYPL     30
PM       30
PLD      30
PG       30
PFE      30
PEP      30
ORCL     30
NVDA     30
NOW      30
NKE      30
NFLX     30
NEE      30
MSFT     30
MS       30
MRK      30
MMC      30
META     30
QCOM     30
RTX      30
ABBV     30
SCHW     30
XOM      30
WMT      30
VZ       30
VRTX     30
V        30
UPS      30
UNP      30
UNH      30
UBER     30
TXN      30
TSLA     30
TMO      30
TJX      30
T        30
SYK      30
SPGI     30
SO       30
MDT      30
MDLZ     30
MA       30
LOW      30
COST     30
CB       30
CAT      30
C        30
BRK.B    30
BMY      30
BLK      30
BKNG     30
BAC      30
AVGO     30
AMZN     30
AMT      30
AMGN     30
AMD      30
ADP      30
ADBE     30
ABT      30
CRM      30
CVX      30
DIS      30
INTC     30
LMT      30
LLY      30
KO       30
JPM      30
JNJ      30
ISRG     30
INTU     30
IBM      30
DUK      30
HON      30
HD       30
GS       30
GOOGL    30
GILD     30
GE       30
ELV      30
ZTS      30
Name: cou

# Time Series Analysis

# Scrapping

In [45]:
dataset_tickers = stock_data_53['Ticker'].unique().tolist()
dataset_tickers = [t.replace('.', '-') for t in dataset_tickers]
ten_year_data = yf.download(dataset_tickers, start='2014-06-01', end='2025-06-01', threads=False)[["Open","High","Low","Close", "Volume"]]


[*********************100%***********************]  82 of 82 completed


In [46]:
# Removing UBER because it started trading publiclly in 2019, and some values for PYPL are also missing
ten_year_data.drop(['UBER', 'PYPL'], axis=1, level=1, inplace=True)

In [47]:
pd.reset_option('display.max_rows')

In [48]:
# mask = temp.isna()
# stacked = mask.stack(level=list(range(temp.columns.nlevels)), dropna=False)
# nan_index = stacked[stacked].index.tolist()
# cols = ['row'] + [f'col_level_{i}' for i in range(temp.columns.nlevels)]
# nan_df = pd.DataFrame(nan_index, columns=cols)

#### There are missing values. These values are refilled by forward fill

In [49]:
missing_days = ten_year_data.index.to_series().diff().dt.days.value_counts()
missing_days

Date
1.0    2166
3.0     497
4.0      76
2.0      27
Name: count, dtype: int64

In [50]:
(ten_year_data.isnull() == True).any().any()

False

In [51]:
ten_year_data.shape

(2767, 400)

In [52]:
date_without_missing = pd.date_range(start=ten_year_data.index.min(), end=ten_year_data.index.max())
ten_year_data_fill = ten_year_data.reindex(date_without_missing).ffill()

In [53]:
ten_year_data_fill.shape

(4016, 400)

In [54]:
(ten_year_data_fill.isnull() == True).any().any()

False

In [55]:
ten_year_data_fill['Close']

Ticker,AAPL,ABBV,ABT,ADBE,ADP,AMD,AMGN,AMT,AMZN,AVGO,...,TXN,UNH,UNP,UPS,V,VRTX,VZ,WMT,XOM,ZTS
2014-06-02,19.744545,34.322796,32.082165,64.639999,54.251514,3.970000,84.674362,68.327950,15.442000,5.360530,...,34.601238,66.218895,77.222763,70.447121,49.329597,72.629997,28.186134,20.540115,62.015339,28.361811
2014-06-03,20.023762,34.462238,32.098278,64.089996,53.793617,3.940000,85.161545,68.583061,15.359500,5.346313,...,34.601238,66.560532,76.532623,70.007164,48.821327,72.959999,27.758129,20.526733,62.294605,28.664505
2014-06-04,20.252399,34.595341,32.041859,64.169998,53.875618,4.040000,86.637627,68.807213,15.339000,5.358284,...,34.498131,67.085495,76.703201,69.695816,48.846748,72.639999,27.679298,20.639120,62.077408,28.774588
2014-06-05,20.331871,35.051708,32.364281,65.470001,53.786785,4.080000,85.612373,69.363823,16.178499,5.377739,...,34.903145,66.493874,77.676407,70.135780,49.029255,73.059998,27.752502,20.689962,62.393890,28.994724
2014-06-06,20.275969,34.924950,32.283684,66.910004,54.408718,4.060000,85.670532,69.897209,16.483500,5.359781,...,34.976768,66.602173,78.265755,70.115463,49.209461,73.489998,27.831350,20.660532,63.045429,29.113972
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-05-26,195.048645,181.690720,130.713272,407.690002,317.842712,110.309998,269.473938,210.205597,200.990005,227.792633,...,175.034866,291.646637,219.402191,93.602295,352.919006,436.000000,42.630703,96.115509,102.080101,162.038177
2025-05-27,199.983047,184.129669,132.345932,413.100006,322.415985,114.559998,277.201263,212.031235,206.020004,234.694534,...,181.925064,291.084198,221.614899,95.741890,358.668884,446.000000,42.807838,97.352623,102.565582,165.705902
2025-05-28,200.192795,181.522171,131.430054,412.230011,320.861908,112.860001,276.159698,210.800919,204.720001,238.459198,...,182.828522,294.152893,219.471329,94.946899,359.098145,445.100006,42.443726,97.013412,101.168587,164.848770
2025-05-29,199.723328,184.030518,132.256348,413.359985,320.842072,113.029999,281.258362,212.041168,205.699997,240.988907,...,183.662506,294.212128,219.797318,96.281693,361.763428,447.089996,42.640545,96.873741,101.743240,166.582977


## Returns

In [56]:
ten_year_returns = ten_year_data_fill['Close'].pct_change()

## Features

### Moving Averages

In [57]:
mo_a_5 = ten_year_returns.rolling(5).mean()
mo_a_5.columns = pd.MultiIndex.from_product([['MA_5'], mo_a_5.columns])
mo_a_10 = ten_year_returns.rolling(10).mean()
mo_a_10.columns = pd.MultiIndex.from_product([['MA_10'], mo_a_10.columns])
mo_a_20 = ten_year_returns.rolling(20).mean()
mo_a_20.columns = pd.MultiIndex.from_product([['MA_20'], mo_a_20.columns])
mo_a_50 = ten_year_returns.rolling(50).mean()
mo_a_50.columns = pd.MultiIndex.from_product([['MA_50'], mo_a_50.columns])
mo_a_100 = ten_year_returns.rolling(100).mean()
mo_a_100.columns = pd.MultiIndex.from_product([['MA_100'], mo_a_100.columns])
mo_a_200 = ten_year_returns.rolling(200).mean()
mo_a_200.columns = pd.MultiIndex.from_product([['MA_200'], mo_a_200.columns])

In [58]:
moving_averages = pd.concat([
    mo_a_5,
    mo_a_10,
    mo_a_20,
    mo_a_50,
    mo_a_100,
    mo_a_200
], axis=1)

#### Scaling

In [59]:
# using standardization scaling on my data because i want to calculate returns
scaled_array = StandardScaler().fit_transform(moving_averages)
scaled_moving_averages = pd.DataFrame(
    scaled_array,
    index=moving_averages.index,
    columns=moving_averages.columns
)

### Volatility

In [60]:
vol_21 = ten_year_returns.rolling(21).std()
vol_63 = ten_year_returns.rolling(63).std()
vol_252 = ten_year_returns.rolling(252).std()

#### Scaling

In [61]:
scaled_vol_21 = pd.DataFrame(
    StandardScaler().fit_transform(vol_21),
    index= vol_21.index,
    columns= vol_21.columns
)
scaled_vol_63 = pd.DataFrame(
    StandardScaler().fit_transform(vol_63),
    index= vol_63.index,
    columns= vol_63.columns
)
scaled_vol_252 = pd.DataFrame(
    StandardScaler().fit_transform(vol_252),
    index= vol_252.index,
    columns= vol_252.columns
)

### Market Index

In [62]:
market_index = yf.download('^GSPC', start='2014-06-01', end='2025-06-01')

[*********************100%***********************]  1 of 1 completed


In [63]:
market_index.index.to_series().diff().value_counts()

Date
1 days    2166
3 days     497
4 days      76
2 days      27
Name: count, dtype: int64

In [64]:
missing = pd.date_range(market_index.index.min(), market_index.index.max())
market_index = market_index.reindex(missing).ffill()

In [65]:
market_index_return = market_index['Close'].pct_change().dropna()

In [66]:
market_index_return.shape

(4015, 1)

#### Scaling

In [67]:
sc_market_index_returns = pd.DataFrame(
    StandardScaler().fit_transform(market_index_return),
    index=market_index_return.index,
    columns=market_index_return.columns
)

### RSI

In [68]:
delta = ten_year_data_fill['Close'].diff()

gain = delta.clip(lower=0)
loss = -delta.clip(upper=0)

avg_gain = gain.ewm(alpha=1/14, min_periods=14, adjust=False).mean()
avg_loss = loss.ewm(alpha=1/14, min_periods=14, adjust=False).mean()

rs = avg_gain/avg_loss

rsi = 100 - (100/(1 + rs))

#### Scaling

In [69]:
scaled_rsi = pd.DataFrame(
    StandardScaler().fit_transform(rsi),
    index=rsi.index,columns=rsi.columns
)

### MACD

In [70]:
fast_emw = ten_year_data_fill['Close'].ewm(alpha=1/12,adjust=False).mean()
slow_emw = ten_year_data_fill['Close'].ewm(alpha=1/26, adjust=False).mean()
macd_line = fast_emw - slow_emw

signal_line = macd_line.ewm(alpha=1/9, adjust=False).mean()

macd_histogram = macd_line - signal_line

#### Scaling

In [71]:
sc_macd_line = pd.DataFrame(
    StandardScaler().fit_transform(macd_line),
    index=macd_line.index,columns=macd_line.columns
)
sc_signal_line = pd.DataFrame(
    StandardScaler().fit_transform(signal_line),
    index=signal_line.index,columns=signal_line.columns
)
sc_macd_histogram = pd.DataFrame(
    StandardScaler().fit_transform(macd_histogram),
    index=macd_histogram.index,columns=macd_histogram.columns
)

### Bollinger Bands

In [72]:
sma_20 = ten_year_data_fill['Close'].rolling(20).mean()
roll_std = ten_year_data_fill['Close'].rolling(20).std()

upper_band = sma_20 + (2 * roll_std)
lower_band = sma_20 - (2 * roll_std)

sma_20.columns = pd.MultiIndex.from_product([['SMA-20'], sma_20.columns])
upper_band.columns = pd.MultiIndex.from_product([['UpperBand'], upper_band.columns])
lower_band.columns = pd.MultiIndex.from_product([['LowerBand'], lower_band.columns])

bollinger_bands = pd.concat([
    sma_20,
    upper_band,
    lower_band
], axis=1)


#### Scaling

In [73]:
sc_bollinger_bands = pd.DataFrame(
    StandardScaler().fit_transform(bollinger_bands),
    index=bollinger_bands.index,columns=bollinger_bands.columns
)

### On-Balance Volume

In [74]:
OBV = pd.DataFrame(0, index=ten_year_data_fill['Close'].index, columns=ten_year_data_fill['Close'].columns)
for j in range(0, len(ten_year_data_fill['Close'].columns)):
    close_col = ten_year_data_fill['Close'].iloc[:,j].values
    volume_col = ten_year_data_fill['Volume'].iloc[:,j].values
    obv_col = np.zeros(len(close_col))
    for i in range(1, len(close_col)):
        if close_col[i] > close_col[i-1]:
            obv_col[i] = obv_col[i-1] + volume_col[i]
        elif close_col[i] < close_col[i-1]:
            obv_col[i] = obv_col[i-1] - volume_col[i]
        else:
            obv_col[i] = obv_col[i-1]
    OBV.iloc[:,j]=obv_col 

### Scaling

In [75]:
scaled_OBV = pd.DataFrame(
    StandardScaler().fit_transform(OBV),
    index=OBV.index,columns=OBV.columns
)

In [76]:
sc_market_index_returns.columns.nlevels > 1

False

## Training LSTM for AAPL

### Inputs For LSTM

In [77]:
aapl_features = pd.concat([
    moving_averages.xs('AAPL', level=1, axis=1),
    vol_21['AAPL'].rename('VOL_21'),
    vol_63['AAPL'].rename('VOL_63'),
    vol_252['AAPL'].rename('VOL_252'),
    market_index.rename(columns={'^GSPC':'MARKET_INDEX'}),
    rsi['AAPL'].rename('RSI'),
    macd_line['AAPL'].rename('MACD_LINE'),
    signal_line['AAPL'].rename('SIGNAL_LINE'),
    macd_histogram['AAPL'].rename('MACD_HIST'),
    bollinger_bands.xs('AAPL', level=1, axis=1),
    OBV['AAPL'].rename('OBV')
], axis=1)

In [78]:
aapl_features.dropna(how='any', inplace=True)

### Sliding Window Generation

In [79]:
timesteps = 20
horizon = 1

aapl_features_array = aapl_features.values
aapl_returns = ten_year_returns['AAPL'].dropna().values

X, y = [], []
for i in range(len(aapl_features_array) - timesteps - horizon):
    X.append(aapl_features_array[i:i+timesteps])
    y.append(aapl_returns[i+timesteps:i+timesteps+horizon])

X = np.array(X)
y = np.array(y)

In [80]:
len(aapl_returns)

4015

In [81]:
# splititng the data
split_point = int(0.8 * len(X))
X_train, X_test = X[:split_point], X[split_point:]
y_train, y_test = y[:split_point], y[split_point:]

In [82]:
# scaling training data
samples, timesteps, features = X_train.shape
X_train_2D = X_train.reshape(-1, features)
scaled = StandardScaler()
sc_X_train_2D = scaled.fit_transform(X_train_2D)
sc_X_train = X_train_2D.reshape(samples, timesteps, features)


In [83]:
# scaling test data
samples_test = X_test.shape[0]
X_test_2D = X_test.reshape(-1, features)
sc_X_test_2D = scaled.transform(X_test_2D)
sc_X_test = sc_X_test_2D.reshape(samples_test, timesteps, features)


In [84]:
model = Sequential()
model.add(LSTM(64, return_sequences=False, input_shape=(timesteps, X.shape[2])))
model.add(Dropout(0.3))
model.add(Dense(horizon))
model.compile(optimizer='adam', loss='mse')

In [85]:
history = model.fit(
    sc_X_train, y_train,
    epochs=30,
    batch_size=32,
    validation_split=0.1
)

Epoch 1/30
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 19ms/step - loss: 0.2418 - val_loss: 0.0376
Epoch 2/30
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 0.1314 - val_loss: 0.0133
Epoch 3/30
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 0.0714 - val_loss: 0.0063
Epoch 4/30
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 0.0431 - val_loss: 0.0034
Epoch 5/30
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 0.0224 - val_loss: 0.0021
Epoch 6/30
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 0.0130 - val_loss: 0.0013
Epoch 7/30
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 0.0072 - val_loss: 8.7219e-04
Epoch 8/30
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 0.0039 - val_loss: 6.9009e-04
Epoch 9/30
[1m85/85[0m [32m━━━━━━━━━━

In [86]:
loss = model.evaluate(X_test, y_test)

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 2.1858e-04 


In [87]:
rmse = math.sqrt(loss)
rmse

0.014784414270874364

In [94]:
rmse < aapl_returns.std()

True

#### Since rmse is less than the average volatility, that means the model is capturing patterms

In [95]:
y_pred = model.predict(X_test)

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 


## Classical Decomposition 

In [None]:
decomposition = {}
for ticker in three_year_data_return.columns:
    series = three_year_data_return[ticker]
    decomposition[ticker] = seasonal_decompose(series, model='additive', period=21)

In [None]:
trend = decomposition["VZ"].trend
seasonsal = decomposition["VZ"].seasonal
residual = decomposition["VZ"].resid

In [None]:
plt.figure(figsize=(35,20))
plt.subplot(411)
plt.plot(three_year_data_return.index, trend, label = "Trend", color= "blue")
plt.legend(loc="upper left")
plt.subplot(412)
plt.plot(three_year_data_return.index, seasonsal, label = "Seasonality", color= "green")
plt.legend(loc="upper left")
plt.subplot(413)
plt.plot(three_year_data_return.index, residual, label = "Residual", color= "orange")
plt.legend(loc="upper left")
plt.tight_layout()
plt.show()

### STL Decomposition

In [None]:
stl_decomp = STL(daily_avg["Close Price"], period=7).fit()

In [None]:
plt.figure(figsize=(10, 4))
plt.subplot(411)
plt.plot(daily_avg["Date"] ,stl_decomp.observed, label="Original", color="orange")
plt.legend(loc="upper left")
plt.subplot(412)
plt.plot(daily_avg["Date"] ,stl_decomp.trend, label="Trend", color="grey")
plt.legend(loc="upper left")
plt.subplot(413)
plt.plot(daily_avg["Date"], stl_decomp.seasonal, label="Seasonal", color="grey")
plt.legend(loc="upper left")
plt.subplot(414)
plt.plot(daily_avg["Date"], stl_decomp.resid, label="Residual", color="black")
plt.legend(loc="upper left")
plt.tight_layout()
plt.show()

### Checking Stationarity
#### Performing Adf Test

In [None]:
adf_test = adfuller(daily_avg["Close Price"])
print(f"ADF Statistics: {round(adf_test[0], 3)}")
print(f"p-value: {round(adf_test[1], 3)}")
print("Critical Values")
for key, value in adf_test[4].items():
    print(f"{key}: {round(value, 3)}")


#### Performing KPSS Test

In [None]:
kpss_test = kpss(daily_avg["Close Price"], regression="ct")
print(f"KPSS Statistics: {round(kpss_test[0], 2)}")
print(f"p-value: {kpss_test[1]}")
for key, value in kpss_test[3].items():
    print(f"{key}: {value}")

#### Performing KS Test to check strict stationarity

In [None]:
def ks_test_stationarity(series):
    split = len(series) //2
    first_half = series[:split]
    second_half = series[split:]
    stat, p_value  = ks_2samp(first_half, second_half)
    return stat, p_value

ks_stat, ks_p_value = ks_test_stationarity(daily_avg["Close Price"])
print(ks_stat, ks_p_value)

#### Conclusion: 
#### There is a conflict between the adf and kpss tests because the provided data is of 21 days which is not suitable for either of the tests to provide accurate results. Thereby, stationarity of data remains ambiguous