In [40]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import f as fisher_f
import datetime
from sklearn.linear_model import LinearRegression

In [41]:
df_daily = pd.read_csv("SAP_weekly_return_volatility_detailed.csv")
df_daily.head(5)

Unnamed: 0,High,Low,Open,Close,Volume,Adj Close,Return,Date,Week_Number,Year,Day,Weekday,mean_return,volatility
0,99.559998,98.339996,98.57,99.18,506300.0,94.896118,0.0,2019-01-02,0,2019,2,Wednesday,-0.105,3.59465
1,96.830002,95.449997,96.730003,95.459999,807800.0,91.3368,-3.751,2019-01-03,0,2019,3,Thursday,-0.105,3.59465
2,99.199997,96.910004,97.339996,98.739998,1038100.0,94.475128,3.436,2019-01-04,0,2019,4,Friday,-0.105,3.59465
3,100.190002,98.760002,99.440002,99.709999,861600.0,95.403229,0.982,2019-01-07,1,2019,7,Monday,0.6532,1.153409
4,101.480003,100.43,101.370003,101.269997,548600.0,96.895844,1.565,2019-01-08,1,2019,8,Tuesday,0.6532,1.153409


In [42]:
df_daily_copy = df_daily.copy()

In [43]:
months = list()
for i in range(len(df_daily_copy)):
    date = df_daily_copy["Date"][i]
    months.append(datetime.datetime.strptime(date,"%Y-%m-%d").month)
    
df_daily_copy["Month"] = months
df_daily_copy.head(5)

Unnamed: 0,High,Low,Open,Close,Volume,Adj Close,Return,Date,Week_Number,Year,Day,Weekday,mean_return,volatility,Month
0,99.559998,98.339996,98.57,99.18,506300.0,94.896118,0.0,2019-01-02,0,2019,2,Wednesday,-0.105,3.59465,1
1,96.830002,95.449997,96.730003,95.459999,807800.0,91.3368,-3.751,2019-01-03,0,2019,3,Thursday,-0.105,3.59465,1
2,99.199997,96.910004,97.339996,98.739998,1038100.0,94.475128,3.436,2019-01-04,0,2019,4,Friday,-0.105,3.59465,1
3,100.190002,98.760002,99.440002,99.709999,861600.0,95.403229,0.982,2019-01-07,1,2019,7,Monday,0.6532,1.153409,1
4,101.480003,100.43,101.370003,101.269997,548600.0,96.895844,1.565,2019-01-08,1,2019,8,Tuesday,0.6532,1.153409,1


In [44]:
def linn_regg(x1,y1):
    x = x1
    y = y1
    x_2 = x[:, np. newaxis ]
    
    lin_reg = LinearRegression ( fit_intercept = True )
    lin_reg.fit(x_2 , y)
    prediction = lin_reg.predict(x_2)
    
    sse = sum((y - prediction)**2)
    
    return sse

In [45]:
# x = np.array(df_daily_copy[(df_daily_copy["Year"] == 2019) & (df_daily_copy["Month"] == 1)]["Day"])
# y = np.array(df_daily_copy[(df_daily_copy["Year"] == 2019) & (df_daily_copy["Month"] == 1)]["Adj Close"])

### Values of L

In [46]:
l = list()
for i in [2019,2020]:
    for j in range(1,13):
        x = np.array(df_daily_copy[(df_daily_copy["Year"] == i) & (df_daily_copy["Month"] == j)]["Day"])
        y = np.array(df_daily_copy[(df_daily_copy["Year"] == i) & (df_daily_copy["Month"] == j)]["Adj Close"])
        l.append(linn_regg(x,y))

### Calculating L1, L2, K, F and P_Value

In [53]:
def compute(y,m,length,sse):
    l1_dict = dict()
    l2_dict = dict()
    l1l2 = dict()
    for i in range(2, len(np.array(df_daily_copy[(df_daily_copy["Year"] == y) & (df_daily_copy["Month"] == m)]["Day"])) - 2):
        x_l1 = np.array(df_daily_copy[(df_daily_copy["Year"] == y) & (df_daily_copy["Month"] == m)]["Day"])[:i]
        y_l1 = np.array(df_daily_copy[(df_daily_copy["Year"] == y) & (df_daily_copy["Month"] == m)]["Adj Close"])[:i]
        ret1 = linn_regg(x_l1,y_l1)
        l1_dict[i] = ret1

        x_l2 = np.array(df_daily_copy[(df_daily_copy["Year"] == y) & (df_daily_copy["Month"] == m)]["Day"])[i:]
        y_l2 = np.array(df_daily_copy[(df_daily_copy["Year"] == y) & (df_daily_copy["Month"] == m)]["Adj Close"])[i:]
        ret2 = linn_regg(x_l2,y_l2)
        l2_dict[i] = ret2

        l1l2[i] = ret1 + ret2
        
    k = min(l1l2,key=l1l2.get)
    l1 = l1_dict[k]
    l2 = l2_dict[k]
    f = ((sse - (l1 + l2)) / 2) * ((l1 + l2) / (length - 4))**-1
    p_value = fisher_f.cdf(f,2,length-4)
    return[l1,l2,k,f,p_value]

In [54]:
ind = 0
l1_list = list()
l2_list = list()
k_list = list()
f_list = list()
p_list = list()
for yr in [2019,2020]:
    for mo in range(1,13):
        length1 = len(np.array(df_daily_copy[(df_daily_copy["Year"] == yr) & (df_daily_copy["Month"] == mo)]["Day"]))
        sse_l = l[ind]
        l1, l2, k, f, p_value = compute(yr,mo,length1,sse_l)
        l1_list.append(l1)
        l2_list.append(l2)
        k_list.append(k)
        f_list.append(f)
        p_list.append(p_value)
        ind += 1

### 1. take years 1 and 2. For each month, compute the "candidate" days and decide whether there is a significant change of pricing trend in each month. Use 0.9 as critical value.

In [55]:
years = list(np.repeat(2019,12)) + list(np.repeat(2020,12))
months = list(np.arange(1,13)) + list(np.arange(1,13))
df_final = pd.DataFrame([years,months,l,l1_list,l2_list,k_list,f_list,p_list], \
                        index = ["Year","Month","L","L1","L2","K","F","P_value"]).T

df_final

Unnamed: 0,Year,Month,L,L1,L2,K,F,P_value
0,2019.0,1.0,60.430568,29.718294,0.174696,18.0,8.683287,0.997479
1,2019.0,2.0,11.291953,5.558311,2.113385,9.0,3.539234,0.94493
2,2019.0,3.0,34.768534,4.764037,11.981176,10.0,9.148777,0.997991
3,2019.0,4.0,486.754365,35.73454,1.731571,16.0,101.930787,1.0
4,2019.0,5.0,80.920829,15.193921,12.941313,11.0,16.885246,0.999926
5,2019.0,6.0,36.883271,2.833394,23.889758,5.0,3.041593,0.924061
6,2019.0,7.0,127.54777,7.108096,18.300564,12.0,36.178688,1.0
7,2019.0,8.0,43.746715,0.054137,27.334586,4.0,5.375275,0.985221
8,2019.0,9.0,24.245581,0.040775,9.609699,3.0,12.098977,0.99937
9,2019.0,10.0,212.748568,11.071285,14.77703,8.0,68.691226,1.0


### 2. how many months exhibit significant price changes for your stock ticker.

* All 24 months exhibit significant change for my stock. The is a trend change in every month.

### 3. are there more "changes" in year 1 or in year 2?

* Both years 2019 and 2020 have same no. of chanegs 12.