In [1]:
print("hello")

hello


In [2]:
!pip3 install yfinance

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m


In [3]:
import os
import pandas as pd
import yfinance as yf

def get_stock(ticker, start_date, end_date, s_window, l_window):
    try:
        df = yf.download(ticker, start=start_date, end=end_date)
        df['Return'] = df['Adj Close'].pct_change()
        df['Return'].fillna(0, inplace=True)
        df['Date'] = df.index
        df['Date'] = pd.to_datetime(df['Date'])
        df['Month'] = df['Date'].dt.month
        df['Year'] = df['Date'].dt.year
        df['Day'] = df['Date'].dt.day
        for col in ['Open', 'High', 'Low', 'Close', 'Adj Close']:
            df[col] = df[col].round(2)
        # df['Weekday'] = df['Date'].dt.weekday_name
        df['Weekday'] = df['Date'].dt.day_name()
        df['Week_Number'] = df['Date'].dt.strftime('%U')
        df['Year_Week'] = df['Date'].dt.strftime('%Y-%U')
        df['Short_MA'] = df['Adj Close'].rolling(
            window=s_window, min_periods=1).mean()
        df['Long_MA'] = df['Adj Close'].rolling(
            window=l_window, min_periods=1).mean()
        col_list = ['Date', 'Year', 'Month', 'Day', 'Weekday',
                    'Week_Number', 'Year_Week', 'Open',
                    'High', 'Low', 'Close', 'Volume', 'Adj Close',
                    'Return', 'Short_MA', 'Long_MA']
        num_lines = len(df)
        df = df[col_list]
        print('read ', num_lines, ' lines of data for ticker: ', ticker)
        return df
    except Exception as error:
        print(error)
        return None


here = os.path.abspath('')
input_dir = os.path.abspath(os.path.join(here, os.pardir))
tickers = ['SPY']
for ticker in tickers:
    try:
        output_file = os.path.join(input_dir, ticker + '.csv')
        df = get_stock(ticker, start_date='2016-01-01', end_date='2020-12-31',
                       s_window=14, l_window=50)
        df.to_csv(output_file, index=False)
        print('wrote ' + str(len(df)) + ' lines to file: ' + output_file)
    except Exception as e:
        print(e)
        print('failed to get Yahoo stock data for ticker: ', ticker)

[*********************100%%**********************]  1 of 1 completed

read  1258  lines of data for ticker:  SPY
wrote 1258 lines to file: /Users/ananyasingh/cs677/SPY.csv



The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Return'].fillna(0, inplace=True)


In [6]:
ticker='SPY'
ticker_file = os.path.join(ticker + '.csv')

try:
    with open(ticker_file) as f:
        lines = f.read().splitlines()
    print('opened file for ticker: ', ticker)

except Exception as e:
    print(e)
    print('failed to read stock data for ticker: ', ticker)

opened file for ticker:  SPY


In [7]:
lines

['Date,Year,Month,Day,Weekday,Week_Number,Year_Week,Open,High,Low,Close,Volume,Adj Close,Return,Short_MA,Long_MA',
 '2016-01-04,2016,1,4,Monday,01,2016-01,200.49,201.03,198.59,201.02,222353500,172.95,0.0,172.95,172.95',
 '2016-01-05,2016,1,5,Tuesday,01,2016-01,201.4,201.9,200.05,201.36,110845800,173.25,0.0016911857277299713,173.1,173.1',
 '2016-01-06,2016,1,6,Wednesday,01,2016-01,198.34,200.06,197.6,198.82,152112600,171.06,-0.012614080547340034,172.42,172.42',
 '2016-01-07,2016,1,7,Thursday,01,2016-01,195.33,197.44,193.59,194.05,213436100,166.96,-0.023991575117910724,171.055,171.055',
 '2016-01-08,2016,1,8,Friday,01,2016-01,195.19,195.85,191.58,191.92,209817200,165.12,-0.010976780267532016,169.868,169.868',
 '2016-01-11,2016,1,11,Monday,02,2016-02,193.01,193.41,189.82,192.11,187941300,165.29,0.0009900625955385767,169.105,169.105',
 '2016-01-12,2016,1,12,Tuesday,02,2016-02,193.82,194.55,191.14,193.66,172330500,166.62,0.00806840535098341,168.75,168.75',
 '2016-01-13,2016,1,13,Wednesday,0

In [10]:
years = [ int(lines[i].split(',')[1])  for i in range(1, len(lines))]
weekdays = [ lines[i].split(',')[4] for i in range(1, len(lines))]
ret = [float(lines[i].split(',')[13]) for i in range(1,len(lines))]

In [18]:
plus_indices = {i for i in range(len(ret)) if ret[i] >= 0}
r_plus = [ret[i] for i in plus_indices]

minus_indices = {i for i in range(len(ret)) if ret[i] < 0}
r_minus = [ret[i] for i in plus_indices]
    
all_indices = {i for i in range(len(ret))}


In [19]:
def returns(weekday,year,pm):
  '''
  Weekday in {'Monday','Tuesday','Wednesday','Thursday','Friday'}
  Year in {2016,2017,2018,2019,2020}
  pm any of the three sets plus_indices, minus_indices, or all_indices

  Returns the set of specified returns
  '''
  ind_year = {i for i in range(len(years)) if years[i] == year}
  ind_weekday = {i for i in range(len(weekdays)) if weekdays[i] == weekday}
  desired_indices = pm.intersection(ind_year).intersection(ind_weekday)

  return [ret[i] for i in desired_indices]
    

In [20]:
import numpy as np
year_dict = {0:2016, 1:2017, 2:2018, 3:2019, 4:2020}
weekday_dict = {0:'Monday', 1:'Tuesday', 2:'Wednesday', 3:'Thursday', 4:'Friday'}

all_tables = []

for j in range(5):
    y = year_dict[j]
    year_summary = []
    
    for i in range(5):
        w = weekday_dict[i]
        all = returns(w, y, all_indices)
        neg = returns(w, y, minus_indices)
        pos = returns(w, y, plus_indices)
        
        year_summary.append([np.mean(all), np.std(all), len(neg), np.mean(neg), np.std(neg), len(pos), np.mean(pos), np.std(pos)])
    
    all_tables.append(year_summary)

In [21]:
import pandas as pd

col_list = ['mean(R)','std(R)','|R_minus|','mean(R_minus)','std(R_minus)','|R_plus|','mean(R_plus)','std(R_plus)']
row_list = weekday_dict.values()

l = []
for i in range(5):
    df = pd.DataFrame(all_tables[i],columns=col_list, index=row_list)
    df = df.style.set_caption(str(year_dict[i])+' Summary by Day of Week')
    l.append(df)

l[0]

Unnamed: 0,mean(R),std(R),|R_minus|,mean(R_minus),std(R_minus),|R_plus|,mean(R_plus),std(R_plus)
Monday,0.000777,0.007134,20,-0.004818,0.005123,26,0.005081,0.00522
Tuesday,0.001052,0.00837,20,-0.006988,0.004894,32,0.006077,0.005764
Wednesday,0.000642,0.00756,24,-0.005432,0.005622,28,0.005849,0.004509
Thursday,0.000419,0.00679,23,-0.004776,0.005407,28,0.004686,0.004426
Friday,-0.000185,0.010318,26,-0.006848,0.00858,25,0.006746,0.006814


In [22]:
l[1]

Unnamed: 0,mean(R),std(R),|R_minus|,mean(R_minus),std(R_minus),|R_plus|,mean(R_plus),std(R_plus)
Monday,0.00134,0.003886,20,-0.00179,0.00155,26,0.003748,0.003397
Tuesday,0.000417,0.004415,23,-0.003174,0.002971,28,0.003367,0.002992
Wednesday,0.001082,0.004456,19,-0.002579,0.003918,33,0.00319,0.003208
Thursday,-0.000208,0.00493,25,-0.003773,0.00407,26,0.003219,0.002789
Friday,0.001376,0.002995,20,-0.001573,0.000941,31,0.003279,0.002225


In [24]:
import pandas as pd
# Make aggregate table for all years
all_years = []
for i in range(5):
    w = weekday_dict[i]
    all = []
    neg = []
    pos = []
    
    for j in range(5):
        y = year_dict[j]
        all.extend(returns(w, y, all_indices))
        neg.extend(returns(w, y, minus_indices))
        pos.extend(returns(w, y, plus_indices))
    
    all_years.append([np.mean(all), np.std(all), len(neg), np.mean(neg), np.std(neg), len(pos), np.mean(pos), np.std(pos)])
    
df = pd.DataFrame(all_years,columns=col_list, index=row_list)
df = df.style.set_caption('All Years Summary by Day of Week')
df

Unnamed: 0,mean(R),std(R),|R_minus|,mean(R_minus),std(R_minus),|R_plus|,mean(R_plus),std(R_plus)
Monday,0.000509,0.013913,99,-0.008613,0.014802,137,0.0071,0.00846
Tuesday,0.001215,0.011465,112,-0.006709,0.006935,146,0.007294,0.0105
Wednesday,0.000906,0.011115,106,-0.007369,0.010126,151,0.006715,0.007515
Thursday,-2.1e-05,0.011676,117,-0.007136,0.01244,137,0.006055,0.006362
Friday,0.000556,0.011111,111,-0.007257,0.008449,142,0.006663,0.008895
