<h3>Imports</h3>

In [14]:
import os
import pickle

import numpy as np
import pandas as pd
import yfinance as yf

<h3>Grabbing Tickers</h3>

In [9]:
# Space-separated symbols: yfinance accepts this string form directly, and the
# list form is what the later cells iterate over.
all_tickers = "WFC MSFT INTC AMZN PYPL"
tickers = all_tickers.split(" ")
selected_stocks = yf.Tickers(all_tickers)

<h3>Getting the History for Each Ticker</h3>

In [10]:
# Map each symbol to its one-year daily price history (a DataFrame).
#
# Each Ticker is built directly from its symbol instead of being looked up
# positionally in `selected_stocks.tickers`: that container is a dict keyed by
# symbol in current yfinance releases, so integer indexing raises KeyError.
# NOTE: period="1y" is relative to the day the cell runs, so the downloaded
# window (and everything derived from it) changes between runs.
selected_history = {
    symbol: yf.Ticker(symbol).history(period="1y")
    for symbol in tickers
}

<h3>Viewing Data</h3>

In [11]:
# Sanity-check the first ticker's frame: schema summary first, then the ten
# most recent rows (left as the last expression so the notebook renders it).
first_history = selected_history[tickers[0]]
first_history.info()
first_history.tail(10)

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 253 entries, 2019-10-30 to 2020-10-29
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Open          253 non-null    float64
 1   High          253 non-null    float64
 2   Low           253 non-null    float64
 3   Close         253 non-null    float64
 4   Volume        253 non-null    int64  
 5   Dividends     253 non-null    float64
 6   Stock Splits  253 non-null    int64  
dtypes: float64(5), int64(2)
memory usage: 15.8 KB


Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-10-16,23.01,23.23,22.610001,22.860001,43141200,0.0,0
2020-10-19,22.889999,22.91,22.51,22.540001,37423600,0.0,0
2020-10-20,22.73,23.129999,22.690001,22.809999,34751200,0.0,0
2020-10-21,22.82,22.950001,22.6,22.700001,29528300,0.0,0
2020-10-22,22.66,23.32,22.610001,23.25,32238300,0.0,0
2020-10-23,23.58,23.59,23.120001,23.280001,24558300,0.0,0
2020-10-26,22.969999,23.02,22.6,22.700001,34034100,0.0,0
2020-10-27,22.610001,22.610001,21.82,21.82,49636500,0.0,0
2020-10-28,21.23,21.48,20.799999,21.18,54770100,0.0,0
2020-10-29,21.049999,21.379999,20.76,21.139999,44223696,0.0,0


<h3>Functions for Calculated Data Points</h3>

In [12]:
def getIntradayChangeInfo(data_set):
    """Add intraday change columns and their one-row lags to ``data_set`` in place.

    Columns written:
      * ``"Intraday Change"``: ``Open - Close`` for each row.
      * ``"Intraday Pct Change"``: ``"Intraday Change" / Open`` (a fraction,
        not multiplied by 100).
      * ``"Previous Day Change"`` / ``"Previous Day Pct Change"``: the two
        columns above moved down one row, with 0 in the first row.

    Args:
        data_set: DataFrame with ``"Open"`` and ``"Close"`` columns. Rows are
            used in their existing order (presumably chronological; the
            notebook passes yfinance history frames).

    Returns:
        None. The frame is modified in place.
    """
    # NOTE(review): this is Open - Close, so a day that closes above its open
    # gets a NEGATIVE change. Confirm the sign is intended before changing it;
    # the serialized data depends on it.
    data_set["Intraday Change"] = data_set["Open"] - data_set["Close"]
    data_set["Intraday Pct Change"] = data_set["Intraday Change"] / data_set["Open"]

    # Positional one-row lag. fill_value=0 seeds only the first row (which has
    # no previous day); NaNs already present in the source column are kept.
    data_set["Previous Day Change"] = data_set["Intraday Change"].shift(1, fill_value=0)
    data_set["Previous Day Pct Change"] = data_set["Intraday Pct Change"].shift(1, fill_value=0)
    
def getFiveDayAverageForIntraChange(data_set):
    """Add the trailing mean of the previous (up to) five intraday pct changes.

    Writes ``"Previous Five Day Average Pct Change"`` to ``data_set`` in place.
    For each row, the value is the mean of ``"Intraday Pct Change"`` over the
    up-to-five rows immediately before it; the current row is excluded. Rows
    with fewer than five predecessors average whatever is available, and the
    first row, which has none, gets 0.

    Args:
        data_set: DataFrame that already has an ``"Intraday Pct Change"``
            column. Rows are used in their existing order.

    Returns:
        None. The frame is modified in place.
    """
    # Shift first so the window ending at row i covers rows i-5 .. i-1 only.
    prior_pct_change = data_set["Intraday Pct Change"].shift(1)
    trailing_mean = prior_pct_change.rolling(window=5, min_periods=1).mean()

    # Assign the filled result directly. The previous chained
    # `data_set[col].fillna(0, inplace=True)` acts on a temporary under pandas
    # Copy-on-Write and would leave NaN in the frame.
    data_set["Previous Five Day Average Pct Change"] = trailing_mean.fillna(0)
        

def updateFiveDays(current_day, five_days):
    """Push ``current_day`` into a Series used as a five-slot sliding window.

    Slots are labelled ``"1"`` (oldest) through ``"5"`` (newest). While the
    Series holds anything other than exactly five entries, the value is stored
    under the label ``str(size + 1)``. Once it holds five, every slot takes the
    value of the next one and the new value lands in ``"5"``.

    Args:
        current_day: Value to add to the window.
        five_days: Series holding the window; it is modified in place.

    Returns:
        The same ``five_days`` Series that was passed in.
    """
    filled = five_days.size
    if filled != 5:
        # Still filling up: store under the next label.
        five_days[str(filled + 1)] = current_day
        return five_days

    # Window is full: slide slots 2..5 down into 1..4, then store the newest.
    for slot in range(1, 5):
        five_days[str(slot)] = five_days[str(slot + 1)]
    five_days["5"] = current_day
    return five_days

<h3>Adding Calculated Data Points</h3>

In [13]:
# Enrich every ticker's history frame in place with the derived columns, then
# print its first ten rows as a quick check.
for symbol, frame in selected_history.items():
    getIntradayChangeInfo(frame)
    getFiveDayAverageForIntraChange(frame)
    print(f"For {symbol}")
    print(frame.head(10))

For WFC
                 Open       High        Low      Close    Volume  Dividends  \
Date                                                                          
2019-10-30  49.628926  50.011568  49.380205  49.772415  16155900       0.00   
2019-10-31  49.523695  49.772412  48.930598  49.389771  18814700       0.00   
2019-11-01  49.868079  50.021136  49.657624  49.915909  16359100       0.00   
2019-11-04  50.298551  50.509003  49.973304  50.432476  17559900       0.00   
2019-11-05  50.451613  51.178635  50.451613  50.987312  25965300       0.00   
2019-11-06  50.977745  51.503880  50.862949  51.465614  22051100       0.00   
2019-11-07  51.938844  52.537621  51.938844  52.151314  23452500       0.51   
2019-11-08  51.987130  52.373433  51.726373  52.247883  14722100       0.00   
2019-11-11  51.948499  52.286515  51.890552  52.199596  10659400       0.00   
2019-11-12  51.909866  52.383093  51.784316  52.363777  15187600       0.00   

            Stock Splits  Intraday Change  

For AMZN
                   Open         High          Low        Close   Volume  \
Date                                                                      
2019-10-30  1760.239990  1782.380005  1759.119995  1779.989990  2449400   
2019-10-31  1775.989990  1792.000000  1771.479980  1776.660034  2781200   
2019-11-01  1788.010010  1797.449951  1785.209961  1791.439941  2790400   
2019-11-04  1801.010010  1815.060059  1801.010010  1804.660034  2771900   
2019-11-05  1809.160034  1810.250000  1794.000000  1801.709961  1885500   
2019-11-06  1801.000000  1802.500000  1788.579956  1795.770020  2029800   
2019-11-07  1803.760010  1805.900024  1783.479980  1788.199951  2651100   
2019-11-08  1787.890015  1789.880005  1774.040039  1785.880005  2123300   
2019-11-11  1778.000000  1780.000000  1767.130005  1771.650024  1946000   
2019-11-12  1774.660034  1786.219971  1771.910034  1778.000000  2037600   

            Dividends  Stock Splits  Intraday Change  Intraday Pct Change  \
Date         

<h3>Serializing the Data</h3>

In [19]:
# Persist the enriched histories and the ticker string for later notebooks.
# Create the output directory first so a fresh checkout does not fail with
# FileNotFoundError when "data/" is missing.
os.makedirs("data", exist_ok=True)

with open("data/model_data.pkl", mode="wb") as fwb:
    pickle.dump(selected_history, fwb)

# NOTE(review): this stores the raw space-separated string, not the `tickers`
# list. Confirm that is the form the consumer of ticker_data.pkl expects.
with open("data/ticker_data.pkl", mode="wb") as fwb:
    pickle.dump(all_tickers, fwb)