<h1>Using the yfinance Library</h1>

<h3>Imports</h3>

In [241]:
import yfinance as yf
import math
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import explained_variance_score, mean_absolute_error, r2_score, mean_squared_error

import pandas as pd
import numpy as np
import plotly.offline as plyo
import cufflinks as cf

<h3>Grabbing Tickers</h3>

In [242]:
all_tickers = "WFC MSFT INTC AMZN"
selected_stocks = yf.Tickers(all_tickers)
tickers = all_tickers.split(" ")

<h3>Getting the History for Each Ticker</h3>

In [243]:
selected_history = {}

for index in range(len(tickers)):
    selected_history[tickers[index]] = selected_stocks.tickers[index].history(period="1y")

<h3>Viewing Data</h3>

In [244]:
selected_history[tickers[0]].info()
selected_history[tickers[0]].head(10)

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 253 entries, 2019-10-24 to 2020-10-23
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Open          253 non-null    float64
 1   High          253 non-null    float64
 2   Low           253 non-null    float64
 3   Close         253 non-null    float64
 4   Volume        253 non-null    int64  
 5   Dividends     253 non-null    float64
 6   Stock Splits  253 non-null    int64  
dtypes: float64(5), int64(2)
memory usage: 15.8 KB


Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-10-24,48.46186,48.97843,48.46186,48.882767,16695400,0.0,0
2019-10-25,48.863638,49.485432,48.787107,49.332375,17433300,0.0,0
2019-10-28,49.686315,49.791543,49.332371,49.408901,23914800,0.0,0
2019-10-29,49.265411,50.088096,49.265411,49.906338,21063000,0.0,0
2019-10-30,49.628926,50.011568,49.380205,49.772415,16155900,0.0,0
2019-10-31,49.523695,49.772412,48.930598,49.389771,18814700,0.0,0
2019-11-01,49.868079,50.021136,49.657624,49.915909,16359100,0.0,0
2019-11-04,50.298551,50.509003,49.973304,50.432476,17559900,0.0,0
2019-11-05,50.451613,51.178635,50.451613,50.987312,25965300,0.0,0
2019-11-06,50.977745,51.50388,50.862949,51.465614,22051100,0.0,0


<h3>Methods for Calculated Data Points</h3>

In [245]:
def getIntradayChangeInfo(data_set):
    data_set["Intraday Change"] = data_set["Open"] - data_set["Close"]
    data_set["Intraday Pct Change"] = data_set["Intraday Change"] / data_set["Open"]
    
    previous_day_change = 0
    previous_day_pct_change = 0
    all_previous_day_change = []
    all_previous_day_pct_change = []
    
    for index, row in data_set.iterrows():
        all_previous_day_change.append(previous_day_change)
        all_previous_day_pct_change.append(previous_day_pct_change)
        previous_day_change = row["Intraday Change"]
        previous_day_pct_change = row["Intraday Pct Change"]
        
    data_set["Previous Day Change"] = all_previous_day_change
    data_set["Previous Day Pct Change"] = all_previous_day_pct_change

<h3>Adding Calculated Data Points</h3>

In [246]:
for key, history in selected_history.items():
    getIntradayChangeInfo(history)
    print("For {}".format(key))
    print(history.head())

For WFC
                 Open       High        Low      Close    Volume  Dividends  \
Date                                                                          
2019-10-24  48.461860  48.978430  48.461860  48.882767  16695400        0.0   
2019-10-25  48.863638  49.485432  48.787107  49.332375  17433300        0.0   
2019-10-28  49.686315  49.791543  49.332371  49.408901  23914800        0.0   
2019-10-29  49.265411  50.088096  49.265411  49.906338  21063000        0.0   
2019-10-30  49.628926  50.011568  49.380205  49.772415  16155900        0.0   

            Stock Splits  Intraday Change  Intraday Pct Change  \
Date                                                             
2019-10-24             0        -0.420907            -0.008685   
2019-10-25             0        -0.468737            -0.009593   
2019-10-28             0         0.277414             0.005583   
2019-10-29             0        -0.640927            -0.013010   
2019-10-30             0        -0.143489 

<h3>Analysis Methods</h3>

In [247]:
def createLinearRegression(X, y):
    lr = LinearRegression()
    lr.fit(X, y)
    return lr

def testLinearRegression(model, X_train, X_test, y_train, y_test):
    train_score = model.score(X_train, y_train)
    test_score = model.score(X_test, y_test)
    
    preds = model.predict(X_test)
    score = explained_variance_score(y_test, preds)
    mae = mean_absolute_error(y_test, preds)
    rmse = math.sqrt(mean_squared_error(y_test, preds))
    r2 = r2_score(y_test, preds)
    
    print("Train Score: {}\nTest Score: {}"
          .format(train_score, test_score))
    print("Score Details = {:.5f} | MAE = {:.3f} | RMSE = {:.3f} | R2 = {:.5f}"
          .format(score, mae, rmse, r2))
    print("\n")
    

def conductLinearRegressionAnalysis(data_set, features, target):
    X = data_set[features]
    y = data_set[target]
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25)
    
    model= createLinearRegression(X_train, y_train)
    testLinearRegression(model, X_train, X_test, y_train, y_test)
    
    return model

<h3>Models for Selected Tickers</h3>

In [248]:
features = ["Previous Day Pct Change", "Previous Day Change","High", "Low", "Volume", "Open"]
target = "Intraday Pct Change"
models = {}

for key, history in selected_history.items():
    print("For: {}".format(key))
    models[key] = conductLinearRegressionAnalysis(history, features, target)

For: WFC
Train Score: 0.7111372636229771
Test Score: 0.8541077534178021
Score Details = 0.85780 | MAE = 0.007 | RMSE = 0.009 | R2 = 0.85411


For: MSFT
Train Score: 0.7236760108599676
Test Score: 0.6846308308392879
Score Details = 0.69842 | MAE = 0.008 | RMSE = 0.012 | R2 = 0.68463


For: INTC
Train Score: 0.7828458033420452
Test Score: 0.8455861780513523
Score Details = 0.84971 | MAE = 0.007 | RMSE = 0.009 | R2 = 0.84559


For: AMZN
Train Score: 0.7570279112122215
Test Score: 0.7373800556669776
Score Details = 0.73761 | MAE = 0.006 | RMSE = 0.009 | R2 = 0.73738




<h3>Getting Data for Last Month</h3>

In [249]:
selected_1m_history = {}

for index in range(len(tickers)):
    selected_1m_history[tickers[index]] = selected_stocks.tickers[index].history(period="1mo")
    getIntradayChangeInfo(selected_1m_history[tickers[index]])

<h3>Prediction Methods</h3>

In [250]:
def createPredictions(model, data_set, features):
    return model.predict(data_set[features])

<h3>Predicting the Last Month</h3>

In [251]:
for key, history in selected_1m_history.items():
    history["Prediction"] = createPredictions(models[key], history, features)

<h3>Plotting Methods</h3>

In [252]:
def plotResults(data_set, plot_values):
    plyo.iplot(data_set[plot_values].iplot(asFigure=True))

<h3>Plotting Actual vs Prediction</h3>

In [253]:
plot_values = [target, "Prediction"]

for key, history in selected_1m_history.items():
    print("For {}:".format(key))
    plotResults(history, plot_values)

For WFC:


For MSFT:


For INTC:


For AMZN:


<h3>Selecting a Stock Based on Predictions</h3>

In [254]:
def getRecommendation(data_set, based_on):
    final_predictions = {}
    for key, history in selected_1m_history.items():
        final_predictions[key] = history.iloc[-1]["Prediction"]

    return max(final_predictions, key=final_predictions.get)

In [255]:
print(getRecommendation(selected_1m_history, "Prediction"))

WFC
