In [1]:
import yfinance as yf
import numpy as np
import pandas as pd
import datetime
# Idea: the metrics could be made selectable from the web page.
# %timeit
class Stocks:
    """Download price data for a set of tickers and derive simple statistics.

    Attributes (stored exactly as passed to the constructor):
        tickers: ticker symbol, or list of symbols, handed to ``yf.download``.
        metrics: list of column labels (e.g. "Open", "Close") to keep.
        start_date: first requested date as a "YYYY-MM-DD" string.
        end_date: end of the requested range as a "YYYY-MM-DD" string.
    """

    def __init__(self, tickers, metrics, start_date, end_date):
        self.tickers = tickers
        self.metrics = metrics
        self.start_date = start_date
        self.end_date = end_date

    def get_stock_info(self):
        """Validate the inputs and download the requested metrics.

        Returns:
            The downloaded DataFrame restricted to ``self.metrics``. When
            validation fails, an error message string is returned instead
            (kept for backward compatibility), so callers should check the
            type of the result before using it as a frame.
        """
        # A bare string would otherwise be iterated character by character.
        symbols = [self.tickers] if isinstance(self.tickers, str) else self.tickers

        # Check if all tickers are valid.
        # NOTE(review): constructing yf.Ticker probably does not contact the
        # data source, so unknown symbols may pass this check -- confirm
        # against the yfinance documentation.
        for ticker in symbols:
            try:
                yf.Ticker(ticker)
            except Exception:
                return "invalid ticker: {}".format(ticker)

        # Make sure start_date and end_date are in the correct format; the
        # parsed values are not needed, only the ValueError on bad input.
        try:
            for value in (self.start_date, self.end_date):
                datetime.datetime.strptime(value, "%Y-%m-%d")
        except ValueError:
            return "incorrect date format. Use year-month-day (e.g. 2022-01-31)"

        # Return the stock data
        prices = yf.download(self.tickers, start=self.start_date, end=self.end_date)
        return prices.loc[:, self.metrics]

    def _ticker_label(self, col):
        """Return the name to print for one column label of the first metric."""
        if isinstance(col, tuple):
            # (metric, ticker) label from two-level columns.
            return col[1]
        # Flat columns carry only the metric name; use the ticker when there
        # is exactly one, otherwise fall back to the column label itself.
        symbols = [self.tickers] if isinstance(self.tickers, str) else list(self.tickers)
        return symbols[0] if len(symbols) == 1 else col

    def nan_dates(self, df):
        """Describe, per ticker, the first date that has data.

        Only the columns of the first metric (``self.metrics[0]``) are
        inspected. A column without any missing value is reported from the
        requested ``self.start_date``; otherwise the date of its first
        non-null row is reported.

        Args:
            df: price frame as returned by ``get_stock_info``; the index is
                assumed to hold timestamps (``.date()`` is called on it).

        Returns:
            A list of human-readable strings, one per inspected column.
        """
        first_metric_cols = df.loc[:, [self.metrics[0]]].columns

        messages = []
        for col in first_metric_cols:
            label = self._ticker_label(col)
            series = df[col]

            if not series.isnull().any():
                # Complete column: data is available from the requested start.
                first = self.start_date
            else:
                first_idx = series.first_valid_index()
                if first_idx is None:
                    # Column holds no data at all.
                    messages.append(f"No entries found for {label}")
                    continue
                first = str(first_idx.date())

            messages.append(f"First entry for {label} is in {first}")
        return messages

    def remove_rows_with_nan(self, df):
        """Return a copy of ``df`` without the rows that contain any NaN."""
        return df.dropna()

    def get_returns(self, df, metrics=None):
        """Compute log returns between consecutive rows.

        Args:
            df: price frame.
            metrics: list of column labels to use; defaults to
                ``self.metrics`` when omitted.

        Returns:
            DataFrame of ``log(price_t / price_{t-1})`` without the first
            row, which has no predecessor.
        """
        selected = self.metrics if metrics is None else metrics
        prices = df[selected]
        log_returns = np.log(prices / prices.shift(1))
        return log_returns.iloc[1:, :]

    def get_corr(self, df):
        """Return the pairwise Pearson correlation matrix of ``df``'s columns."""
        return df.corr(method="pearson")

In [2]:
# Analysis inputs: symbols to compare, price columns to keep, and the
# requested date range as "YYYY-MM-DD" strings.
tickers = ["QQQ", "VOO"]
metrics = ["Open", "Close", "Adj Close"]
start_date, end_date = "2010-01-01", "2020-01-01"

In [3]:
# Bundle the analysis inputs into one Stocks instance used by the cells below.
s1 = Stocks(tickers, metrics, start_date, end_date)

In [4]:
# Download the selected metrics for every ticker. Note that get_stock_info
# returns an error message string instead of a frame when validation fails.
df = s1.get_stock_info()

[*********************100%***********************]  2 of 2 completed


In [5]:
# Display the downloaded frame (columns are grouped by metric, then ticker).
df

Unnamed: 0_level_0,Open,Open,Close,Close,Adj Close,Adj Close
Unnamed: 0_level_1,QQQ,VOO,QQQ,VOO,QQQ,VOO
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2010-01-04,46.330002,,46.419998,,41.068943,
2010-01-05,46.389999,,46.419998,,41.068943,
2010-01-06,46.400002,,46.139999,,40.821220,
2010-01-07,46.209999,,46.169998,,40.847771,
2010-01-08,46.070000,,46.549999,,41.183960,
...,...,...,...,...,...,...
2019-12-24,212.000000,295.359985,211.919998,295.160004,208.175812,281.141174
2019-12-26,212.259995,295.559998,213.789993,296.670013,210.012787,282.579407
2019-12-27,214.539993,297.510010,213.610001,296.670013,209.835983,282.579407
2019-12-30,213.500000,296.750000,212.210007,295.040009,208.460709,281.026855


In [6]:
# Report, per ticker, the date from which data is available.
s1.nan_dates(df)

['First entry for QQQ is in 2010-01-01',
 'First entry for VOO is in 2010-09-09']

In [7]:
# Keep only the rows in which every column has a value, then display them.
new_df = s1.remove_rows_with_nan(df)
new_df

Unnamed: 0_level_0,Open,Open,Close,Close,Adj Close,Adj Close
Unnamed: 0_level_1,QQQ,VOO,QQQ,VOO,QQQ,VOO
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2010-09-09,46.660000,102.500000,46.430000,101.320000,41.280598,80.010017
2010-09-10,46.480000,101.680000,46.599998,101.779999,41.431740,80.373276
2010-09-13,46.959999,102.959999,47.250000,103.059998,42.009655,81.384056
2010-09-14,47.189999,102.839996,47.450001,103.040001,42.187466,81.368271
2010-09-15,47.330002,102.620003,47.750000,103.300003,42.454201,81.573586
...,...,...,...,...,...,...
2019-12-24,212.000000,295.359985,211.919998,295.160004,208.175812,281.141174
2019-12-26,212.259995,295.559998,213.789993,296.670013,210.012787,282.579407
2019-12-27,214.539993,297.510010,213.610001,296.670013,209.835983,282.579407
2019-12-30,213.500000,296.750000,212.210007,295.040009,208.460709,281.026855


In [8]:
# Log returns between consecutive rows of the NaN-free price frame.
ret_df = s1.get_returns(new_df, metrics)
ret_df

Unnamed: 0_level_0,Open,Open,Close,Close,Adj Close,Adj Close
Unnamed: 0_level_1,QQQ,VOO,QQQ,VOO,QQQ,VOO
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2010-09-10,-0.003865,-0.008032,0.003655,0.004530,0.003655,0.004530
2010-09-13,0.010274,0.012510,0.013852,0.012498,0.013852,0.012498
2010-09-14,0.004886,-0.001166,0.004224,-0.000194,0.004224,-0.000194
2010-09-15,0.002962,-0.002141,0.006303,0.002520,0.006303,0.002520
2010-09-16,0.006948,0.003890,0.003971,-0.000387,0.003971,-0.000387
...,...,...,...,...,...,...
2019-12-24,0.000000,-0.000440,0.000519,0.000000,0.000519,0.000000
2019-12-26,0.001226,0.000677,0.008785,0.005103,0.008785,0.005103
2019-12-27,0.010684,0.006576,-0.000842,0.000000,-0.000842,0.000000
2019-12-30,-0.004859,-0.002558,-0.006576,-0.005509,-0.006576,-0.005509


In [9]:
# Pearson correlation between the log-return columns.
s1.get_corr(ret_df)

Unnamed: 0_level_0,Unnamed: 1_level_0,Open,Open,Close,Close,Adj Close,Adj Close
Unnamed: 0_level_1,Unnamed: 1_level_1,QQQ,VOO,QQQ,VOO,QQQ,VOO
Open,QQQ,1.0,0.913792,0.317237,0.331237,0.316157,0.330436
Open,VOO,0.913792,1.0,0.307631,0.368,0.30827,0.361565
Close,QQQ,0.317237,0.307631,1.0,0.920333,0.999498,0.923003
Close,VOO,0.331237,0.368,0.920333,1.0,0.920758,0.997497
Adj Close,QQQ,0.316157,0.30827,0.999498,0.920758,1.0,0.923442
Adj Close,VOO,0.330436,0.361565,0.923003,0.997497,0.923442,1.0


In [10]:
# Pearson correlation between the price-level columns (not returns), shown
# for comparison with the return-based matrix above.
s1.get_corr(new_df)

Unnamed: 0_level_0,Unnamed: 1_level_0,Open,Open,Close,Close,Adj Close,Adj Close
Unnamed: 0_level_1,Unnamed: 1_level_1,QQQ,VOO,QQQ,VOO,QQQ,VOO
Open,QQQ,1.0,0.990496,0.999728,0.990198,0.99967,0.994302
Open,VOO,0.990496,1.0,0.990238,0.999691,0.989231,0.99878
Close,QQQ,0.999728,0.990238,1.0,0.990464,0.99993,0.994537
Close,VOO,0.990198,0.999691,0.990464,1.0,0.989443,0.999046
Adj Close,QQQ,0.99967,0.989231,0.99993,0.989443,1.0,0.993983
Adj Close,VOO,0.994302,0.99878,0.994537,0.999046,0.993983,1.0
