In [1]:
from matplotlib import pyplot as plt
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import date
import c_stock_data as SD

# Set input parameters to fetch data

In [2]:
FILE_PATH = 'Data/stock_df.parquet'
START_DATE = date(2020, 1, 1)
END_DATE = (date.today() - pd.offsets.BDay(1)).date()  # Go back 1 business day from current day
# END_DATE = date(2022, 9, 30)
TICKERS = ['GOOGL', 'MSFT', 'TSLA', 'AAPL', 'NFLX', 'AMZN']
# TICKERS = ['GOOGL', 'MSFT', 'TSLA']

# Fetch downloaded data

1. Check if downloaded data exists
2. Check if downloaded data has requested tickers
3. Check if downloaded data has requested time period

### 1. Check for downloaded data

In [3]:
sd_one = SD.YFinData(tickers=TICKERS, start_date=START_DATE, end_date=END_DATE, file_path=FILE_PATH)

In [4]:
sd_one.get_df()

File found - using stored data
Tickers to be added:  
Starts as early as possible
Should download earlier data for existing tickers
[*********************100%***********************]  6 of 6 completed


Unnamed: 0,Date,Ticker,Adj_Close,Close,High,Low,Open,Volume
0,2019-12-31,GOOGL,66.969498,66.969498,67.032997,66.606499,66.789497,19514000
1,2019-12-31,MSFT,153.745209,157.699997,157.770004,156.449997,156.770004,18369400
2,2019-12-31,TSLA,27.888666,27.888666,28.086000,26.805332,27.000000,154285500
3,2020-01-02,GOOGL,68.433998,68.433998,68.433998,67.324501,67.420502,27278000
4,2020-01-02,MSFT,156.591995,160.619995,160.729996,158.330002,158.779999,22622100
...,...,...,...,...,...,...,...,...
4198,2022-10-07,MSFT,234.240005,234.240005,241.320007,233.169998,240.899994,37732400
4199,2022-10-07,TSLA,223.070007,223.070007,234.570007,222.020004,233.940002,83592300
4200,2022-10-10,GOOGL,97.860001,97.860001,99.099998,97.010002,99.000000,18225100
4201,2022-10-10,MSFT,229.250000,229.250000,234.559998,226.729996,233.050003,29743600


#### Get ticker stock price

In [5]:
df = sd_one.get_df()

In [6]:
df.sort_values(['Date', 'Ticker']).tail(n=20)

Unnamed: 0,Date,Ticker,Adj_Close,Close,High,Low,Open,Volume
4172,2022-10-05,NFLX,236.729996,236.729996,241.589996,227.380005,238.759995,10825600
4193,2022-10-05,TSLA,240.809998,240.809998,246.669998,233.270004,245.009995,86982700
4173,2022-10-06,AAPL,145.429993,145.429993,147.539993,145.220001,145.809998,68402200
4174,2022-10-06,AMZN,120.300003,120.300003,121.529999,119.5,120.769997,42253800
4194,2022-10-06,GOOGL,101.419998,101.419998,102.93,100.589996,100.68,22324000
4195,2022-10-06,MSFT,246.789993,246.789993,250.339996,246.080002,247.929993,20239900
4175,2022-10-06,NFLX,240.020004,240.020004,244.279999,233.910004,236.679993,9188900
4196,2022-10-06,TSLA,238.130005,238.130005,244.580002,235.350006,239.440002,69298400
4176,2022-10-07,AAPL,140.089996,140.089996,143.100006,139.449997,142.539993,85859100
4177,2022-10-07,AMZN,114.559998,114.559998,118.169998,113.879997,118.0,54626400


In [7]:
df.sort_values(['Date', 'Ticker']).head(n=20)

Unnamed: 0,Date,Ticker,Adj_Close,Close,High,Low,Open,Volume
2079,2019-12-30,AAPL,71.517334,72.879997,73.172501,71.305,72.364998,144114400
2080,2019-12-30,AMZN,92.344498,92.344498,94.199997,92.030998,93.699997,73494000
2081,2019-12-30,NFLX,323.309998,323.309998,329.190002,322.859985,329.079987,4311500
2082,2019-12-31,AAPL,72.039879,73.412498,73.419998,72.379997,72.482498,100805600
2083,2019-12-31,AMZN,92.391998,92.391998,92.663002,91.611504,92.099998,50130000
0,2019-12-31,GOOGL,66.969498,66.969498,67.032997,66.606499,66.789497,19514000
1,2019-12-31,MSFT,153.745209,157.699997,157.770004,156.449997,156.770004,18369400
2084,2019-12-31,NFLX,323.570007,323.570007,324.920013,321.089996,322.0,3713300
2,2019-12-31,TSLA,27.888666,27.888666,28.086,26.805332,27.0,154285500
2085,2020-01-02,AAPL,73.683571,75.087502,75.150002,73.797501,74.059998,135480400


In [8]:
df.loc[df.duplicated(['Date', 'Ticker']), :]

Unnamed: 0,Date,Ticker,Adj_Close,Close,High,Low,Open,Volume


In [9]:
df.loc[df.Date == '2022-09-29', :]

Unnamed: 0,Date,Ticker,Adj_Close,Close,High,Low,Open,Volume


In [10]:
df.pivot(index='Date', columns='Ticker', values='Adj_Close')

Ticker,AAPL,AMZN,GOOGL,MSFT,NFLX,TSLA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-12-30,71.517334,92.344498,,,323.309998,
2019-12-31,72.039879,92.391998,66.969498,153.745209,323.570007,27.888666
2020-01-02,73.683571,94.900497,68.433998,156.591995,329.809998,28.684000
2020-01-03,72.967201,93.748497,68.075996,154.642166,325.899994,29.534000
2020-01-06,73.548630,95.143997,69.890503,155.041870,335.829987,30.102667
...,...,...,...,...,...,...
2022-10-04,146.100006,121.089996,101.639999,248.880005,240.740005,249.440002
2022-10-05,146.399994,120.949997,101.430000,249.199997,236.729996,240.809998
2022-10-06,145.429993,120.300003,101.419998,246.789993,240.020004,238.130005
2022-10-07,140.089996,114.559998,98.680000,234.240005,224.750000,223.070007


In [11]:
sd_one.write_df()

Dataframe is saved to:  Data/stock_df.parquet
