In [13]:
# Provides ways to work with large multidimensional arrays
import numpy as np 
# Allows for further data manipulation and analysis
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt # Plotting
import matplotlib.dates as mdates # Styling dates
%matplotlib inline

import datetime as dt # For defining dates
import mplfinance as mpf # Matplotlib finance

import time

# Used to get data from a directory
import os
from os import listdir
from os.path import isfile, join

In [88]:
# Define path to files (relative to the notebook's working directory)
# For Linux
BASE_PATH = "../../../Documents/finance_files/"
DATA_PATH = BASE_PATH
STOCKS_PATH = BASE_PATH + "stocks/"

# Start date defaults
S_YEAR = 2017
S_MONTH = 1
S_DAY = 3
S_DATE_DATETIME = dt.datetime(S_YEAR, S_MONTH, S_DAY)
# Derived from the datetime so the string can never drift from S_YEAR/S_MONTH/S_DAY
S_DATE_STR = S_DATE_DATETIME.date().isoformat()

# End date defaults
E_YEAR = 2021
E_MONTH = 8
E_DAY = 19
E_DATE_DATETIME = dt.datetime(E_YEAR, E_MONTH, E_DAY)
# Derived from the datetime so the string can never drift from E_YEAR/E_MONTH/E_DAY
E_DATE_STR = E_DATE_DATETIME.date().isoformat()

In [89]:
# Tickers whose download failed; save_to_csv_from_yahoo appends to this list
stocks_not_downloaded: list = []
# Not referenced by the code visible in this section of the notebook --
# presumably reserved for tickers with no data; TODO confirm
missing_stocks: list = []

In [90]:
def get_tickers():
    """Load the ticker table stored as ``stocks.csv`` under ``STOCKS_PATH``.

    Returns:
        The parsed DataFrame, or None (after printing a notice) when the
        file does not exist.
    """
    csv_path = STOCKS_PATH + 'stocks.csv'
    try:
        return pd.read_csv(csv_path)
    except FileNotFoundError:
        print("Stocks File Doesn't Exist")
        return None
# get_tickers() returns None when stocks.csv is missing; fall back to an empty
# ticker list instead of crashing with "'NoneType' object is not subscriptable".
tickers_df = get_tickers()
tickers = sorted(tickers_df['ticker'].tolist()) if tickers_df is not None else []
print(tickers)

['AMD', 'INTC', 'SPY']


In [91]:
# Downloads a ticker's adjusted close prices for a date range and saves them to a CSV
def save_to_csv_from_yahoo(ticker, syear, smonth, sday, eyear, emonth, eday):
    """Download 'Adj Close' prices for ``ticker`` and write them to DATA_PATH.

    Args:
        ticker: Symbol to request; also used as the CSV file name.
        syear, smonth, sday: Components of the start date.
        eyear, emonth, eday: Components of the end date.

    Returns:
        None. On success ``DATA_PATH + ticker + '.csv'`` is written. On any
        failure (including an empty result) the ticker is recorded in
        ``stocks_not_downloaded`` and a message is printed; no exception
        escapes from the download/save step.

    Raises:
        ValueError: If the date components do not form a valid date (the
            datetimes are built outside the guarded section).
    """
    # Defines the time periods to use
    start = dt.datetime(syear, smonth, sday)
    end = dt.datetime(eyear, emonth, eday)

    try:
        print("Get Data for : ", ticker)
        # auto_adjust=False is passed explicitly so the 'Adj Close' column
        # exists regardless of the library's default.
        df = yf.download(ticker, start=start, end=end, auto_adjust=False)['Adj Close']

        # Wait 5 seconds -- presumably to space out consecutive requests
        time.sleep(5)

        # An empty result means nothing usable came back; treat it as a
        # failure instead of writing an empty CSV that later looks like data.
        if df.empty:
            raise ValueError("no rows returned for " + str(ticker))

        # Save data to a CSV file linux
        df.to_csv(DATA_PATH + ticker + '.csv')
    except Exception as ex:
        # Best effort: remember the failure and keep the notebook running.
        if ticker not in stocks_not_downloaded:
            stocks_not_downloaded.append(ticker)
        print("Couldn't Get Data for :", ticker)
        # Surface the reason so failures can be diagnosed
        print(ex)

In [92]:
def get_df_from_csv(folder, ticker):
    """Read ``folder + ticker + '.csv'`` into a DataFrame indexed by 'Date'.

    Args:
        folder: Directory prefix; concatenated as-is, so it must already end
            with a path separator.
        ticker: File name without the ``.csv`` extension.

    Returns:
        The DataFrame with the 'Date' column parsed as the index, or None
        (after printing a notice) when the file does not exist.
    """
    csv_path = folder + ticker + '.csv'
    try:
        return pd.read_csv(csv_path, index_col='Date', parse_dates=True)
    except FileNotFoundError:
        print("File Doesn't Exist")
        return None

In [122]:
def update_stock_data(ticker):
    """Return the cached price history for ``ticker``, extended with newer rows.

    When a CSV for the ticker exists in DATA_PATH, columns whose name contains
    'unnamed' and any 'daily_return' column are removed. Unless the last cached
    date equals E_DATE_DATETIME, 'Adj Close' rows are then downloaded starting
    at the last cached date (with ``dt.date.today()`` as the end argument) and
    appended. When no CSV exists, the default date range is downloaded through
    save_to_csv_from_yahoo and the resulting file is loaded.

    Args:
        ticker: Symbol to update; also the CSV file name inside DATA_PATH.

    Returns:
        The up-to-date DataFrame, or None when no CSV existed and the initial
        download did not produce one.
    """
    orig_df = get_df_from_csv(DATA_PATH, ticker)
    if orig_df is None:
        print('there is no data')
        save_to_csv_from_yahoo(ticker, S_YEAR, S_MONTH, S_DAY, E_YEAR, E_MONTH, E_DAY)
        # Return what was just downloaded (None if the download failed) so the
        # caller receives a frame instead of always getting None here.
        return get_df_from_csv(DATA_PATH, ticker)

    print('there is data')
    # Drop the unnamed column(s). drop() returns a new frame; combining it with
    # inplace=True returns None, which previously clobbered orig_df.
    unnamed_cols = orig_df.columns[orig_df.columns.str.contains('unnamed', case=False)]
    orig_df = orig_df.drop(columns=unnamed_cols)

    # Drop the daily_return column
    if 'daily_return' in orig_df.columns:
        orig_df = orig_df.drop(columns='daily_return')

    # Check if it needs more data. An empty cache has no last date, so start
    # from the default start date rather than failing on index[-1].
    has_rows = len(orig_df.index) > 0
    if not has_rows or orig_df.index[-1] != E_DATE_DATETIME:
        fetch_from = orig_df.index[-1] if has_rows else S_DATE_DATETIME
        # auto_adjust=False is passed explicitly so the 'Adj Close' column
        # exists regardless of the library's default.
        updated_df = yf.download(ticker, start=fetch_from, end=dt.date.today(),
                                 auto_adjust=False)['Adj Close']
        if updated_df is not None and not updated_df.empty:
            if isinstance(updated_df, pd.Series):
                # Make the column name explicit so the rows line up with the
                # cached 'Adj Close' column when concatenated.
                updated_df = updated_df.to_frame(name='Adj Close')
            orig_df = pd.concat([orig_df, updated_df])
            # The download starts on the last cached date, so that row comes
            # back again; keep only the freshest copy of each date.
            orig_df = orig_df[~orig_df.index.duplicated(keep='last')]

    display(orig_df)
    return orig_df


In [123]:
def save_dataframe_to_csv(df, folder, ticker):
    """Write ``df`` to ``folder + ticker + '.csv'``.

    Args:
        df: DataFrame to persist (its index is written as the first column).
        folder: Directory prefix; concatenated as-is, so it must already end
            with a path separator.
        ticker: File name without the ``.csv`` extension.
    """
    destination = folder + ticker + '.csv'
    df.to_csv(destination)

In [124]:
def update_stocks(stock_folder):
    """Refresh and save the price history of every symbol in ``tickers``.

    Each ticker is processed independently: an error for one ticker is printed
    and the loop moves on to the next.

    Args:
        stock_folder: Directory prefix the updated CSV files are written to.
    """
    for x in tickers:
        try:
            print("Working on :", x)
            new_df = update_stock_data(x)
            # update_stock_data can return None when there is nothing to save;
            # skip the write instead of failing inside save_dataframe_to_csv
            # with a misleading "'NoneType' object has no attribute 'to_csv'".
            if new_df is None:
                print("No data to save for :", x)
                continue
            save_dataframe_to_csv(new_df, stock_folder, x)
        except Exception as ex:
            # Best effort: report the problem and continue with the next ticker
            print(ex)

In [126]:
# Update every ticker in `tickers`, saving the resulting CSV files into DATA_PATH
update_stocks(DATA_PATH)

Working on : AMD
there is data


[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Adj Close
Date,Unnamed: 1_level_1
2017-01-03,11.430000
2017-01-04,11.430000
2017-01-05,11.240000
2017-01-06,11.320000
2017-01-09,11.490000
...,...
2024-07-30,138.440002
2024-07-31,144.479996
2024-08-01,132.539993
2024-08-02,132.500000


Working on : INTC
there is data


[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Adj Close
Date,Unnamed: 1_level_1
2017-01-03,30.069319
2017-01-04,29.913223
2017-01-05,29.863924
2017-01-06,29.970736
2017-01-09,30.077530
...,...
2024-07-30,30.129999
2024-07-31,30.740000
2024-08-01,29.049999
2024-08-02,21.480000


Working on : SPY
there is data


[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Adj Close
Date,Unnamed: 1_level_1
2017-01-03,198.560028
2017-01-04,199.741364
2017-01-05,199.582626
2017-01-06,200.296722
2017-01-09,199.635513
...,...
2024-07-30,542.000000
2024-07-31,550.809998
2024-08-01,543.010010
2024-08-02,532.900024
