# yfinance

## Package imports

In [5]:
import yfinance as yf
import pandas as pd
from datetime import datetime as dt
import os #only for jupyter notebook

In [2]:
# only for jupyter notebook: set GOOGLE_APPLICATION_CREDENTIALS to a
# service-account JSON key file (path is relative, two directories up).
# NOTE(review): presumably this is what the BigQuery upload/read calls
# further down use to authenticate -- confirm for your environment.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"]="../../service-account-file.json"

In [3]:
def _get_dataframe(ticker_name, start, end):
    """
    _get_dataframe(ticker_name, start, end)
    Thin wrapper around yf.download: forwards the ticker name together with
    the start and end dates and hands back the DataFrame it produces
    (OHLC, adjusted close and volume from Yahoo Finance).
    """
    return yf.download(ticker_name, start=start, end=end)


def _get_start_end(kwargs):
    """
    _get_start_end(kwargs)
    Pull the date range out of a kwargs dict.

    kwargs -- dict that may hold 'start' and/or 'end' entries
    A missing 'start' falls back to "2017-01-01"; a missing 'end' falls back
    to today's date formatted as YYYY-MM-DD.
    Returns the pair (start, end).
    """
    # Default end bound; the original author notes that end is not inclusive.
    today = dt.today().strftime('%Y-%m-%d')
    return kwargs.get('start', "2017-01-01"), kwargs.get('end', today)
         

def save_local(ticker_name, path_filename, **kwargs):
    """
    save_local(ticker_name, path_filename, **kwargs)
    Download a ticker's history and write it to a CSV file.

    ticker_name   -- symbol handed to _get_dataframe
    path_filename -- destination path given to DataFrame.to_csv
    **kwargs      -- optional 'start' / 'end' dates, resolved by _get_start_end

    When the download has no rows, nothing is written and nothing is printed.
    """
    start, end = _get_start_end(kwargs)
    prices = _get_dataframe(ticker_name, start=start, end=end)

    # An empty download is skipped silently.
    if len(prices) == 0:
        return

    prices.to_csv(path_filename)
    print(f"{ticker_name} from {start} to {end} saved to {path_filename}")
        

def save_gbq(ticker_name, table_name, **kwargs):
    """
    save_gbq(ticker_name, table_name, **kwargs)
    Download a ticker's history and upload it to a BigQuery table.

    ticker_name -- symbol handed to _get_dataframe
    table_name  -- table inside the `main` dataset that receives the rows
    **kwargs    -- optional 'start' / 'end' dates (resolved by
                   _get_start_end) and 'project_id' (defaults to "ioracle")

    An existing table is replaced (if_exists='replace'). When the download
    has no rows, nothing is uploaded.
    """
    project_id = kwargs.get('project_id', "ioracle")
    start, end = _get_start_end(kwargs)

    downloaded = _get_dataframe(ticker_name, start=start, end=end)
    if len(downloaded) == 0:  # nothing to upload
        return

    # Convert to an uploadable shape: 'Adj Close' loses its space
    # (presumably because BigQuery column names cannot contain one) and the
    # index is turned back into an ordinary column.
    renamed = downloaded.rename(columns={'Adj Close': 'Adj_Close'})
    uploadable = renamed.reset_index()

    destination = f'{project_id}.main.{table_name}'
    # Hard-coded schema so that Date is stored as DATE instead of DATETIME.
    date_schema = [{'name': 'Date', 'type': 'DATE'}]
    uploadable.to_gbq(
        destination,
        project_id=project_id,
        table_schema=date_schema,
        if_exists='replace',
    )
        

def read_local(path_filename):
    """
    read_local(path_filename)
    Load a CSV file, convert its 'Date' column to datetimes and use that
    column as the index.
    Returns the resulting DataFrame.
    """
    frame = pd.read_csv(path_filename)
    parsed_dates = pd.to_datetime(frame['Date'])
    # assign() swaps the parsed values in at the column's existing position;
    # set_index() then promotes that column to the index.
    return frame.assign(Date=parsed_dates).set_index('Date')
    

# read from gbq (restores Date as the index; the 'Adj_Close' column keeps its renamed form)
def read_gbq(table_name, **kwargs):
    """
    read_gbq(table_name, **kwargs)
    Select every row of a table in the `main` dataset from BigQuery.

    table_name -- table to query
    **kwargs   -- optional 'project_id' (defaults to "ioracle")

    Returns the rows as a DataFrame sorted by 'Date', with 'Date' as the index.
    """
    project_id = kwargs.get('project_id', "ioracle")
    query = f"SELECT * FROM `{project_id}.main.{table_name}` "
    rows = pd.read_gbq(query, project_id=project_id)
    # Sorting is done client-side; the query itself has no ORDER BY.
    return rows.sort_values('Date').set_index('Date')

## Download Data

In [3]:
# Download 'aapl' over the default date range (no start/end given) and write it to aapl.csv
save_local('aapl', 'aapl.csv')

[*********************100%***********************]  1 of 1 completed
aapl from 2017-01-01 to 2022-01-08 saved to aapl.csv


In [4]:
# Download 'aapl' over the default date range and upload it to the aapl_data table (default project_id)
save_gbq("aapl", 'aapl_data')

[*********************100%***********************]  1 of 1 completed


100%|███████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 7710.12it/s]


## Read Data

In [7]:
# Load aapl.csv back into a DataFrame indexed by Date
read_local('aapl.csv')

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-01-03,28.950001,29.082500,28.690001,29.037500,27.332468,115127600
2017-01-04,28.962500,29.127501,28.937500,29.004999,27.301880,84472400
2017-01-05,28.980000,29.215000,28.952499,29.152500,27.440718,88774400
2017-01-06,29.195000,29.540001,29.117500,29.477501,27.746639,127007600
2017-01-09,29.487499,29.857500,29.485001,29.747499,28.000778,134247600
...,...,...,...,...,...,...
2021-12-27,177.089996,180.419998,177.070007,180.330002,180.330002,74919600
2021-12-28,180.160004,181.330002,178.529999,179.289993,179.289993,79144300
2021-12-29,179.330002,180.630005,178.139999,179.380005,179.380005,62348900
2021-12-30,179.470001,180.570007,178.089996,178.199997,178.199997,59773000


In [4]:
# Query the aapl_data table (default project_id) into a DataFrame sorted and indexed by Date
read_gbq('aapl_data')


Unnamed: 0_level_0,Open,High,Low,Close,Adj_Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-01-03,28.950001,29.082500,28.690001,29.037500,27.332472,115127600
2017-01-04,28.962500,29.127501,28.937500,29.004999,27.301874,84472400
2017-01-05,28.980000,29.215000,28.952499,29.152500,27.440720,88774400
2017-01-06,29.195000,29.540001,29.117500,29.477501,27.746634,127007600
2017-01-09,29.487499,29.857500,29.485001,29.747499,28.000778,134247600
...,...,...,...,...,...,...
2022-01-03,177.830002,182.880005,177.710007,182.009995,182.009995,104487900
2022-01-04,182.630005,182.940002,179.119995,179.699997,179.699997,99310400
2022-01-05,179.610001,180.169998,174.639999,174.919998,174.919998,94537600
2022-01-06,172.699997,175.300003,171.639999,172.000000,172.000000,96904000
