# yfinance

## Package imports

In [1]:
import yfinance as yf
import pandas as pd
from google_auth_oauthlib import flow

In [6]:
def _get_dataframe(ticker_name, start, end):
    """
    _get_dataframe(ticker_name, start, end)
    Fetch the price history (OHLC, adj close and volume) of ticker_name
    from yahoo finance, limited to the given start and end dates
    returns the downloaded dataframe
    """
    # yf.download does all the work; the result is handed back untouched
    return yf.download(ticker_name, start=start, end=end)


def _get_start_end(kwargs):
    '''
    _get_start_end(kwargs)
    Pull the 'start' and 'end' dates out of a kwargs dict.
    A missing key falls back to "2018-01-01" (start) or "2020-12-31" (end).
    returns the (start, end) pair
    '''
    # (key, fallback) pairs, listed in the order the values are returned
    defaults = (('start', "2018-01-01"), ('end', "2020-12-31"))
    return tuple(kwargs.get(key, fallback) for key, fallback in defaults)
         

def save_local(ticker_name, path_filename, **kwargs):
    """
    save_local(ticker_name, path_filename, **kwargs)
    Download the price history of ticker_name and save it as a csv file
    at path_filename.

    kwargs:
        start, end - date range, read through _get_start_end
                     (its default dates apply when a key is omitted)

    Nothing is written when the download comes back empty; a message is
    printed instead so the skipped save is visible.
    returns None
    """
    start, end = _get_start_end(kwargs)

    df = _get_dataframe(ticker_name, start=start, end=end)
    if df.empty:
        # Avoid leaving a header-only csv behind, but say so rather than
        # returning silently.
        print(f"No data for {ticker_name} from {start} to {end}; nothing saved to {path_filename}")
        return

    df.to_csv(path_filename)
    print(f"{ticker_name} from {start} to {end} saved to {path_filename}")
        

def _get_credentials(secrets='client_secrets.json'):
    """
    _get_credentials(secrets='client_secrets.json')
    Run the installed-app OAuth flow (browser based authentication) for
    the bigquery scope, using the client secrets file given.
    The resulting credentials are stored in the global var `credentials`.
    """
    global credentials

    bigquery_scopes = ["https://www.googleapis.com/auth/bigquery"]
    oauth_flow = flow.InstalledAppFlow.from_client_secrets_file(
        secrets, scopes=bigquery_scopes
    )
    oauth_flow.run_local_server()

    # publish the result for save_gbq / read_gbq to pick up
    credentials = oauth_flow.credentials
    

def save_gbq(ticker_name, table_name, **kwargs):
    """
    save_gbq(ticker_name, table_name, **kwargs)
    Download the price history of ticker_name and upload it to the gbq
    table `{project_id}.main.{table_name}`.

    kwargs:
        start, end - date range, read through _get_start_end
        project_id - gbq project, defaults to "ioracle"

    Nothing is uploaded when the download comes back empty.
    returns None
    """
    # Check if credentials exist, create credentials if necessary.
    # Only NameError means "not created yet"; a bare except would also
    # swallow KeyboardInterrupt and unrelated failures.
    try:
        credentials
    except NameError:
        _get_credentials()

    start, end = _get_start_end(kwargs)
    project_id = kwargs.get('project_id', "ioracle")

    temp = _get_dataframe(ticker_name, start=start, end=end)

    if len(temp) != 0:  # check that df is not empty
        # Drop the space from the 'Adj Close' column name and turn the Date
        # index into a regular column so it is uploaded with the other fields.
        temp = temp.rename(columns={'Adj Close': 'Adj_Close'}).reset_index()
        temp.to_gbq(
            f'{project_id}.main.{table_name}',
            project_id=project_id,
            credentials=credentials,
            # hard code schema for date from DATETIME to DATE
            table_schema=[{'name': 'Date', 'type': 'DATE'}],
        )
        

def read_local(path_filename):
    """
    read_local(path_filename)
    Load a csv file, convert its 'Date' column to datetimes and use that
    column as the index
    returns the date-indexed df
    """
    raw = pd.read_csv(path_filename)
    parsed_dates = pd.to_datetime(raw['Date'])
    # swap the text dates for the parsed ones, then promote them to the index
    return raw.assign(Date=parsed_dates).set_index('Date')
    

# read from gbq (undo the reset_index done before upload: Date becomes the sorted index again)
def read_gbq(table_name, **kwargs):
    """
    read_gbq(table_name, **kwargs)
    Read every row of the gbq table `{project_id}.main.{table_name}`.

    kwargs:
        project_id - gbq project, defaults to "ioracle"

    returns the df sorted by 'Date', with 'Date' set as the index
    """
    # Check if credentials exist, create credentials if necessary.
    # Only NameError means "not created yet"; a bare except would also
    # swallow KeyboardInterrupt and unrelated failures.
    try:
        credentials
    except NameError:
        _get_credentials()

    project_id = kwargs.get('project_id', "ioracle")

    # NOTE: project_id and table_name are interpolated straight into the
    # query text, so only pass trusted values here.
    sql = f"SELECT * FROM `{project_id}.main.{table_name}` "

    df = pd.read_gbq(sql, project_id=project_id, credentials=credentials)
    # query results carry no ordering guarantee here, so sort before indexing
    df = df.sort_values('Date').set_index('Date')

    return df
    


In [12]:
save_local('aapl', 'play.csv', start='2020-01-01')

[*********************100%***********************]  1 of 1 completed
aapl from 2020-01-01 to 2020-12-31 saved to play.csv


In [31]:
read_local('play.csv').index

DatetimeIndex(['2019-12-31', '2020-01-02', '2020-01-03', '2020-01-06',
               '2020-01-07', '2020-01-08', '2020-01-09', '2020-01-10',
               '2020-01-13', '2020-01-14',
               ...
               '2020-12-16', '2020-12-17', '2020-12-18', '2020-12-21',
               '2020-12-22', '2020-12-23', '2020-12-24', '2020-12-28',
               '2020-12-29', '2020-12-30'],
              dtype='datetime64[ns]', name='Date', length=253, freq=None)

In [36]:
save_gbq("TSLA", 'test_data', start='2020-01-01')

[*********************100%***********************]  1 of 1 completed


In [22]:
df = pd.read_csv('play.csv')
df.dtypes

Date          object
Open         float64
High         float64
Low          float64
Close        float64
Adj Close    float64
Volume         int64
dtype: object

In [23]:
df = df.set_index('Date')
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-12-31,72.482498,73.419998,72.379997,73.412498,72.33799,100805600
2020-01-02,74.059998,75.150002,73.797501,75.087502,73.98848,135480400
2020-01-03,74.287498,75.144997,74.125,74.357498,73.269142,146322800
2020-01-06,73.447502,74.989998,73.1875,74.949997,73.852982,118387200
2020-01-07,74.959999,75.224998,74.370003,74.597504,73.505638,108872000


In [24]:
df.index

Index(['2019-12-31', '2020-01-02', '2020-01-03', '2020-01-06', '2020-01-07',
       '2020-01-08', '2020-01-09', '2020-01-10', '2020-01-13', '2020-01-14',
       ...
       '2020-12-16', '2020-12-17', '2020-12-18', '2020-12-21', '2020-12-22',
       '2020-12-23', '2020-12-24', '2020-12-28', '2020-12-29', '2020-12-30'],
      dtype='object', name='Date', length=253)

## Download Data

In [14]:
aapl_data = yf.download("AAPL", start="2018-01-01", end="2020-12-31")

[*********************100%***********************]  1 of 1 completed


In [15]:
aapl_data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-01-02,42.540001,43.075001,42.314999,43.064999,41.188164,102223600
2018-01-03,43.132500,43.637501,42.990002,43.057499,41.180988,118071600
2018-01-04,43.134998,43.367500,43.020000,43.257500,41.372272,89738400
2018-01-05,43.360001,43.842499,43.262501,43.750000,41.843307,94640000
2018-01-08,43.587502,43.902500,43.482498,43.587502,41.687889,82271200
...,...,...,...,...,...,...
2020-12-23,132.160004,132.429993,130.779999,130.960007,130.157623,88223700
2020-12-24,131.320007,133.460007,131.100006,131.970001,131.161407,54930100
2020-12-28,133.990005,137.339996,133.509995,136.690002,135.852493,124486200
2020-12-29,138.050003,138.789993,134.339996,134.869995,134.043640,121047300


In [6]:
aapl_data.to_csv('test.csv')

In [7]:
tsla_data = yf.download("TSLA", start="2018-01-01", end="2020-12-31")

[*********************100%***********************]  1 of 1 completed


In [8]:
tsla_data = tsla_data.rename(columns={'Adj Close': 'Adj_Close'}).reset_index()
tsla_data

Unnamed: 0,Date,Open,High,Low,Close,Adj_Close,Volume
0,2018-01-02,62.400002,64.421997,62.200001,64.106003,64.106003,21761000
1,2018-01-03,64.199997,65.050003,63.110001,63.450001,63.450001,22607500
2,2018-01-04,62.574001,63.709999,61.136002,62.924000,62.924000,49731500
3,2018-01-05,63.324001,63.448002,62.400002,63.316002,63.316002,22956000
4,2018-01-08,63.200001,67.403999,63.099998,67.281998,67.281998,49297000
...,...,...,...,...,...,...,...
750,2020-12-23,632.200012,651.500000,622.570007,645.979980,645.979980,33173000
751,2020-12-24,642.989990,666.090027,641.000000,661.770020,661.770020,22865600
752,2020-12-28,674.510010,681.400024,660.799988,663.690002,663.690002,32278600
753,2020-12-29,661.000000,669.900024,655.000000,665.989990,665.989990,22910800


In [17]:
tsla_data.dtypes

Date         datetime64[ns]
Open                float64
High                float64
Low                 float64
Close               float64
Adj_Close           float64
Volume                int64
dtype: object

In [5]:


# The `launch_browser` boolean variable indicates if a local server is used
# as the callback URL in the auth flow. A value of `True` is recommended,
# but a local server does not work if accessing the application remotely,
# such as over SSH or from a remote Jupyter notebook; set it to `False`
# in that case to use the console flow instead.
launch_browser = True

appflow = flow.InstalledAppFlow.from_client_secrets_file(
    "client_secrets.json",
    scopes=["https://www.googleapis.com/auth/bigquery"],
)

if not launch_browser:
    appflow.run_console()
else:
    appflow.run_local_server()

# Keep the authorized credentials in a notebook-level variable.
credentials = appflow.credentials

Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=284152559720-tp4cmv7dh2fff2a0n0h6rel90pmnj7t4.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A8080%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fbigquery&state=01tOsnOG2i52ewfRy8Bw16F8a5ZmmL&access_type=offline


In [18]:
credentials

<google.oauth2.credentials.Credentials at 0x7f2c835d7490>

In [22]:
tsla_data.to_gbq("ioracle.main.test_data", project_id=project_id, credentials=credentials)

## Read Data

In [4]:
read_local('play.csv')

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-12-31,72.482498,73.419998,72.379997,73.412498,72.337990,100805600
2020-01-02,74.059998,75.150002,73.797501,75.087502,73.988480,135480400
2020-01-03,74.287498,75.144997,74.125000,74.357498,73.269142,146322800
2020-01-06,73.447502,74.989998,73.187500,74.949997,73.852982,118387200
2020-01-07,74.959999,75.224998,74.370003,74.597504,73.505638,108872000
...,...,...,...,...,...,...
2020-12-23,132.160004,132.429993,130.779999,130.960007,130.157608,88223700
2020-12-24,131.320007,133.460007,131.100006,131.970001,131.161407,54930100
2020-12-28,133.990005,137.339996,133.509995,136.690002,135.852509,124486200
2020-12-29,138.050003,138.789993,134.339996,134.869995,134.043655,121047300


In [7]:
read_gbq('play_data')

Unnamed: 0_level_0,Open,High,Low,Close,Adj_Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-12-31,72.482498,73.419998,72.379997,73.412498,72.337990,100805600
2020-01-02,74.059998,75.150002,73.797501,75.087502,73.988464,135480400
2020-01-03,74.287498,75.144997,74.125000,74.357498,73.269157,146322800
2020-01-06,73.447502,74.989998,73.187500,74.949997,73.852974,118387200
2020-01-07,74.959999,75.224998,74.370003,74.597504,73.505646,108872000
...,...,...,...,...,...,...
2020-12-23,132.160004,132.429993,130.779999,130.960007,130.157608,88223700
2020-12-24,131.320007,133.460007,131.100006,131.970001,131.161423,54930100
2020-12-28,133.990005,137.339996,133.509995,136.690002,135.852509,124486200
2020-12-29,138.050003,138.789993,134.339996,134.869995,134.043655,121047300


In [13]:
project_id = "ioracle"
table = "play_data"

In [14]:
 sql = f"SELECT * FROM `{project_id}.main.{table}` "
df = pd.read_gbq(sql, project_id=project_id, credentials=credentials)

In [15]:
df.sort_values('Date').set_index('Date')

Unnamed: 0_level_0,Open,High,Low,Close,Adj_Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-12-31,72.482498,73.419998,72.379997,73.412498,72.337990,100805600
2020-01-02,74.059998,75.150002,73.797501,75.087502,73.988464,135480400
2020-01-03,74.287498,75.144997,74.125000,74.357498,73.269157,146322800
2020-01-06,73.447502,74.989998,73.187500,74.949997,73.852974,118387200
2020-01-07,74.959999,75.224998,74.370003,74.597504,73.505646,108872000
...,...,...,...,...,...,...
2020-12-23,132.160004,132.429993,130.779999,130.960007,130.157608,88223700
2020-12-24,131.320007,133.460007,131.100006,131.970001,131.161423,54930100
2020-12-28,133.990005,137.339996,133.509995,136.690002,135.852509,124486200
2020-12-29,138.050003,138.789993,134.339996,134.869995,134.043655,121047300
