# Pre-requisites:
Ensure that you have the following pre-requisites to run this notebook:

* Install two open source libraries in your terminal

Run 'pip install yahoo_fin'

Run 'pip install requests_html'

* Connection information to your (local) PostgreSQL database. You will need it for your config.py file that resides in the same folder as this notebook. Example:

API key for yahoofinanceapi web site

API_KEY = "######"

username = "######"

password = "######"

host = "localhost"

port = 5432

database = "######"


# 1. TRENDING STOCK SYMBOLS/TICKERS

API domain: https://yfapi.net
 API path: /v1/finance/trending/{region}

Pull data (extract) from yahoofinancials API (URL path is /v1/finance/trending/{region}).
It returns stock symbols (up to 20) for regions around the world: US AU CA FR DE HK IT ES GB IN.
Pulling for each region.

In [1]:
# Import required modules and variables
import pandas as pd
import requests
import pprint
from config import API_KEY
from config import username, password, host, port, database

In [2]:
# Start with an empty dataframe that will contain the results
# trending_df = pd.DataFrame({'symbol': 'RM', 'region': 'RM', 'startInterval': 12, 'jobTimestamp': 13 }, index=['z'])
finance_trends_df = pd.DataFrame({'symbol': [], 'region': [], 'startInterval': [], 'jobTimestamp': [] })
finance_trends_df


Unnamed: 0,symbol,region,startInterval,jobTimestamp


In [3]:
# Define connection variables
regions = ["US", "AU", "CA", "FR", "DE", "HK", "IT", "ES", "GB", "IN"]
# regions = ["US", "CA"]

url = "https://yfapi.net/v1/finance/trending/"

# Credentials to include
headers = {
    'x-api-key': API_KEY
    }

In [4]:
# trending_dict = {}
## Make API request for the number of regions of interest. Recursively save them
##  into the dataframe by using 'append'
for region in regions:
    url_string = url + region
    response = requests.request("GET", url_string, headers=headers).json()
#     trending_dict[region] = response['finance']['result']
    response1_df = pd.DataFrame(response['finance']['result'][0]['quotes'])
    response1_df['region'] = region
    response1_df['startInterval'] = response['finance']['result'][0]['startInterval']
    response1_df['jobTimestamp'] = response['finance']['result'][0]['jobTimestamp']
    finance_trends_df = finance_trends_df.append(response1_df, ignore_index=True)

    

In [5]:
## How many responses did we get?  10 regions
len(finance_trends_df['region'].unique())

10

# (TRANSFORMATION): change date/time on startInterval and jobTimestamp

In [74]:
from datetime import datetime

# Transform jobTimestamp into a date/time timestamp
finance_trends_df['jobTimestamp'] = pd.to_datetime(finance_trends_df['jobTimestamp'], unit='ms')

# Transform startInterval into a date/time format
finance_trends_df['startInterval'] = pd.to_datetime(finance_trends_df['startInterval'], format='%Y%m%d%H%M')


In [8]:
# Loop through each stock symbol and make a list of the unique stock tickers
stock_list = [stock for stock in finance_trends_df['symbol'].unique()]
stock_list

['BB',
 'FB',
 'HOOD',
 'PLUG',
 'MRM',
 'CRM',
 '1810.HK',
 'EAR',
 'U',
 'FCEL',
 'TOST',
 'TLRY',
 'RNAZ',
 'FRSH',
 'AMHC',
 'ROKU',
 'DBGI',
 'ACN',
 'NURO',
 'FEDU',
 '^AORD',
 'BABA',
 'ATER',
 'BB.TO',
 'CBDT.CN',
 'SQ',
 'AMD',
 'LSPD',
 'ENB.TO',
 'BTE.TO',
 'NVEI.TO',
 'SOFI',
 'HUT',
 'NVDA',
 'CADUSD=X',
 '^FCHI',
 'AMC',
 '^HSI',
 '3333.HK',
 'EURUSD=X',
 'RR.L',
 'AAPL',
 'GME',
 '^DJI']

In [14]:
# How many unique stock tickers do we have?
len(stock_list)

44

In [78]:
finance_trends_df.head()

Unnamed: 0,symbol,region,startInterval,jobTimestamp
0,BB,US,2021-09-23 12:00:00,2021-09-23 13:12:27.380999936
1,FB,US,2021-09-23 12:00:00,2021-09-23 13:12:27.380999936
2,HOOD,US,2021-09-23 12:00:00,2021-09-23 13:12:27.380999936
3,PLUG,US,2021-09-23 12:00:00,2021-09-23 13:12:27.380999936
4,MRM,US,2021-09-23 12:00:00,2021-09-23 13:12:27.380999936


# 2. REAL TIME QUOTE DATA

API domain: https://yfapi.net
API path: /v6/finance/quote
API variables:  querystring  {"symbols" : "string1,string2,string3,..."}

Pull data (extract) from yahoofinancials API (URL path is /v6/finance/quote) with "symbols" set to comma-separated list of up to 10 stock symbols at a time.

Use the query_string of 10 stock symbols.

In [16]:
# The quote API query wants the list of symbols as a single comma separated string 
#  with no spaces.  Max is 10 per request.
query_string=''
for stock in stock_list:
    query_string += f"{stock},"
query_string = query_string.rstrip(',')
query_string

'BB,FB,HOOD,PLUG,MRM,CRM,1810.HK,EAR,U,FCEL,TOST,TLRY,RNAZ,FRSH,AMHC,ROKU,DBGI,ACN,NURO,FEDU,^AORD,BABA,ATER,BB.TO,CBDT.CN,SQ,AMD,LSPD,ENB.TO,BTE.TO,NVEI.TO,SOFI,HUT,NVDA,CADUSD=X,^FCHI,AMC,^HSI,3333.HK,EURUSD=X,RR.L,AAPL,GME,^DJI'

In [17]:
# For now, get the first 10 and run the quote API
query_string=''
for i in range(10):
    query_string += f"{stock_list[i]},"
query_string = query_string.rstrip(',')
query_string

'BB,FB,HOOD,PLUG,MRM,CRM,1810.HK,EAR,U,FCEL'

In [18]:
## Query of /v6/finance/quote API

url = "https://yfapi.net/v6/finance/quote"

## Warning: no space between symbols
querystring = {"symbols" : query_string}

# Credentials to include
headers = {
    'x-api-key': API_KEY
    }

In [19]:
## Make API request (query string required for this API)
response2 = requests.request("GET", url, headers=headers, params=querystring).json()

# What does the response look like?  We will have to use indexing to get to the 'result' level
pprint.pprint(response2)

{'quoteResponse': {'error': None,
                   'result': [{'ask': 10.8,
                               'askSize': 9,
                               'averageAnalystRating': '3.9 - Underperform',
                               'averageDailyVolume10Day': 8836012,
                               'averageDailyVolume3Month': 10474489,
                               'bid': 10.79,
                               'bidSize': 9,
                               'bookValue': 2.571,
                               'currency': 'USD',
                               'displayName': 'BlackBerry',
                               'earningsTimestamp': 1632330304,
                               'earningsTimestampEnd': 1640552400,
                               'earningsTimestampStart': 1640120400,
                               'epsCurrentYear': -0.14,
                               'epsForward': 0.1,
                               'epsTrailingTwelveMonths': -0.944,
                               'esgPopula

In [20]:
## How many records did we get?
len(response2['quoteResponse']['result'])

10

In [53]:
# Create a new DataFrame from all the results in the dictionary
finance_quote_df = pd.DataFrame(response2['quoteResponse']['result'])
# finance_quote_df

In [63]:
finance_quote_df['volume24Hr'].unique()

KeyError: 'volume24Hr'

In [16]:
finance_quote_df.columns

Index(['language', 'region', 'quoteType', 'quoteSourceName', 'triggerable',
       'currency', 'forwardPE', 'priceToBook', 'sourceInterval',
       'exchangeDataDelayedBy', 'averageAnalystRating', 'tradeable',
       'shortName', 'postMarketChangePercent', 'postMarketTime',
       'postMarketPrice', 'postMarketChange', 'regularMarketChange',
       'regularMarketChangePercent', 'regularMarketTime', 'regularMarketPrice',
       'regularMarketDayHigh', 'regularMarketDayRange', 'regularMarketDayLow',
       'regularMarketVolume', 'regularMarketPreviousClose', 'bid', 'ask',
       'bidSize', 'askSize', 'fullExchangeName', 'financialCurrency',
       'regularMarketOpen', 'averageDailyVolume3Month',
       'averageDailyVolume10Day', 'fiftyTwoWeekLowChange',
       'fiftyTwoWeekLowChangePercent', 'fiftyTwoWeekRange',
       'fiftyTwoWeekHighChange', 'fiftyTwoWeekHighChangePercent',
       'fiftyTwoWeekLow', 'fiftyTwoWeekHigh', 'earningsTimestamp',
       'earningsTimestampStart', 'earningsTim

# 3. STOCK HISTORY

Open Source Library:  yahoo_fin
Usage: get_data(ticker, start_date = mm/dd/YYYY, end_date = None, index_as_date = True, interval = “1d”)
API variables:  ticker      a single ticker symbol or a list of ticker symbols
                start_date  first date in requested history, back to 1970
                end_date    last date in requested hostory, to the present
                index_as_date  should date column be used as the dataframe index?
                interval    data interval of 1d, 1w, 1m for daily, weekly, monthly

Pull data (extract) from yahoo_fin library with "ticker" set to a list of up to 10 stock symbols at a time.  Specify data interval and range of desired time span.

Returns one dataframe of data per ticker

In [22]:
## Query of yahoo_fin
from yahoo_fin.stock_info import get_data

my_tickers=stock_list
my_startdate="12/04/2009"
my_enddate="09/21/2021"
my_indexdate=True
my_interval="1wk"

In [23]:
## Make API request

response3 = {}
for ticker in my_tickers:
    try:
        response3[ticker] = get_data(ticker, 
                                     start_date=my_startdate,
                                     end_date=my_enddate,
                                     index_as_date=my_indexdate,
                                     interval=my_interval)
        print(f"Good response on ticker {ticker}")
    except:
        print(f"No data on ticker {ticker}...")

Good response on ticker BB
Good response on ticker FB
Good response on ticker HOOD
Good response on ticker PLUG
Good response on ticker MRM
Good response on ticker CRM
Good response on ticker 1810.HK
Good response on ticker EAR
Good response on ticker U
Good response on ticker FCEL
No data on ticker TOST...
Good response on ticker TLRY
Good response on ticker RNAZ
No data on ticker FRSH...
Good response on ticker AMHC
Good response on ticker ROKU
Good response on ticker DBGI
Good response on ticker ACN
Good response on ticker NURO
Good response on ticker FEDU
Good response on ticker ^AORD
Good response on ticker BABA
Good response on ticker ATER
Good response on ticker BB.TO
Good response on ticker CBDT.CN
Good response on ticker SQ
Good response on ticker AMD
Good response on ticker LSPD
Good response on ticker ENB.TO
Good response on ticker BTE.TO
Good response on ticker NVEI.TO
Good response on ticker SOFI
Good response on ticker HUT
Good response on ticker NVDA
Good response on tic

In [24]:
## How many records did we get?
len(response3)

42

In [25]:
# response3 is a dictionary of dataframes.  Using the .keys you can see which
#  tickers actually returned some data
response3.keys()

dict_keys(['BB', 'FB', 'HOOD', 'PLUG', 'MRM', 'CRM', '1810.HK', 'EAR', 'U', 'FCEL', 'TLRY', 'RNAZ', 'AMHC', 'ROKU', 'DBGI', 'ACN', 'NURO', 'FEDU', '^AORD', 'BABA', 'ATER', 'BB.TO', 'CBDT.CN', 'SQ', 'AMD', 'LSPD', 'ENB.TO', 'BTE.TO', 'NVEI.TO', 'SOFI', 'HUT', 'NVDA', 'CADUSD=X', '^FCHI', 'AMC', '^HSI', '3333.HK', 'EURUSD=X', 'RR.L', 'AAPL', 'GME', '^DJI'])

In [46]:
# Concatenate all dataframes
# Start with an empty dataframe having the right column names
stock_history_df = pd.DataFrame({'open': [], 'high': [], 'low': [], 'close': [], 'adjclose': [], 'volume': [], 'ticker': [] })

# Loop through the stock tickers that received a response and add them to
#   stock_history_df
for stock in response3.keys():
    stock_history_df = stock_history_df.append(response3[stock])
# Reset index so that date field is a named column
stock_history_df = stock_history_df.reset_index().rename(columns={"index": "date_db"})
len(stock_history_df)

16564

In [47]:
stock_history_df.head()

Unnamed: 0,date_db,open,high,low,close,adjclose,volume,ticker
0,2009-11-30,59.799999,60.290001,58.130001,58.75,58.75,27307600.0,BB
1,2009-12-07,59.040001,66.5,58.860001,63.84,63.84,99963600.0,BB
2,2009-12-14,64.059998,71.599998,62.529999,70.0,70.0,147390400.0,BB
3,2009-12-21,70.330002,71.540001,66.5,66.919998,66.919998,62976600.0,BB
4,2009-12-28,67.410004,68.690002,66.5,67.540001,67.540001,34025200.0,BB


# 4. Steps beyond

We could save the raw outputs into 3 database tables.

Transformations (either in the database or here in Jupyter Notebook):
The above new API pulls in dataframes with the proper timestamp format.  With this API we don't need to convert the time stamp

Transforming: renaming columns, taking fewer columns into new dataframe before loading, calculating new needed columns, dropping duplicates

Load (in the database):
Decide for each raw table above which columns will be needed for the final set of tables.  Make those tables, calculations, etc, and produce the final set of tables.  Set up connection string and method for how to transfer dataframe into database tables.

# 5. (LOAD) into database
Use the values pulled in from config.py to construct the database connection string.  Connect to the database.

In [28]:
from sqlalchemy import create_engine

# Define the engine string to be used for connecting to your database using the 
#  environment variables found in config.py
connection_string = f'{username}:{password}@{host}:{port}/{database}'
engine = create_engine(f'postgresql://{connection_string}')

In [35]:
# Write finance_trends to database
finance_trends_df.to_sql(name='finance_trends', con=engine, if_exists='append', index=False)

In [68]:
# Write finance_quote to database
finance_quote_df.to_sql(name='finance_quote', con=engine, if_exists='append', index=False)

In [50]:
# Write stock_history to database
stock_history_df.to_sql(name='stock_history', con=engine, if_exists='append', index=False)

In [69]:
# Verify that there is data in finance_trends
pd.read_sql_query('select * from finance_trends', con=engine).head()

Unnamed: 0,id_number,symbol,region,startInterval,jobTimestamp
0,,BB,US,202109200000.0,1632403000000.0
1,,FB,US,202109200000.0,1632403000000.0
2,,HOOD,US,202109200000.0,1632403000000.0
3,,PLUG,US,202109200000.0,1632403000000.0
4,,MRM,US,202109200000.0,1632403000000.0


In [70]:
# Verify that there is data in finance_quote
pd.read_sql_query('select * from finance_quote', con=engine).head()

Unnamed: 0,ask,askSize,averageAnalystRating,averageDailyVolume10Day,averageDailyVolume3Month,bid,bidSize,bookValue,currency,displayName,...,nameChangeDate,newListingDate,circulatingSupply,lastMarket,volume24Hr,volumeAllCurrencies,fromCurrency,toCurrency,startDate,coinImageUrl
0,10.8,9,3.9 - Underperform,8836012,10474489,10.79,9,2.571,USD,BlackBerry,...,,,,,,,,,,
1,348.45,14,1.9 - Buy,19128737,14037237,348.39,13,48.913,USD,Facebook,...,,,,,,,,,,
2,45.76,18,2.5 - Buy,6073312,23926582,45.71,10,-8.533,USD,Robinhood Markets,...,,,,,,,,,,
3,27.35,13,2.1 - Buy,16214775,19743384,27.36,8,8.462,USD,Plug Power,...,,,,,,,,,,
4,14.11,8,,181525,43003,14.02,14,101.868,USD,MEDIROM Healthcare,...,,,,,,,,,,


In [71]:
# Verify that there is data in stock_history
pd.read_sql_query('select * from stock_history', con=engine).head()

Unnamed: 0,date_db,open,high,low,close,adjclose,volume,ticker,jobTimestamp
0,2009-11-30,59.799999,60.290001,58.130001,58.75,58.75,27307600.0,BB,
1,2009-12-07,59.040001,66.5,58.860001,63.84,63.84,99963600.0,BB,
2,2009-12-14,64.059998,71.599998,62.529999,70.0,70.0,147390400.0,BB,
3,2009-12-21,70.330002,71.540001,66.5,66.919998,66.919998,62976600.0,BB,
4,2009-12-28,67.410004,68.690002,66.5,67.540001,67.540001,34025200.0,BB,
