# 1. TRENDING STOCK SYMBOLS

API domain: https://yfapi.net
API path: /v1/finance/trending/{region}

Pull data (extract) from yahoofinancials API (URL path is /v1/finance/trending/{region}).
It returns 20 stock symbols based on the region in the URL.

Uses your basic subscription API key obtained from yahoofinancials web site.
Current region: US

In [1]:
# Import required modules
import pandas as pd

In [2]:
##  Ensure you have your own config.py file in same folder so you can import your api key
##   and PostgreSQL database connection string variables
import requests
import pprint
from config import API_KEY
from config import username, password, host, port, database

region_selected = "US"
url = "https://yfapi.net/v1/finance/trending/" + region_selected
## Alternate url
# url = "https://rest.yahoofinanceapi.com/v1/finance/trending/" + region_selected

# Credentials to include
headers = {
    'x-api-key': API_KEY
    }

In [3]:
## Make API request (no query string required for this API)
response1 = requests.request("GET", url, headers=headers).json()

# What does the response look like?  We will have to use indexing to get to the 'result' level
pprint.pprint(response1)

{'finance': {'error': None,
             'result': [{'count': 20,
                         'jobTimestamp': 1632262197005,
                         'quotes': [{'symbol': 'DIS'},
                                    {'symbol': 'ADBE'},
                                    {'symbol': 'FDX'},
                                    {'symbol': 'QS'},
                                    {'symbol': 'MRIN'},
                                    {'symbol': 'RWLK'},
                                    {'symbol': 'UBER'},
                                    {'symbol': 'DKNG'},
                                    {'symbol': 'SFIX'},
                                    {'symbol': 'NNVC'},
                                    {'symbol': 'HLBZ'},
                                    {'symbol': 'LCID'},
                                    {'symbol': 'ETH-USD'},
                                    {'symbol': 'UPST'},
                                    {'symbol': 'XELA'},
                                    {'s

# Optional: Check that 'error' was 'None'
Insert code to show error code, does an assert that code ran fine, shows number of tickers received

In [4]:
## How many responses did we get?  Single result with multiple columns
len(response1['finance']['result'])

1

In [5]:
## Parse through the resulting dictionary tree to get at the 'results'.  Look at only the first
##  result in order to see what columns we received.
response1['finance']['result'][0]

{'count': 20,
 'quotes': [{'symbol': 'DIS'},
  {'symbol': 'ADBE'},
  {'symbol': 'FDX'},
  {'symbol': 'QS'},
  {'symbol': 'MRIN'},
  {'symbol': 'RWLK'},
  {'symbol': 'UBER'},
  {'symbol': 'DKNG'},
  {'symbol': 'SFIX'},
  {'symbol': 'NNVC'},
  {'symbol': 'HLBZ'},
  {'symbol': 'LCID'},
  {'symbol': 'ETH-USD'},
  {'symbol': 'UPST'},
  {'symbol': 'XELA'},
  {'symbol': 'NURO'},
  {'symbol': 'UPS'},
  {'symbol': 'ABNB'},
  {'symbol': 'GTEK'},
  {'symbol': 'BLIN'}],
 'jobTimestamp': 1632262197005,
 'startInterval': 202109212100}

In [6]:
# Create a new DataFrame from all the results in the dictionary
response1_df = pd.DataFrame(response1['finance']['result'])
response1_df.head()

Unnamed: 0,count,quotes,jobTimestamp,startInterval
0,20,"[{'symbol': 'DIS'}, {'symbol': 'ADBE'}, {'symb...",1632262197005,202109212100


In [7]:
# Extraction of the timestamp, startInterval
from datetime import datetime
start_interval = response1_df['startInterval'].to_string()
job_timestamp = response1_df['jobTimestamp']

job_timestamp = pd.to_datetime(job_timestamp, unit='ms')
# start_interval = datetime.strptime(start_interval, '%Y%m%d%H%M')
start_interval, job_timestamp

('0    202109212100',
 0   2021-09-21 22:09:57.005
 Name: jobTimestamp, dtype: datetime64[ns])

In [8]:
# Loop through each stock symbol and make a list
## Using 'for' loop
# stock_list=[]
# for stock in response1_df['quotes'][0]:
#     #print(stock['symbol'])
#     stock_list.append(stock['symbol'])
## Using list comprehension notation
# stock_list = [stock['symbol'] for stock in response1_df['quotes'][0]]
stock_list = [stock['symbol'] for stock in response1_df['quotes'][0]]
stock_list

['DIS',
 'ADBE',
 'FDX',
 'QS',
 'MRIN',
 'RWLK',
 'UBER',
 'DKNG',
 'SFIX',
 'NNVC',
 'HLBZ',
 'LCID',
 'ETH-USD',
 'UPST',
 'XELA',
 'NURO',
 'UPS',
 'ABNB',
 'GTEK',
 'BLIN']

In [9]:
# The quote API query wants the list of symbols as a single comma separated string 
#  with no spaces.  Max is 10 per request.
query_string=''
for stock in stock_list:
    query_string += f"{stock},"
query_string = query_string.rstrip(',')
query_string

'DIS,ADBE,FDX,QS,MRIN,RWLK,UBER,DKNG,SFIX,NNVC,HLBZ,LCID,ETH-USD,UPST,XELA,NURO,UPS,ABNB,GTEK,BLIN'

In [10]:
# For now, get the first 10 and run the quote API
query_string=''
for i in range(10):
    query_string += f"{response1_df['quotes'][0][i]['symbol']},"
query_string = query_string.rstrip(',')
query_string

'DIS,ADBE,FDX,QS,MRIN,RWLK,UBER,DKNG,SFIX,NNVC'

# 2. REAL TIME QUOTE DATA

API domain: https://yfapi.net
API path: /v6/finance/quote
API variables:  querystring  {"symbols" : "string1,string2,string3,..."}

Pull data (extract) from yahoofinancials API (URL path is /v6/finance/quote) with "symbols" set to comma-separated list of up to 10 stock symbols at a time.

Use the query_string of 10 stock symbols.

In [11]:
## Query of /v6/finance/quote API

url = "https://yfapi.net/v6/finance/quote"
## Alternate url
# url = "https://rest.yahoofinanceapi.com/v6/finance/quote"

## Warning: no space between symbols
querystring = {"symbols" : query_string}

# Credentials to include
headers = {
    'x-api-key': API_KEY
    }

In [12]:
## Make API request (query string required for this API)
response2 = requests.request("GET", url, headers=headers, params=querystring).json()

# What does the response look like?  We will have to use indexing to get to the 'result' level
pprint.pprint(response2)

{'quoteResponse': {'error': None,
                   'result': [{'ask': 171.2,
                               'askSize': 10,
                               'averageAnalystRating': '1.9 - Buy',
                               'averageDailyVolume10Day': 7116733,
                               'averageDailyVolume3Month': 8272410,
                               'bid': 171.36,
                               'bidSize': 10,
                               'bookValue': 48.243,
                               'currency': 'USD',
                               'dividendDate': 1579132800,
                               'earningsTimestamp': 1628784300,
                               'earningsTimestampEnd': 1636977600,
                               'earningsTimestampStart': 1636541940,
                               'epsCurrentYear': 2.45,
                               'epsForward': 5.08,
                               'epsTrailingTwelveMonths': 0.61,
                               'esgPopulated': Fa

In [13]:
## How many records did we get?
len(response2['quoteResponse']['result'])

10

In [14]:
# Create a new DataFrame from all the results in the dictionary
response2_df = pd.DataFrame(response2['quoteResponse']['result'])
response2_df

Unnamed: 0,language,region,quoteType,quoteSourceName,triggerable,currency,firstTradeDateMilliseconds,priceHint,shortName,marketState,...,exchange,longName,messageBoardId,exchangeTimezoneName,exchangeTimezoneShortName,gmtOffSetMilliseconds,market,esgPopulated,symbol,displayName
0,en-US,US,EQUITY,Nasdaq Real Time Price,True,USD,-252322200000,2,Walt Disney Company (The),POST,...,NYQ,The Walt Disney Company,finmb_191564,America/New_York,EDT,-14400000,us_market,False,DIS,
1,en-US,US,EQUITY,Nasdaq Real Time Price,True,USD,524323800000,2,Adobe Inc.,POST,...,NMS,Adobe Inc.,finmb_24321,America/New_York,EDT,-14400000,us_market,False,ADBE,Adobe
2,en-US,US,EQUITY,Nasdaq Real Time Price,True,USD,261239400000,2,FedEx Corporation,POST,...,NYQ,FedEx Corporation,finmb_124423,America/New_York,EDT,-14400000,us_market,False,FDX,FedEx
3,en-US,US,EQUITY,Nasdaq Real Time Price,True,USD,1597671000000,2,QuantumScape Corporation,POST,...,NYQ,QuantumScape Corporation,finmb_204748122,America/New_York,EDT,-14400000,us_market,False,QS,QuantumScape
4,en-US,US,EQUITY,Nasdaq Real Time Price,True,USD,1363959000000,2,Marin Software Incorporated,POST,...,NGM,Marin Software Incorporated,finmb_35921511,America/New_York,EDT,-14400000,us_market,False,MRIN,
5,en-US,US,EQUITY,Nasdaq Real Time Price,True,USD,1410528600000,4,ReWalk Robotics Ltd.,POST,...,NCM,ReWalk Robotics Ltd.,finmb_268101180,America/New_York,EDT,-14400000,us_market,False,RWLK,ReWalk Robotics
6,en-US,US,EQUITY,Nasdaq Real Time Price,True,USD,1557495000000,2,"Uber Technologies, Inc.",POST,...,NYQ,"Uber Technologies, Inc.",finmb_144524848,America/New_York,EDT,-14400000,us_market,False,UBER,Uber
7,en-US,US,EQUITY,Nasdaq Real Time Price,True,USD,1564061400000,2,DraftKings Inc.,POST,...,NMS,DraftKings Inc.,finmb_648976269,America/New_York,EDT,-14400000,us_market,False,DKNG,DraftKings
8,en-US,US,EQUITY,Nasdaq Real Time Price,True,USD,1510929000000,2,"Stitch Fix, Inc.",POST,...,NMS,"Stitch Fix, Inc.",finmb_142659894,America/New_York,EDT,-14400000,us_market,False,SFIX,Stitch Fix
9,en-US,US,EQUITY,Nasdaq Real Time Price,True,USD,1130333400000,2,"NanoViricides, Inc.",POST,...,ASE,"NanoViricides, Inc.",finmb_22594191,America/New_York,EDT,-14400000,us_market,False,NNVC,NanoViricides


# 3. STOCK HISTORY

Open Source Library:  yahoo_fin
Usage: get_data(ticker, start_date = mm/dd/YYYY, end_date = None, index_as_date = True, interval = “1d”)
API variables:  ticker      a single ticker symbol or a list of ticker symbols
                start_date  first date in requested history, back to 1970
                end_date    last date in requested hostory, to the present
                index_as_date  should date column be used as the dataframe index?
                interval    data interval of 1d, 1w, 1m for daily, weekly, monthly

Pull data (extract) from yahoo_fin library with "ticker" set to a list of up to 10 stock symbols at a time.  Specify data interval and range of desired time span.

Returns one dataframe of data per ticker

In [15]:
## Query of yahoo_fin
from yahoo_fin.stock_info import get_data

my_tickers=stock_list
my_startdate="12/04/2009"
my_enddate="09/21/2021"
my_indexdate=True
my_interval="1wk"

In [17]:
## Make API request

response3 = {}
for ticker in my_tickers:
    response3[ticker] = get_data(ticker, 
                                 start_date=my_startdate,
                                 end_date=my_enddate,
                                 index_as_date=my_indexdate,
                                 interval=my_interval)

In [18]:
## How many records did we get?
len(response3)

20

In [19]:
# response3 is a dictionary of dataframes.  To access each, you should run a 'for'
## loop through it.  Here you can look at the 20 keys (stock symbols) for the latest
## request
response3.keys()

dict_keys(['DIS', 'ADBE', 'FDX', 'QS', 'MRIN', 'RWLK', 'UBER', 'DKNG', 'SFIX', 'NNVC', 'HLBZ', 'LCID', 'ETH-USD', 'UPST', 'XELA', 'NURO', 'UPS', 'ABNB', 'GTEK', 'BLIN'])

In [20]:
# Each member of the response3 dictionary is a dataframe
response3['DIS']

Unnamed: 0,open,high,low,close,adjclose,volume,ticker
2009-11-30,30.799999,31.100000,30.299999,30.840000,26.556301,23918100,DIS
2009-12-07,30.930000,31.830000,30.450001,31.700001,27.296848,59513000,DIS
2009-12-14,31.920000,32.590000,31.580000,31.950001,27.829397,66176900,DIS
2009-12-21,32.049999,32.500000,31.940001,32.290001,28.125551,26990200,DIS
2009-12-28,32.240002,32.750000,31.799999,32.250000,28.090706,40567900,DIS
...,...,...,...,...,...,...,...
2021-08-23,177.080002,180.539993,175.910004,180.139999,180.139999,31378100,DIS
2021-08-30,180.500000,185.300003,178.529999,181.000000,181.000000,35684100,DIS
2021-09-06,181.020004,187.580002,180.050003,184.119995,184.119995,32095100,DIS
2021-09-13,185.119995,185.899994,181.100006,183.470001,183.470001,34650800,DIS


# 4. Steps beyond

We could save the raw outputs into 3 database tables.

Transformations (either in the database or here in Jupyter Notebook):
The above new API pulls in dataframes with the proper timestamp format.  With this API we don't need to convert the time stamp

Transforming: renaming columns, taking fewer columns into new dataframe before loading, calculating new needed columns, dropping duplicates

Load (in the database):
Decide for each raw table above which columns will be needed for the final set of tables.  Make those tables, calculations, etc, and produce the final set of tables.  Set up connection string and method for how to transfer dataframe into database tables.

# 5. Connect to database
Use the values pulled in from config.py to construct the database connection string.  Connect to the database.

In [21]:
from sqlalchemy import create_engine

# Define the engine string to be used for connecting to your database using the 
#  environment variables found in config.py

database_url = f'postgresql://{username}:{password}@{host}:{port}/{database}'
# database_url = f"postgresql://{username}:{password}@{host}:5432/{database}"
### Alternate URLs
# database_url = os.environ.get('DATABASE_URL', 'postgresql://localhost:5432/EmployeeSQL')
# connection_string = f'${username}:${password}@${host}:${port}/${database}'
# engine = create_engine(f'postgresql://{connection_string}')
engine = create_engine(database_url)
# connection = engine.connect()

In [22]:
# Example of how to work with subset of data into a new dataframe
# new_customer_data_df = customer_data_df[['id', 'first_name', 'last_name']].copy()
# new_customer_data_df.head()

# Write dataframe to database, decide whether index is true or false, depending on
#   whether the index should be a column or not (possible primary key column)
# transformed_data_df.to_sql(name='customer_name', con=engine, if_exists='append', index=False)
# premise_transformed_df.to_sql(name='premise', con=engine, if_exists='append', index=True)

# Verify data presence by querying database
pd.read_sql_query('select * from departments', con=engine).head()

Unnamed: 0,dept_no,dept_name
0,d001,Marketing
1,d002,Finance
2,d003,Human Resources
3,d004,Production
4,d005,Development
