# 1. TRENDING STOCK SYMBOLS

API domain: https://yfapi.net
API path: /v1/finance/trending/{region}

Pull data (extract) from yahoofinancials API (URL path is /v1/finance/trending/{region}).
It returns 20 stock symbols based on the region in the URL.

Uses your basic subscription API key obtained from yahoofinancials web site.
Current region: US

In [3]:
# Import required modules
import pandas as pd

In [4]:
##  Ensure you have your own config.py file in same folder so you can import your api key
##   and PostgreSQL database connection string variables
import requests
import pprint
from config import API_KEY
from config import username, password, host, port, database

# Region whose trending symbols are requested; it becomes the last segment of the URL path.
region_selected = "US"
# Alternate host (same path): https://rest.yahoofinanceapi.com
url = f"https://yfapi.net/v1/finance/trending/{region_selected}"

# The API key imported from config.py is sent in the 'x-api-key' request header.
headers = {'x-api-key': API_KEY}

In [6]:
## Make API request (no query string required for this API).
# The timeout keeps the cell from hanging indefinitely on a stalled connection, and
# raise_for_status() turns an HTTP error status (4xx/5xx) into an exception right here
# instead of letting an error payload cause a confusing KeyError in a later cell.
trending_http_response = requests.get(url, headers=headers, timeout=30)
trending_http_response.raise_for_status()
response1 = trending_http_response.json()

# What does the response look like?  We will have to use indexing to get to the 'result' level
pprint.pprint(response1)

{'finance': {'error': None,
             'result': [{'count': 20,
                         'jobTimestamp': 1632255052141,
                         'quotes': [{'symbol': 'DIS'},
                                    {'symbol': 'QS'},
                                    {'symbol': 'UBER'},
                                    {'symbol': 'LCID'},
                                    {'symbol': 'RWLK'},
                                    {'symbol': 'DKNG'},
                                    {'symbol': 'UPST'},
                                    {'symbol': 'NNVC'},
                                    {'symbol': 'HLBZ'},
                                    {'symbol': 'ABNB'},
                                    {'symbol': 'GTEK'},
                                    {'symbol': 'INDI'},
                                    {'symbol': 'NURO'},
                                    {'symbol': 'SAVA'},
                                    {'symbol': 'CRVS'},
                                    {'sym

# Optional: Check that 'error' was 'None'
# Query pulled 20 stock symbols in US.

In [7]:
## How many entries does the 'result' list hold?  A single entry carries all the fields.
trending_results = response1['finance']['result']
len(trending_results)

1

In [8]:
## Walk down the nested dictionaries to the 'result' list and show only its first entry,
##  so we can see which fields we received.
first_trending_result = response1['finance']['result'][0]
first_trending_result

{'count': 20,
 'quotes': [{'symbol': 'DIS'},
  {'symbol': 'QS'},
  {'symbol': 'UBER'},
  {'symbol': 'LCID'},
  {'symbol': 'RWLK'},
  {'symbol': 'DKNG'},
  {'symbol': 'UPST'},
  {'symbol': 'NNVC'},
  {'symbol': 'HLBZ'},
  {'symbol': 'ABNB'},
  {'symbol': 'GTEK'},
  {'symbol': 'INDI'},
  {'symbol': 'NURO'},
  {'symbol': 'SAVA'},
  {'symbol': 'CRVS'},
  {'symbol': 'SEEL'},
  {'symbol': 'SGOC'},
  {'symbol': 'DISCB'},
  {'symbol': 'BBIG'},
  {'symbol': 'SPIR'}],
 'jobTimestamp': 1632255052141,
 'startInterval': 202109211900}

In [9]:
# Build a DataFrame with one row per entry of the 'result' list
response1_df = pd.DataFrame(data=response1['finance']['result'])
response1_df.head()

Unnamed: 0,count,quotes,jobTimestamp,startInterval
0,20,"[{'symbol': 'DIS'}, {'symbol': 'QS'}, {'symbol...",1632255052141,202109211900


In [10]:
# Pull out the 'startInterval' column as a Series (only this field is extracted here)
start_interval = response1_df.loc[:, 'startInterval']
start_interval

0    202109211900
Name: startInterval, dtype: int64

In [11]:
# Collect the ticker symbol of every entry in the first row's 'quotes' list.
# The list is reused later when retrieving quote and detail data.
trending_quotes = response1_df.loc[0, 'quotes']
stock_list = [quote['symbol'] for quote in trending_quotes]
stock_list

['DIS',
 'QS',
 'UBER',
 'LCID',
 'RWLK',
 'DKNG',
 'UPST',
 'NNVC',
 'HLBZ',
 'ABNB',
 'GTEK',
 'INDI',
 'NURO',
 'SAVA',
 'CRVS',
 'SEEL',
 'SGOC',
 'DISCB',
 'BBIG',
 'SPIR']

In [12]:
# The quote API query wants the list of symbols as a single comma separated string
#  with no spaces.  Max is 10 per request, so this string (which holds every trending
#  symbol) is too long to send in one request as-is.
# str.join puts the separators only between items, so no trailing comma has to be
#  stripped afterwards.
query_string = ",".join(stock_list)
query_string

'DIS,QS,UBER,LCID,RWLK,DKNG,UPST,NNVC,HLBZ,ABNB,GTEK,INDI,NURO,SAVA,CRVS,SEEL,SGOC,DISCB,BBIG,SPIR'

In [13]:
# For now, get the first 10 symbols and run the quote API with them.
# A slice (instead of indexing positions 0..9) also works when fewer than 10 symbols
#  were returned, where fixed indexing would raise an IndexError.
first_ten_quotes = response1_df['quotes'][0][:10]
query_string = ",".join(quote['symbol'] for quote in first_ten_quotes)
query_string

'DIS,QS,UBER,LCID,RWLK,DKNG,UPST,NNVC,HLBZ,ABNB'

# 2. REAL TIME QUOTE DATA

API domain: https://yfapi.net
API path: /v6/finance/quote
API variables:  querystring  {"symbols" : "string1,string2,string3,..."}

Pull data (extract) from yahoofinancials API (URL path is /v6/finance/quote) with "symbols" set to comma-separated list of up to 10 stock symbols at a time.

Use the query_string of 10 stock symbols.

In [14]:
## Query of /v6/finance/quote API
# Alternate host (same path): https://rest.yahoofinanceapi.com
url = "https://yfapi.net/v6/finance/quote"

## Warning: the symbols must be comma separated with no space between them
querystring = dict(symbols=query_string)

# Credentials to include
headers = {'x-api-key': API_KEY}

In [15]:
## Make API request (query string required for this API).
# The timeout keeps the cell from hanging indefinitely on a stalled connection, and
# raise_for_status() turns an HTTP error status (4xx/5xx) into an exception right here
# instead of letting an error payload cause a confusing KeyError in a later cell.
quote_http_response = requests.get(url, headers=headers, params=querystring, timeout=30)
quote_http_response.raise_for_status()
response2 = quote_http_response.json()

# What does the response look like?  We will have to use indexing to get to the 'result' level
pprint.pprint(response2)

{'quoteResponse': {'error': None,
                   'result': [{'ask': 171.12,
                               'askSize': 10,
                               'averageAnalystRating': '1.9 - Buy',
                               'averageDailyVolume10Day': 7116733,
                               'averageDailyVolume3Month': 8272410,
                               'bid': 171.03,
                               'bidSize': 10,
                               'bookValue': 48.243,
                               'currency': 'USD',
                               'dividendDate': 1579132800,
                               'earningsTimestamp': 1628784300,
                               'earningsTimestampEnd': 1636977600,
                               'earningsTimestampStart': 1636541940,
                               'epsCurrentYear': 2.45,
                               'epsForward': 5.08,
                               'epsTrailingTwelveMonths': 0.61,
                               'esgPopulated': F

In [16]:
## How many quote records came back?
quote_results = response2['quoteResponse']['result']
len(quote_results)

10

In [17]:
# Build a DataFrame with one row per quote record in the 'result' list
response2_df = pd.DataFrame(data=response2['quoteResponse']['result'])
response2_df

Unnamed: 0,language,region,quoteType,quoteSourceName,triggerable,currency,shortName,firstTradeDateMilliseconds,fiftyTwoWeekLowChangePercent,priceHint,...,twoHundredDayAverageChangePercent,marketCap,forwardPE,priceToBook,sourceInterval,exchangeDataDelayedBy,averageAnalystRating,symbol,displayName,ipoExpectedDate
0,en-US,US,EQUITY,Nasdaq Real Time Price,True,USD,Walt Disney Company (The),-252322200000,0.460121,2,...,-0.054584,311038148608,33.69488,3.54808,15,0,1.9 - Buy,DIS,,
1,en-US,US,EQUITY,Nasdaq Real Time Price,True,USD,QuantumScape Corporation,1597671000000,1.144,2,...,-0.225095,10001286144,-52.434784,6.637314,15,0,2.7 - Hold,QS,QuantumScape,
2,en-US,US,EQUITY,Nasdaq Real Time Price,True,USD,"Uber Technologies, Inc.",1557495000000,0.348738,2,...,-0.09759,83589767168,-82.14815,5.884069,15,0,1.8 - Buy,UBER,Uber,
3,en-US,US,EQUITY,Nasdaq Real Time Price,True,USD,"Lucid Group, Inc.",1600435800000,1.792708,2,...,0.178083,43395203072,,-0.083492,15,0,,LCID,Lucid,2021-07-26
4,en-US,US,EQUITY,Nasdaq Real Time Price,True,USD,ReWalk Robotics Ltd.,1410528600000,1.59,4,...,0.429063,120204488,-10.36,1.811189,15,0,2.0 - Buy,RWLK,ReWalk Robotics,
5,en-US,US,EQUITY,Nasdaq Real Time Price,True,USD,DraftKings Inc.,1564061400000,0.512034,2,...,-0.037779,21288579072,-28.370968,9.631319,15,0,2.0 - Buy,DKNG,DraftKings,
6,en-US,US,EQUITY,Nasdaq Real Time Price,True,USD,"Upstart Holdings, Inc.",1608129000000,13.187084,2,...,1.259976,24945448960,192.07784,34.771812,15,0,1.7 - Buy,UPST,Upstart,2020-12-16
7,en-US,US,EQUITY,Nasdaq Real Time Price,True,USD,"NanoViricides, Inc.",1130333400000,1.17193,2,...,0.484671,71215952,-13.755556,2.233045,15,0,1.0 - Strong Buy,NNVC,NanoViricides,
8,en-US,US,EQUITY,Nasdaq Real Time Price,True,USD,"Helbiz, Inc.",1575901800000,3.705,2,...,1.851641,1233145600,-31.7191,-17.676893,15,0,1.0 - Strong Buy,HLBZ,Helbiz,
9,en-US,US,EQUITY,Nasdaq Real Time Price,True,USD,"Airbnb, Inc.",1607610600000,0.393333,2,...,0.067467,106422968320,384.75,30.881065,15,0,2.4 - Buy,ABNB,Airbnb,2020-12-10


# 3. STOCK HISTORY

API domain: https://yfapi.net
API path: /v8/finance/spark
API variables:  querystring  {"symbols" : "string1,string2,string3,..."}
                interval  (1 minute to 1 month)
                range     (1 day to 1 month to 5 years)

Pull data (extract) from yahoofinancials API (URL path is /v8/finance/spark) with "symbols" set to comma-separated list of up to 10 stock symbols at a time.  Specify data interval and range of desired time span.

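Use the query_string of 10 stock symbols.  Returns one entry per symbol, each containing parallel lists of timestamps and closing prices plus a few metadata fields.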

In [18]:
## Query of /v8/finance/spark API
# Alternate host (same path): https://rest.yahoofinanceapi.com
url = "https://yfapi.net/v8/finance/spark"

# Data interval and overall time span of the price history to request
my_interval, my_range = "1d", "1mo"

## Warning: no space between symbols
querystring = dict(symbols=query_string, interval=my_interval, range=my_range)

# Credentials to include
headers = {'x-api-key': API_KEY}

In [19]:
## Make API request (query string required for this API).
# The timeout keeps the cell from hanging indefinitely on a stalled connection, and
# raise_for_status() turns an HTTP error status (4xx/5xx) into an exception right here
# instead of letting an error payload flow into the DataFrame cells below.
spark_http_response = requests.get(url, headers=headers, params=querystring, timeout=30)
spark_http_response.raise_for_status()
response3 = spark_http_response.json()

# What does the response look like?  Unlike the two earlier APIs there is no 'result'
# level here: the top-level keys of the dictionary are the stock symbols themselves.
pprint.pprint(response3)

{'ABNB': {'chartPreviousClose': 143.7,
          'close': [146.79,
                    161.42,
                    160.35,
                    152.73,
                    154.18,
                    156.02,
                    154.99,
                    156.59,
                    157.2,
                    158.0,
                    165.0,
                    163.93,
                    166.0,
                    165.2,
                    160.32,
                    163.3,
                    166.37,
                    168.15,
                    166.59,
                    161.64,
                    169.29],
          'dataGranularity': 300,
          'end': None,
          'previousClose': None,
          'start': None,
          'symbol': 'ABNB',
          'timestamp': [1629725400,
                        1629811800,
                        1629898200,
                        1629984600,
                        1630071000,
                        1630330200,
                   

In [20]:
## How many records did we get?  (One top-level key per stock symbol.)
symbol_count = len(response3)
symbol_count

10

In [21]:
# Feed the dictionary of dictionaries straight into a DataFrame: each top-level key
# (a stock symbol) becomes a column and each inner field becomes a row.
response3_df = pd.DataFrame(data=response3)
response3_df

Unnamed: 0,DKNG,LCID,QS,RWLK,NNVC,UPST,ABNB,UBER,HLBZ,DIS
dataGranularity,300,300,300,300,300,300,300,300,300,300
timestamp,"[1629725400, 1629811800, 1629898200, 162998460...","[1629725400, 1629811800, 1629898200, 162998460...","[1629725400, 1629811800, 1629898200, 162998460...","[1629725400, 1629811800, 1629984600, 163007100...","[1629725400, 1629811800, 1629898200, 162998460...","[1629725400, 1629811800, 1629898200, 162998460...","[1629725400, 1629811800, 1629898200, 162998460...","[1629725400, 1629811800, 1629898200, 162998460...","[1629725400, 1629811800, 1629898200, 162998460...","[1629725400, 1629811800, 1629898200, 162998460..."
symbol,DKNG,LCID,QS,RWLK,NNVC,UPST,ABNB,UBER,HLBZ,DIS
previousClose,,,,,,,,,,
chartPreviousClose,52.01,21.67,19.61,1.25,3.71,195.64,143.7,39.95,7.99,175.12
end,,,,,,,,,,
start,,,,,,,,,,
close,"[53.32, 56.47, 60.11, 57.86, 60.01, 59.25, 59....","[21.65, 22.83, 21.81, 21.48, 21.03, 20.63, 19....","[20.59, 21.3, 21.25, 21.27, 21.48, 21.4, 22.0,...","[1.29, 1.41, 1.35, 1.38, 1.41, 1.4, 1.37, 1.39...","[3.8, 3.82, 3.92, 3.91, 3.85, 3.85, 3.87, 3.92...","[202.96, 219.45, 214.14, 216.75, 223.18, 215.6...","[146.79, 161.42, 160.35, 152.73, 154.18, 156.0...","[41.0, 40.17, 41.55, 40.62, 40.69, 39.59, 39.1...","[7.38, 7.07, 6.85, 6.25, 6.2, 6.53, 6.72, 6.74...","[177.66, 178.47, 178.31, 176.56, 180.14, 179.9..."


In [22]:
# Alternative: turn the dictionary of dictionaries into a list of dictionaries before
# feeding it into a DataFrame.  DataFrames work best with a list of dictionaries
# (one dictionary per row).
# THIS STEP WOULD BE CONSIDERED A TRANSFORMATION

# Keep only the per-symbol dictionaries, in the order the response holds them
list3 = list(response3.values())
list3[0]

{'dataGranularity': 300,
 'timestamp': [1629725400,
  1629811800,
  1629898200,
  1629984600,
  1630071000,
  1630330200,
  1630416600,
  1630503000,
  1630589400,
  1630675800,
  1631021400,
  1631107800,
  1631194200,
  1631280600,
  1631539800,
  1631626200,
  1631712600,
  1631799000,
  1631885400,
  1632144600,
  1632254403],
 'symbol': 'DKNG',
 'previousClose': None,
 'chartPreviousClose': 52.01,
 'end': None,
 'start': None,
 'close': [53.32,
  56.47,
  60.11,
  57.86,
  60.01,
  59.25,
  59.29,
  60.21,
  61.07,
  61.02,
  63.58,
  63.39,
  63.67,
  62.46,
  60.46,
  59.05,
  59.84,
  59.97,
  60.42,
  57.0,
  52.77]}

In [23]:
# One row per symbol, one column per field of the per-symbol dictionaries
list3_df = pd.DataFrame(data=list3)
list3_df

Unnamed: 0,dataGranularity,timestamp,symbol,previousClose,chartPreviousClose,end,start,close
0,300,"[1629725400, 1629811800, 1629898200, 162998460...",DKNG,,52.01,,,"[53.32, 56.47, 60.11, 57.86, 60.01, 59.25, 59...."
1,300,"[1629725400, 1629811800, 1629898200, 162998460...",LCID,,21.67,,,"[21.65, 22.83, 21.81, 21.48, 21.03, 20.63, 19...."
2,300,"[1629725400, 1629811800, 1629898200, 162998460...",QS,,19.61,,,"[20.59, 21.3, 21.25, 21.27, 21.48, 21.4, 22.0,..."
3,300,"[1629725400, 1629811800, 1629984600, 163007100...",RWLK,,1.25,,,"[1.29, 1.41, 1.35, 1.38, 1.41, 1.4, 1.37, 1.39..."
4,300,"[1629725400, 1629811800, 1629898200, 162998460...",NNVC,,3.71,,,"[3.8, 3.82, 3.92, 3.91, 3.85, 3.85, 3.87, 3.92..."
5,300,"[1629725400, 1629811800, 1629898200, 162998460...",UPST,,195.64,,,"[202.96, 219.45, 214.14, 216.75, 223.18, 215.6..."
6,300,"[1629725400, 1629811800, 1629898200, 162998460...",ABNB,,143.7,,,"[146.79, 161.42, 160.35, 152.73, 154.18, 156.0..."
7,300,"[1629725400, 1629811800, 1629898200, 162998460...",UBER,,39.95,,,"[41.0, 40.17, 41.55, 40.62, 40.69, 39.59, 39.1..."
8,300,"[1629725400, 1629811800, 1629898200, 162998460...",HLBZ,,7.99,,,"[7.38, 7.07, 6.85, 6.25, 6.2, 6.53, 6.72, 6.74..."
9,300,"[1629725400, 1629811800, 1629898200, 162998460...",DIS,,175.12,,,"[177.66, 178.47, 178.31, 176.56, 180.14, 179.9..."


# 4. Steps beyond

We could save the raw outputs into 3 database tables.

Transformations (either in the database or here in Jupyter Notebook):
From above, we would probably want another table with the stock symbol, timestamp, and closing price.  Do we need 'time of capture' column?
For the timestamp, we would likely need Python code that transforms this Unix timestamp into MM-DD-YYYY format before inserting it into the PostgreSQL table.  Alternatively, we can ingest this field as-is and then perform the conversion in the database.

Transforming: renaming columns, taking fewer columns into new dataframe before loading, calculating new needed columns, dropping duplicates

Load (in the database):
Decide for each raw table above which columns will be needed for the final set of tables.  Make those tables, calculations, etc, and produce the final set of tables.  Set up connection string and method for how to transfer dataframe into database tables.

In [None]:
# TRANSFORM: Need to make each timestamp its own row in the table first, not clumped
# Transforming the timestamp from 'seconds since epoch' to 'mm/dd/yyyy'

In [24]:
# Example: access the list of timestamps stored in the first record (row label 0)
list3_df.loc[0, 'timestamp']

[1629725400,
 1629811800,
 1629898200,
 1629984600,
 1630071000,
 1630330200,
 1630416600,
 1630503000,
 1630589400,
 1630675800,
 1631021400,
 1631107800,
 1631194200,
 1631280600,
 1631539800,
 1631626200,
 1631712600,
 1631799000,
 1631885400,
 1632144600,
 1632254403]

In [25]:
# First timestamp (seconds since the Unix epoch) of the first record
timestamp1_record1 = list3_df.loc[0, 'timestamp'][0]
timestamp1_record1

1629725400

In [26]:
# Convert seconds-since-epoch into a pandas Timestamp (timezone-naive, expressed in UTC)
first_timestamp = pd.to_datetime(timestamp1_record1, unit='s')
first_timestamp

Timestamp('2021-08-23 13:30:00')

# 5. Connect to database
Use the values pulled in from config.py to construct the database connection string.  Connect to the database.

In [33]:
from urllib.parse import quote

from sqlalchemy import create_engine

# Build the connection URL from the settings imported from config.py.
# - The port is taken from config.py rather than hard-coded, so a server listening on a
#   non-default port is reached correctly.
# - The user name and password are percent-encoded so that characters such as '@', ':'
#   or '/' inside them cannot be mistaken for URL delimiters.
#   NOTE(review): assumes username/password are plain (not already encoded) values.
encoded_username = quote(str(username), safe="")
encoded_password = quote(str(password), safe="")
database_url = f"postgresql://{encoded_username}:{encoded_password}@{host}:{port}/{database}"

# Creating the engine is all that is needed here; the read below passes it as `con`.
engine = create_engine(database_url)

In [34]:
# Loading pattern to apply once the transformed DataFrames exist (kept as comments for now):
#   subset_df = source_df[['col_a', 'col_b']].copy()   # keep only the needed columns
#   subset_df.to_sql(name='table_name', con=engine, if_exists='append', index=False)
# Pass index=True instead when the DataFrame index should be written as a column
#   (for example as a possible primary key column).

# Verify data presence by querying the database and showing the first rows
departments_df = pd.read_sql_query('select * from departments', con=engine)
departments_df.head()

Unnamed: 0,dept_no,dept_name
0,d001,Marketing
1,d002,Finance
2,d003,Human Resources
3,d004,Production
4,d005,Development
