# API Calls and Database Creation

### Attribution: [Data provided by Financial Modeling Prep](https://financialmodelingprep.com/developer/docs/), Yahoo! Finance

In [15]:
import yfinance as yf
from urllib.request import urlopen
import certifi
import json
from sqlalchemy import create_engine
import psycopg2
import numpy as np
import pandas as pd
import os
import configparser
import ssl

### Tickers

In [12]:
# Retrieve Current S&P Stock Tickers - as of 08/22/2024
url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
wiki_symbols = pd.read_html(url)[0]["Symbol"].to_list()

# Wikipedia writes share classes with a dot (BRK.B, BF.B); the download below uses the dash form (BRK-B, BF-B).
# Translating every symbol, instead of removing two hard-coded ones, keeps this cell from raising
# ValueError when index membership or notation changes, and covers any other dotted symbol.
# Note: the converted tickers keep their original position in the list instead of moving to the end.
sp500_tickers = [symbol.replace(".", "-") for symbol in wiki_symbols]

### Yfinance API

In [13]:
# Pull daily price history for every ticker and reshape it to one row per (Date, Ticker);
# "Date" stays a datetime column after the reshape.

df = (
    yf.download(sp500_tickers, start="2023-01-01", end="2024-08-22", interval="1d")  # yfinance API call
    .sort_index(axis=1)               # put the column labels in a stable order
    .stack(future_stack=True)         # move the ticker column level into the rows (future_stack avoids the deprecated path)
    .reset_index()                    # turn the row index levels into ordinary columns
)
df.columns.name = None

# Quick look: the frame is long-format now, so per-stock calculations need groupby("Ticker").transform(...)
print(df.shape)
df.head()

[*********************100%%**********************]  503 of 503 completed


(206733, 8)


Unnamed: 0,Date,Ticker,Adj Close,Close,High,Low,Open,Volume
0,2023-01-03,A,148.440598,150.039993,153.130005,148.470001,151.960007,1414300.0
1,2023-01-03,AAL,12.74,12.74,13.0,12.53,12.91,21865100.0
2,2023-01-03,AAPL,123.904617,125.07,130.899994,124.169998,130.279999,112117500.0
3,2023-01-03,ABBV,151.773148,162.380005,163.020004,160.809998,162.039993,4937500.0
4,2023-01-03,ABNB,84.900002,84.900002,88.139999,84.300003,87.385002,3901200.0


### Financial Modeling Prep

In [14]:
# Financial Modeling Prep needs an API key; it is read from the environment so the secret never lives in the notebook
api_key = os.environ.get('STOCK_API_KEY')

In [32]:
from urllib.request import urlopen
import certifi
import json
import ssl


def get_jsonparsed_data(url):
    """
    Download `url` and return its body parsed as JSON.

    The request is made with an SSL context built from certifi's CA bundle
    (cafile=certifi.where()), so the server certificate is verified against that bundle.
    The response is opened in a `with` block so it is always closed, even if reading
    or decoding fails.

    Args:
    ------------
    url:str | url with api key to successfully download information

    Returns:
    ------------
    json.loads(data):list or dict | the decoded JSON document returned by the server

    Raises:
    ------------
    HTTPError | if any part of the query string is inaccurate, users must have their own API key "HTTP Error 401: Unauthorized"
    URLError | if the server cannot be reached
    json.JSONDecodeError | if the response body is not valid JSON

    Example : 
    url = ("https://financialmodelingprep.com/api/v3/search?query=AA&apikey=YOUR_API_KEY")
    get_jsonparsed_data(url)
    """

    # Create a custom SSL context backed by certifi's certificate bundle
    context = ssl.create_default_context(cafile=certifi.where())

    # Open the response as a context manager so it is released
    # as soon as the body has been read
    with urlopen(url, context=context) as response:
        data = response.read().decode("utf-8")  # read the API response

    # Return as JSON object
    return json.loads(data)

# Example call: fetch Apple's company profile to see what FMP sends back

## Endpoints worth knowing:
# "https://financialmodelingprep.com/api/v3/profile/AAPL?apikey=YOUR_API_KEY"  -> company profile!
# "https://financialmodelingprep.com/api/v3/search?query=AAPL&apikey={api_key}" -> symbol|name|currency|exchange ONLY

# Company profile request
url = f"https://financialmodelingprep.com/api/v3/profile/AAPL?apikey={api_key}"
aapl_profile = get_jsonparsed_data(url)

# Show the number of items, the type of the first item and the full payload
(len(aapl_profile), type(aapl_profile[0]), aapl_profile)

(1,
 dict,
 [{'symbol': 'AAPL',
   'price': 226.69,
   'beta': 1.244,
   'volAvg': 64811409,
   'mktCap': 3446617429000,
   'lastDiv': 1,
   'range': '164.08-237.23',
   'changes': 2.16,
   'companyName': 'Apple Inc.',
   'currency': 'USD',
   'cik': '0000320193',
   'isin': 'US0378331005',
   'cusip': '037833100',
   'exchange': 'NASDAQ Global Select',
   'exchangeShortName': 'NASDAQ',
   'industry': 'Consumer Electronics',
   'website': 'https://www.apple.com',
   'description': 'Apple Inc. designs, manufactures, and markets smartphones, personal computers, tablets, wearables, and accessories worldwide. The company offers iPhone, a line of smartphones; Mac, a line of personal computers; iPad, a line of multi-purpose tablets; and wearables, home, and accessories comprising AirPods, Apple TV, Apple Watch, Beats products, and HomePod. It also provides AppleCare support and cloud services; and operates various platforms, including the App Store that allow customers to discover and downlo

In [78]:
# Function to create the table of stock profiles
def create_sp500_table():
    """
    Create the `sp500_profiles` table that stores the FMP company profiles.

    The statement is CREATE TABLE IF NOT EXISTS, so calling this function again
    when the table is already there changes nothing.

    Args:
    ------------------
    None | connection settings are read from the environment variables
           stock_db, db_user, db_password, db_host and db_port at call time,
           so the function does not depend on notebook cells that run later

    Returns:
    ------------------
    None | the table is created in the PostgreSQL database and a confirmation is printed

    Errors:
    ------------------
    Connection error (typically psycopg2.OperationalError): if the environment variables
    are missing or wrong, the connection to the database cannot be made
    """
    create_table_query = """
    CREATE TABLE IF NOT EXISTS sp500_profiles (
        id SERIAL,
        asof_date DATE,
        symbol VARCHAR(10) PRIMARY KEY,
        company_name TEXT,
        exchange TEXT,
        sector TEXT,
        industry TEXT,
        mktCap BIGINT,
        description TEXT,
        ceo TEXT,
        ipo_date DATE,
        website TEXT
    );
    """

    connection = psycopg2.connect(
        dbname=os.getenv('stock_db'),
        user=os.getenv('db_user'),
        password=os.getenv('db_password'),
        host=os.getenv('db_host'),
        port=os.getenv('db_port')
    )
    try:
        # `with connection` commits on success and rolls back on error,
        # but it does NOT close the connection - hence the finally below
        with connection:
            with connection.cursor() as cursor:
                cursor.execute(create_table_query)
    finally:
        connection.close()

    print("Table sp500_profiles created successfully.")

# Create the table (nothing changes if it already exists, because the DDL uses CREATE TABLE IF NOT EXISTS)
create_sp500_table()

Table sp500_profiles created successfully.


In [26]:
import json
import ssl
import certifi
import psycopg2
from urllib.request import urlopen


# Function to insert data into PostgreSQL table
def insert_data_to_postgres(connection, stock_profile):
    """
    Write one FMP company profile into the `sp500_profiles` table.

    The row is upserted on `symbol` (the primary key): a new symbol is inserted, an
    existing one has its columns and `asof_date` refreshed, so re-running a load does
    not fail on duplicates. The statement is committed on success; on any error the
    transaction is rolled back before the error is re-raised, which keeps the shared
    connection usable for the next profile.

    Args:
    ----------------
    connection:psycopg2.extensions.connection | connection to the database via psycopg2
    stock_profile:dict | one profile from the FMP api json response; missing keys are stored as NULL

    Returns:
    ------------------
    None | one row is inserted into (or updated in) the sp500_profiles table

    Errors:
    ------------------
    Any database error raised while executing the statement is re-raised after the rollback
    """
    upsert_query = """
    INSERT INTO sp500_profiles (symbol, asof_date, company_name, exchange, sector, industry, mktCap, description, ceo, ipo_date, website)
    VALUES (%s, CURRENT_DATE, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    ON CONFLICT (symbol) DO UPDATE SET
        asof_date = EXCLUDED.asof_date,
        company_name = EXCLUDED.company_name,
        exchange = EXCLUDED.exchange,
        sector = EXCLUDED.sector,
        industry = EXCLUDED.industry,
        mktCap = EXCLUDED.mktCap,
        description = EXCLUDED.description,
        ceo = EXCLUDED.ceo,
        ipo_date = EXCLUDED.ipo_date,
        website = EXCLUDED.website
    """
    row_values = (
        stock_profile.get("symbol"),
        stock_profile.get("companyName"),
        stock_profile.get("exchange"),
        stock_profile.get("sector"),
        stock_profile.get("industry"),
        stock_profile.get("mktCap"),
        stock_profile.get("description"),
        stock_profile.get("ceo"),
        # An empty string is not a valid DATE literal in PostgreSQL, so store blanks as NULL
        stock_profile.get("ipoDate") or None,
        stock_profile.get("website"),
    )

    # Explicit commit/rollback (rather than `with connection:`) so this also works when the
    # caller already holds the connection inside its own `with` block
    try:
        with connection.cursor() as cursor:
            cursor.execute(upsert_query, row_values)
        connection.commit()
    except Exception:
        connection.rollback()
        raise

In [79]:
# Database settings, each taken from the environment variable of the matching name
db_name, db_user, db_password, db_host, db_port = (
    os.environ.get(variable)
    for variable in ('stock_db', 'db_user', 'db_password', 'db_host', 'db_port')
)


# Verifying database connection

# psycopg2's `with connection:` only ends the transaction - it does not close the
# connection - so the connection is opened plainly and closed in a finally block
connection = psycopg2.connect(
    dbname=db_name,
    user=db_user,
    password=db_password,
    host=db_host,
    port=db_port
)
try:
    # Simple query to check the connection is usable
    with connection.cursor() as cursor:
        cursor.execute("SELECT version();")
        db_version = cursor.fetchone()[0]
finally:
    connection.close()

print(f"Connected to database: {db_name}")
print(f"Database version: {db_version}")


# Retrieving Database Schema

# Opened without `with` and closed in `finally`: psycopg2's connection context manager
# ends the transaction but leaves the connection itself open
connection = psycopg2.connect(
    dbname=db_name,
    user=db_user,
    password=db_password,
    host=db_host,
    port=db_port
)
try:
    with connection.cursor() as cursor:
        cursor.execute("""SELECT * FROM information_schema.columns WHERE table_name = 'sp500_profiles';""")
        # cursor.description holds one entry per result column; its first field is the column name
        display(pd.DataFrame(cursor.fetchall(), columns = [desc[0] for desc in cursor.description]))
        print(type(cursor), type(connection))
finally:
    connection.close()

Connected to database: project_database
Database version: PostgreSQL 16.4 (Postgres.app) on aarch64-apple-darwin21.6.0, compiled by Apple clang version 14.0.0 (clang-1400.0.29.102), 64-bit


Unnamed: 0,table_catalog,table_schema,table_name,column_name,ordinal_position,column_default,is_nullable,data_type,character_maximum_length,character_octet_length,...,is_identity,identity_generation,identity_start,identity_increment,identity_maximum,identity_minimum,identity_cycle,is_generated,generation_expression,is_updatable
0,project_database,public,sp500_profiles,asof_date,2,,YES,date,,,...,NO,,,,,,NO,NEVER,,YES
1,project_database,public,sp500_profiles,mktcap,8,,YES,bigint,,,...,NO,,,,,,NO,NEVER,,YES
2,project_database,public,sp500_profiles,ipo_date,11,,YES,date,,,...,NO,,,,,,NO,NEVER,,YES
3,project_database,public,sp500_profiles,id,1,nextval('sp500_profiles_id_seq'::regclass),NO,integer,,,...,NO,,,,,,NO,NEVER,,YES
4,project_database,public,sp500_profiles,sector,6,,YES,text,,1073742000.0,...,NO,,,,,,NO,NEVER,,YES
5,project_database,public,sp500_profiles,industry,7,,YES,text,,1073742000.0,...,NO,,,,,,NO,NEVER,,YES
6,project_database,public,sp500_profiles,description,9,,YES,text,,1073742000.0,...,NO,,,,,,NO,NEVER,,YES
7,project_database,public,sp500_profiles,ceo,10,,YES,text,,1073742000.0,...,NO,,,,,,NO,NEVER,,YES
8,project_database,public,sp500_profiles,website,12,,YES,text,,1073742000.0,...,NO,,,,,,NO,NEVER,,YES
9,project_database,public,sp500_profiles,symbol,3,,NO,character varying,10.0,40.0,...,NO,,,,,,NO,NEVER,,YES


<class 'psycopg2.extensions.cursor'> <class 'psycopg2.extensions.connection'>


<hr style="border: 3px solid blue">

### Testing 

In [33]:
# Adding a single entry to the db

# insert_data_to_postgres commits its own work; the connection is closed explicitly
# because psycopg2's `with connection:` would leave it open
connection = psycopg2.connect(
    dbname=db_name,
    user=db_user,
    password=db_password,
    host=db_host,
    port=db_port
)
try:
    insert_data_to_postgres(connection, aapl_profile[0])
finally:
    connection.close()

In [77]:
# using psycopg2 to verify row entry, then clean up after the test
# NOTE: this cell DROPS sp500_profiles, so the table has to be re-created before loading data

connection = psycopg2.connect(
    dbname=db_name,
    user=db_user,
    password=db_password,
    host=db_host,
    port=db_port
)
try:
    # `with connection` commits the DROP on success and rolls back on error;
    # it does not close the connection, which is done in the finally block
    with connection:
        with connection.cursor() as cursor:
            cursor.execute("""SELECT * FROM sp500_profiles""")
            print(cursor.fetchall())
            cursor.execute("""DROP TABLE IF EXISTS sp500_profiles""")
finally:
    connection.close()

[]


<hr style="border:3px solid blue">

### Inserting Values Into the Database


In [81]:
# List of S&P 500 stock symbols, split into batches (use your list here)
sp500_tickers_1 = sp500_tickers[:200]
sp500_tickers_2 = sp500_tickers[200:400]
sp500_tickers_3 = sp500_tickers[400:]

# The testing section drops sp500_profiles, so make sure the table exists before loading
# (CREATE TABLE IF NOT EXISTS - harmless when the table is already there)
create_sp500_table()

# Initialize PostgreSQL connection
connection = psycopg2.connect(
    dbname=db_name,
    user=db_user,
    password=db_password,
    host=db_host,
    port=db_port
)

# Symbols whose response was empty or not a list; reported at the end instead of being dropped silently
skipped_symbols = []

try:
    # Iterate over the list of S&P 500 stocks -> maximum of 250 API calls per day -> tread carefully --------------------------
    for symbol in sp500_tickers_1:
        url = f"https://financialmodelingprep.com/api/v3/profile/{symbol}?apikey={api_key}"
        stock_data = get_jsonparsed_data(url)

        # Ensure the response is a list and contains data
        if stock_data and isinstance(stock_data, list):
            stock_profile = stock_data[0]  # The API returns a list with one profile
            insert_data_to_postgres(connection, stock_profile)
        else:
            skipped_symbols.append(symbol)
finally:
    # Close the PostgreSQL connection even when an API call or an insert fails part-way
    connection.close()

if skipped_symbols:
    print(f"No profile returned for {len(skipped_symbols)} symbol(s): {skipped_symbols}")

In [85]:
# System Variables
db_name = os.getenv('stock_db')
db_user = os.getenv('db_user')
db_password = os.getenv('db_password')
db_host = os.getenv('db_host')
db_port = os.getenv('db_port')

# Create the connection string
# NOTE: a user name or password containing URL-reserved characters (e.g. "@", "/", ":")
# has to be percent-encoded before it is placed in this URL
connection_string = f"postgresql+psycopg2://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}"

# Create the SQLAlchemy engine
engine = create_engine(connection_string)
print(type(engine))

# Read the whole table back; report the actual failure instead of hiding it behind a bare except
try:
    profile_df = pd.read_sql("""SELECT * FROM sp500_profiles""", engine)
except Exception as exc:
    print(f"No DB Found: {exc}")
else:
    print(profile_df.shape, profile_df.columns)
    # display() is needed here: a bare profile_df.head() inside try/else renders nothing
    display(profile_df.head())

<class 'sqlalchemy.engine.base.Engine'>
(0, 12) Index(['id', 'asof_date', 'symbol', 'company_name', 'exchange', 'sector',
       'industry', 'mktcap', 'description', 'ceo', 'ipo_date', 'website'],
      dtype='object') LALALALALALA
