In [6]:
import pandas as pd
import json
import requests
from pymongo import MongoClient, DESCENDING
from bson.objectid import ObjectId
import os
from pprint import pprint
import datetime as dt
import numpy as np
from dotenv import dotenv_values

In [13]:
secrets = dotenv_values('.env')

> # Initialize PyMongo Connection

In [None]:
cluster = secrets['mongodb_conn']
client = MongoClient(cluster)

# print(client.list_database_names())

db = client.FinanceApp
balance_sheet_collection = db.balance_sheet
income_collection = db.income_statement
cash_collection = db.cash_flow_statement
company_profile = db.company_profile
historical = db.historical
stock_split = db.stock_split
dailyquote = db.dailyquote

client.server_info()

> # Create Index for Collection

In [18]:
dailyquote.create_index("index_id", unique=True)

'index_id_1'

> # Import API Keys

In [None]:
fmp_api = secrets['fmp_api']
alpha_vantage_api = secrets['alpha_vantage_api']

> # Define function to access elements of an entry in database
Use function to access individual elements of a document in a collection

In [None]:
def access_entry(collection_name, entry_name, entry_value, return_value):
    data = collection_name.find({entry_name:entry_value})

    data = [i[return_value] for i in data]

    return data

> # Get list of tickers in database

In [268]:
balance_list = list(set([i['symbol'] for i in balance_sheet_collection.find()]))
income_list = list(set([i['symbol'] for i in income_collection.find()]))
cash_list = list(set([i['symbol'] for i in cash_collection.find()]))
company_list = list(set([i['symbol'] for i in company_profile.find()]))
list_tickers = [balance_list, income_list, cash_list, company_list]
lengths = [len(i) for i in list_tickers]
missing_tickers = [i for i in list_tickers if len(i) == max(lengths)]

In [184]:
terms_interested = {'Revenue': 'revenue',
                        'Gross margin%': 'grossProfitRatio',
                        'Operating Income': 'operatingIncome',
                        'Operating Margin %': 'operatingIncomeRatio',
                        'Net Income': 'netIncome',
                        'Net Income Margin': 'netIncomeRatio',
                        'Earnings per Share': 'epsdiluted',
                        'Shares Oustanding (diluted)': 'weightedAverageShsOutDil',
                        'Dividends': 'dividendsPaid',
                        'Operating Cash Flow': 'operatingCashFlow',
                        'Cap Spending': 'capitalExpenditure',
                        'Free Cash Flow': 'freeCashFlow',
                        'Free Cash Flow per Share': 'freeCashFlowpershare',
                        'Working Capital': 'totalCurrentAssets - totalCurrentLiabilities',
                        'Net Debt': 'netDebt'
                        }
statements_type = ['Income Statement', 'Cash Flow Statement', 'Balance Sheet']

In [279]:
def read_statement(type_statement, ticker):

    if type_statement == 'Income Statement':
        statement = [i for i in income_collection.find(
            {'symbol': ticker}).sort('date', DESCENDING)]

    elif type_statement == 'Cash Flow Statement':
        statement = [i for i in cash_collection.find(
            {'symbol': ticker}).sort('date', DESCENDING)]

    else:
        statement = [i for i in balance_sheet_collection.find(
            {'symbol': ticker}).sort('date', DESCENDING)]

    return statement

> # Predict Future Growth Financial Metric

In [461]:
def project_metric(df, metric, past_n_years, first_n_years, second_n_years, first_growth=None, second_growth=None):
    projected = [df[metric][-1]]
    if first_growth == 0:
        avg_growth = df[metric].pct_change()[-past_n_years:].mean()
        for i in range(first_n_years+second_n_years):
            projected.append(projected[i] * (1 + avg_growth))
    else:
        for i in range(first_n_years):
            projected.append(projected[i] * (1 + first_growth))
        for i in range(first_n_years, second_n_years + first_n_years):
            projected.append(projected[i] * (1 + second_growth))
    return projected

In [466]:
revenue = 1

if revenue:
    print(revenue)
else:
    print("Revenue is None")

1


> # Insert to database from API per collection

In [None]:
# balance_sheet_collection.create_index('index_id', unique=True)
# cash_collection.create_index('index_id', unique=True)
# income_collection.create_index('index_id', unique=True)
# company_profile.create_index('index_id', unique=True)
# historical.create_index('index_id', unique=True)
stock_split.create_index('index_id', unique=True)

In [None]:
# The web framework gets post_id from the URL and passes it as a string
def get(post_id):
    # Convert from string to ObjectId:
    document = client.db.collection.find_one({'_id': ObjectId(post_id)})

> # Delete database entries (only run when needed)

In [None]:
balance_sheet_collection.delete_many({'symbol':'AF'})
cash_collection.delete_many({'symbol':'AF'})
income_collection.delete_many({'symbol':'AF'})

In [None]:
balance_sheet_collection.delete_many({'symbol':'AAL','calendarYear':'2021'})
cash_collection.delete_many({'symbol':'AAL','calendarYear':'2021'})
income_collection.delete_many({'symbol':'AAL','calendarYear':'2021'})

> # StockSplit

In [4]:
def download_stocksplit(ticker):
    r = requests.get(
        f"https://financialmodelingprep.com/api/v3/historical-price-full/stock_split/{ticker}?apikey={fmp_api}"
    )
    r = r.json()
    return r

In [5]:
aapl_stocksplit = download_stocksplit('AAPL')

In [6]:
for i in aapl_stocksplit['historical']:
    i['index_id'] = f"{aapl_stocksplit['symbol']}_{i['date']}"
    i['date'] = dt.datetime.strptime(i['date'], '%Y-%m-%d')
    i['symbol'] = aapl_stocksplit['symbol']

In [8]:
df_stocksplit = pd.DataFrame.from_records([x for i,x in enumerate(stock_split.find({'symbol':'AAPL'}))], index='date').sort_index()

In [9]:
for i in stock_split.find({'symbol':df['symbol'][0]}):
    print(i['date'], max(df['4. close']))

2020-08-31 00:00:00 99.9900
2014-06-09 00:00:00 99.9900
2005-02-28 00:00:00 99.9900
2000-06-21 00:00:00 99.9900
1987-06-16 00:00:00 99.9900


> # Stock Price

In [11]:
def get_daily_quote(ticker):
    url = "https://alpha-vantage.p.rapidapi.com/query"
    endpoint='/query?function=GLOBAL_QUOTE&symbol=NVDA&datatype=json'
    headers={"X-RapidAPI-Key": f"{alpha_vantage_api}",
            "X-RapidAPI-Host": "alpha-vantage.p.rapidapi.com"}
    querystring = {"function":"GLOBAL_QUOTE",
                    "symbol":f"{ticker}",
                    "datatype":"json"}
    r = requests.request("GET", url=url, headers=headers, params=querystring)
    r = r.json()
    return r

In [14]:
get_daily_quote('NVDA')

{'Global Quote': {'01. symbol': 'NVDA',
  '02. open': '265.8400',
  '03. high': '270.8000',
  '04. low': '264.2700',
  '05. price': '270.3700',
  '06. volume': '39765434',
  '07. latest trading day': '2023-04-06',
  '08. previous close': '268.8100',
  '09. change': '1.5600',
  '10. change percent': '0.5803%'}}

In [143]:
def stock_price_api(ticker):
    url = "https://alpha-vantage.p.rapidapi.com/query"
    headers = {"X-RapidAPI-Key": f"{alpha_vantage_api}",
               "X-RapidAPI-Host": "alpha-vantage.p.rapidapi.com"}
    querystring = {"function":"TIME_SERIES_DAILY","symbol":f"{ticker}","outputsize":"full","datatype":"json"}
    response = requests.request("GET", url=url, headers=headers, params=querystring)
    return response.json()

In [144]:
file = stock_price_api('AAPL')

In [10]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Candlestick(
        x=df.index[4000:], open=df[f'1. open'], high=df['2. high'], low=df['3. low'], close=df[f'4. close']),
    secondary_y=False,
)
# for i in stock_split.find({'symbol':df['symbol'][0]}):
fig.add_shape(type="line",
    x0=df_stocksplit.index[-1], x1=df_stocksplit.index[-1], y0=max(df['4. close']),
    line=dict(color="RoyalBlue",width=3)
)
# fig.add_trace(
#     go.Bar(
#         x=df.index, y=df['5. volume'], marker=dict({'color': 'darkorange'}), texttemplate="%{value:,}", textposition="inside", name="Volume"),
#     secondary_y=True,
# )


> # Change price entries to double format

In [177]:
historical.update_many({},
    [
    {
        '$set': {
            'open': {
                '$toDouble': '$open'
            }, 
            'high': {
                '$toDouble': '$high'
            }, 
            'low': {
                '$toDouble': '$low'
            }, 
            'close': {
                '$toDouble': '$close'
            }, 
            'volume': {
                '$toDouble': '$volume'
            }
        }
    }
])


<pymongo.results.UpdateResult at 0x1c0c5124970>

In [None]:
# Update date from string to datetime format
historical.update_many({},[
    {
        '$set': {
            'date': {
                '$dateFromString': {
                    'dateString': '$date', 
                    'format': '%Y-%m-%d'
                }
            }
        }
    }
])

In [171]:
# check to see if field exists
[i for i in historical.find({ 'symbol': { '$exists': False } })]

[]

> # CPI Data

In [254]:
# get CPI data
## interval=monthly and semiannual are accepted.
def get_cpi(interval):
    url = f'https://www.alphavantage.co/query?function=CPI&interval={interval}&apikey={alpha_vantage_api}'
    r = requests.get(url)
    data = r.json()

    yield data
    
for i in get_cpi('semiannual'):
    pprint(i)

{'data': [{'date': '2022-07-01', 'value': '296.963'},
          {'date': '2022-01-01', 'value': '288.347'},
          {'date': '2021-07-01', 'value': '275.703'},
          {'date': '2021-01-01', 'value': '266.236'},
          {'date': '2020-07-01', 'value': '260.065'},
          {'date': '2020-01-01', 'value': '257.557'},
          {'date': '2019-07-01', 'value': '256.903'},
          {'date': '2019-01-01', 'value': '254.412'},
          {'date': '2018-07-01', 'value': '252.125'},
          {'date': '2018-01-01', 'value': '250.089'},
          {'date': '2017-07-01', 'value': '246.163'},
          {'date': '2017-01-01', 'value': '244.076'},
          {'date': '2016-07-01', 'value': '241.237'},
          {'date': '2016-01-01', 'value': '238.778'},
          {'date': '2015-07-01', 'value': '237.769'},
          {'date': '2015-01-01', 'value': '236.265'},
          {'date': '2014-07-01', 'value': '237.088'},
          {'date': '2014-01-01', 'value': '236.384'},
          {'date': '2013-07-

> # GDP Data

In [253]:
# get real GDP data
## interval=quarterly and annual are accepted.
def get_GDP(interval):
    url = f'https://www.alphavantage.co/query?function=REAL_GDP&interval={interval}&apikey={alpha_vantage_api}'
    r = requests.get(url)
    data = r.json()

    yield data

for i in get_GDP('annual'):
    pprint(i)

{'data': [{'date': '2022-01-01', 'value': '20015.379'},
          {'date': '2021-01-01', 'value': '19609.812'},
          {'date': '2020-01-01', 'value': '18509.143'},
          {'date': '2019-01-01', 'value': '19036.052'},
          {'date': '2018-01-01', 'value': '18609.078'},
          {'date': '2017-01-01', 'value': '18076.651'},
          {'date': '2016-01-01', 'value': '17680.274'},
          {'date': '2015-01-01', 'value': '17390.295'},
          {'date': '2014-01-01', 'value': '16932.051'},
          {'date': '2013-01-01', 'value': '16553.348'},
          {'date': '2012-01-01', 'value': '16253.97'},
          {'date': '2011-01-01', 'value': '15891.534'},
          {'date': '2010-01-01', 'value': '15648.991'},
          {'date': '2009-01-01', 'value': '15236.262'},
          {'date': '2008-01-01', 'value': '15642.962'},
          {'date': '2007-01-01', 'value': '15623.871'},
          {'date': '2006-01-01', 'value': '15315.943'},
          {'date': '2005-01-01', 'value': '14901.

> # Treasury Yield

In [255]:
# get treasury yield
## interval=daily, weekly, and monthly are accepted
## maturity=3month, 2year, 5year, 7year, 10year, and 30year are accepted
def get_treasury(interval, maturity):
    url = f'https://www.alphavantage.co/query?function=TREASURY_YIELD&interval={interval}&maturity={maturity}&apikey={alpha_vantage_api}'
    r = requests.get(url)
    data = r.json()

    yield data

for i in get_treasury('monthly','2year'):
    pprint(i)

{'data': [{'date': '2023-02-01', 'value': '4.53'},
          {'date': '2023-01-01', 'value': '4.21'},
          {'date': '2022-12-01', 'value': '4.29'},
          {'date': '2022-11-01', 'value': '4.50'},
          {'date': '2022-10-01', 'value': '4.38'},
          {'date': '2022-09-01', 'value': '3.86'},
          {'date': '2022-08-01', 'value': '3.25'},
          {'date': '2022-07-01', 'value': '3.04'},
          {'date': '2022-06-01', 'value': '3.00'},
          {'date': '2022-05-01', 'value': '2.62'},
          {'date': '2022-04-01', 'value': '2.54'},
          {'date': '2022-03-01', 'value': '1.91'},
          {'date': '2022-02-01', 'value': '1.44'},
          {'date': '2022-01-01', 'value': '0.98'},
          {'date': '2021-12-01', 'value': '0.68'},
          {'date': '2021-11-01', 'value': '0.51'},
          {'date': '2021-10-01', 'value': '0.39'},
          {'date': '2021-09-01', 'value': '0.24'},
          {'date': '2021-08-01', 'value': '0.22'},
          {'date': '2021-07-01'

> # Federal funds rate

In [252]:
# get federal funds rate
## interval=daily, weekly, and monthly are accepted.
def get_fed_rate(interval):
    url = f'https://www.alphavantage.co/query?function=FEDERAL_FUNDS_RATE&interval={interval}&apikey={alpha_vantage_api}'
    r = requests.get(url)
    data = r.json()

    yield data

for i in get_fed_rate('monthly'):
    pprint(i)

{'data': [{'date': '2023-02-01', 'value': '4.57'},
          {'date': '2023-01-01', 'value': '4.33'},
          {'date': '2022-12-01', 'value': '4.10'},
          {'date': '2022-11-01', 'value': '3.78'},
          {'date': '2022-10-01', 'value': '3.08'},
          {'date': '2022-09-01', 'value': '2.56'},
          {'date': '2022-08-01', 'value': '2.33'},
          {'date': '2022-07-01', 'value': '1.68'},
          {'date': '2022-06-01', 'value': '1.21'},
          {'date': '2022-05-01', 'value': '0.77'},
          {'date': '2022-04-01', 'value': '0.33'},
          {'date': '2022-03-01', 'value': '0.20'},
          {'date': '2022-02-01', 'value': '0.08'},
          {'date': '2022-01-01', 'value': '0.08'},
          {'date': '2021-12-01', 'value': '0.08'},
          {'date': '2021-11-01', 'value': '0.08'},
          {'date': '2021-10-01', 'value': '0.08'},
          {'date': '2021-09-01', 'value': '0.08'},
          {'date': '2021-08-01', 'value': '0.09'},
          {'date': '2021-07-01'

> # Retail sales

In [256]:
# get retail sales data

def get_retail_sales():
    url = f'https://www.alphavantage.co/query?function=RETAIL_SALES&apikey={alpha_vantage_api}'
    r = requests.get(url)
    data = r.json()

    yield data

for i in get_retail_sales():
    pprint(i)

{'data': [{'date': '2023-01-01', 'value': '540735'},
          {'date': '2022-12-01', 'value': '657682'},
          {'date': '2022-11-01', 'value': '609862'},
          {'date': '2022-10-01', 'value': '597349'},
          {'date': '2022-09-01', 'value': '577370'},
          {'date': '2022-08-01', 'value': '613416'},
          {'date': '2022-07-01', 'value': '600746'},
          {'date': '2022-06-01', 'value': '609933'},
          {'date': '2022-05-01', 'value': '614474'},
          {'date': '2022-04-01', 'value': '594426'},
          {'date': '2022-03-01', 'value': '597077'},
          {'date': '2022-02-01', 'value': '506400'},
          {'date': '2022-01-01', 'value': '518146'},
          {'date': '2021-12-01', 'value': '632849'},
          {'date': '2021-11-01', 'value': '579687'},
          {'date': '2021-10-01', 'value': '557737'},
          {'date': '2021-09-01', 'value': '532840'},
          {'date': '2021-08-01', 'value': '553822'},
          {'date': '2021-07-01', 'value': '554

> # Durables data

In [257]:
# get durables data
def get_durables():
    url = 'https://www.alphavantage.co/query?function=DURABLES&apikey=demo'
    r = requests.get(url)
    data = r.json()

    yield data
    
for i in get_durables():
    pprint(i)

{'data': [{'date': '2023-01-01', 'value': '250074'},
          {'date': '2022-12-01', 'value': '296687'},
          {'date': '2022-11-01', 'value': '259606'},
          {'date': '2022-10-01', 'value': '273120'},
          {'date': '2022-09-01', 'value': '289932'},
          {'date': '2022-08-01', 'value': '279323'},
          {'date': '2022-07-01', 'value': '249456'},
          {'date': '2022-06-01', 'value': '294470'},
          {'date': '2022-05-01', 'value': '263971'},
          {'date': '2022-04-01', 'value': '260366'},
          {'date': '2022-03-01', 'value': '294088'},
          {'date': '2022-02-01', 'value': '250105'},
          {'date': '2022-01-01', 'value': '242492'},
          {'date': '2021-12-01', 'value': '267477'},
          {'date': '2021-11-01', 'value': '244327'},
          {'date': '2021-10-01', 'value': '247086'},
          {'date': '2021-09-01', 'value': '260121'},
          {'date': '2021-08-01', 'value': '250971'},
          {'date': '2021-07-01', 'value': '228

> # Unemployment

In [259]:
def get_unemployment():
    url = f'https://www.alphavantage.co/query?function=UNEMPLOYMENT&apikey={alpha_vantage_api}'
    r = requests.get(url)
    data = r.json()

    yield data

for i in get_unemployment():
    pprint(i)

{'data': [{'date': '2023-01-01', 'value': '3.4'},
          {'date': '2022-12-01', 'value': '3.5'},
          {'date': '2022-11-01', 'value': '3.6'},
          {'date': '2022-10-01', 'value': '3.7'},
          {'date': '2022-09-01', 'value': '3.5'},
          {'date': '2022-08-01', 'value': '3.7'},
          {'date': '2022-07-01', 'value': '3.5'},
          {'date': '2022-06-01', 'value': '3.6'},
          {'date': '2022-05-01', 'value': '3.6'},
          {'date': '2022-04-01', 'value': '3.6'},
          {'date': '2022-03-01', 'value': '3.6'},
          {'date': '2022-02-01', 'value': '3.8'},
          {'date': '2022-01-01', 'value': '4.0'},
          {'date': '2021-12-01', 'value': '3.9'},
          {'date': '2021-11-01', 'value': '4.2'},
          {'date': '2021-10-01', 'value': '4.5'},
          {'date': '2021-09-01', 'value': '4.8'},
          {'date': '2021-08-01', 'value': '5.2'},
          {'date': '2021-07-01', 'value': '5.4'},
          {'date': '2021-06-01', 'value': '5.9'},


> # Non-farm payroll

In [261]:
def get_nonfarm_payroll():
    url = f'https://www.alphavantage.co/query?function=NONFARM_PAYROLL&apikey={alpha_vantage_api}'
    r = requests.get(url)
    data = r.json()

    yield data

for i in get_nonfarm_payroll():
    pprint(i)

{'data': [{'date': '2023-01-01', 'value': '152844'},
          {'date': '2022-12-01', 'value': '155349'},
          {'date': '2022-11-01', 'value': '155642'},
          {'date': '2022-10-01', 'value': '155041'},
          {'date': '2022-09-01', 'value': '153809'},
          {'date': '2022-08-01', 'value': '153285'},
          {'date': '2022-07-01', 'value': '152875'},
          {'date': '2022-06-01', 'value': '153217'},
          {'date': '2022-05-01', 'value': '152291'},
          {'date': '2022-04-01', 'value': '151449'},
          {'date': '2022-03-01', 'value': '150411'},
          {'date': '2022-02-01', 'value': '149606'},
          {'date': '2022-01-01', 'value': '147932'},
          {'date': '2021-12-01', 'value': '150740'},
          {'date': '2021-11-01', 'value': '150543'},
          {'date': '2021-10-01', 'value': '149605'},
          {'date': '2021-09-01', 'value': '147917'},
          {'date': '2021-08-01', 'value': '147159'},
          {'date': '2021-07-01', 'value': '146

> # Stock Peers

In [526]:
def stock_peers(ticker):
    r = requests.get(
        f"https://financialmodelingprep.com/api/v4/stock_peers?symbol={ticker}&apikey={fmp_api}"
    )
    r = r.json()
    return r

> # Company Profile

In [555]:
def get_company_profile(ticker):
    url = f'https://www.alphavantage.co/query?function=OVERVIEW&symbol={ticker}&apikey={alpha_vantage_api}'
    r = requests.get(url)
    r = r.json()

    return r
    

In [560]:
get_company_profile('')

{}

># News Sentiment

In [528]:
def news_sentiment():
    url = f'https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tickers=AAPL&apikey={alpha_vantage_api}'
    r = requests.get(url)
    r = r.json()

    return r

In [530]:
AAPL_sentiment = news_sentiment()

In [550]:
[{item:i[item] for item in i if item == 'title' or item == 'overall_sentiment_score'} for i in AAPL_sentiment['feed']]

[{'title': 'Futures: Bitcoin, Tesla In Focus After Mixed Stock Market Action',
  'overall_sentiment_score': 0.071357},
 {'title': 'Stock Market Today: Stocks Finish Mostly Higher After First Citizens Buys SVB Assets',
  'overall_sentiment_score': 0.149302},
 {'title': 'P/E Ratio Insights for Apple - Apple  ( NASDAQ:AAPL ) ',
  'overall_sentiment_score': 0.128615},
 {'title': 'Dow Jones Gains As Bank Crisis Eases; These 3 Warren Buffett Stocks Eye Entries',
  'overall_sentiment_score': 0.185599},
 {'title': '3 charts show U.S. bank failures causing stock-market pain beneath the surface',
  'overall_sentiment_score': -0.130268},
 {'title': "Apple's leap into the metaverse could jolt a sputtering market, but it won't happen overnight",
  'overall_sentiment_score': -0.002422},
 {'title': "Elon Musk's Neuralink vs. Bill Gates and Jeff Bezos' Synchron: Billionaires Race to Unlock Your Brain",
  'overall_sentiment_score': 0.170517},
 {'title': 'Apple Snaps AI Startup WaveOne Specializing In C

In [547]:
AAPL_sentiment['feed'][0]['ticker_sentiment']

[{'ticker': 'META',
  'relevance_score': '0.059268',
  'ticker_sentiment_score': '0.119',
  'ticker_sentiment_label': 'Neutral'},
 {'ticker': 'AAPL',
  'relevance_score': '0.088799',
  'ticker_sentiment_score': '0.045161',
  'ticker_sentiment_label': 'Neutral'},
 {'ticker': 'C',
  'relevance_score': '0.059268',
  'ticker_sentiment_score': '0.146002',
  'ticker_sentiment_label': 'Neutral'},
 {'ticker': 'CSCO',
  'relevance_score': '0.147456',
  'ticker_sentiment_score': '0.077708',
  'ticker_sentiment_label': 'Neutral'},
 {'ticker': 'JPM',
  'relevance_score': '0.059268',
  'ticker_sentiment_score': '0.146002',
  'ticker_sentiment_label': 'Neutral'},
 {'ticker': 'NKE',
  'relevance_score': '0.118209',
  'ticker_sentiment_score': '0.091048',
  'ticker_sentiment_label': 'Neutral'},
 {'ticker': 'ALGN',
  'relevance_score': '0.118209',
  'ticker_sentiment_score': '0.053693',
  'ticker_sentiment_label': 'Neutral'},
 {'ticker': 'COIN',
  'relevance_score': '0.059268',
  'ticker_sentiment_scor

> # Twitter API

In [565]:
def get_tweets_by_search():
    url = "https://twitter135.p.rapidapi.com/Search/"

    querystring = {"q":"Apple","count":"20"}

    headers = {
        "X-RapidAPI-Key": f"{alpha_vantage_api}",
        "X-RapidAPI-Host": "twitter135.p.rapidapi.com"
    }

    response = requests.request("GET", url, headers=headers, params=querystring)
    r = requests.get(url)

    pprint(response.text)

get_tweets_by_search()

('{"globalObjects":{"tweets":{"1641133342195056668":{"created_at":"Wed Mar 29 '
 '17:40:54 +0000 '
 '2023","id":1641133342195056668,"id_str":"1641133342195056668","full_text":"aaron '
 'hibell who created a remaster of the tetris theme song into the song '
 'benevolence (which is a song on the tetris apple tv film soundtrack) said in '
 'a follow up comment that aespa\\u2019s song hold on tight will be a version '
 'of the same song!\\n\\nhold on tight is going to be a banger! '
 'href=\\"http:\\/\\/twitter.com\\/download\\/iphone\\" '
 'rel=\\"nofollow\\"\\u003eTwitter for '
 'iPhone\\u003c\\/a\\u003e","in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user_id":1207093837786701824,"user_id_str":"1207093837786701824","geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"retweet_count":192,"favorite_count":463,"reply_count":1,"quote_count":70,"conversati

# SEC API

In [3]:
# Get CIK list from SEC.gov
def get_cik_list() -> pd.DataFrame:
    url = r"https://www.sec.gov/files/company_tickers.json"
    cik_raw = requests.get(url)
    cik_json = cik_raw.json()
    cik_df = pd.DataFrame.from_dict(cik_json).T
    return cik_df

In [4]:
# Get specific ticker's CIK number
def get_ticker_cik(ticker: str, cik_df: pd.DataFrame):
    ticker_cik = cik_df.query(f"ticker == '{ticker}'")['cik_str']
    cik = f"{ticker_cik[0]:010d}"
    return cik

In [76]:
def get_submissions(cik):
    url = f"https://data.sec.gov/submissions/CIK{cik}.json"
    response = requests.get(url)
    data = json.loads(response.text)
    return data

def get_company_concept(cik, taxonomy, tag):
    url = f"https://data.sec.gov/api/xbrl/companyconcept/CIK{cik}/{taxonomy}/{tag}.json"
    response = requests.get(url)
    data = json.loads(response.text)
    return data

def get_company_facts(cik):
    url = f"https://data.sec.gov/api/xbrl/companyfacts/CIK{cik}.json"
    response = requests.get(url)
    data = json.loads(response.text)
    return data

def get_frames(taxonomy, tag, unit, period):
    url = f"https://data.sec.gov/api/xbrl/frames/{taxonomy}/{tag}/{unit}/{period}.json"
    response = requests.get(url)
    data = json.loads(response.text)
    return data

headers={"User-Agent": "<Sample Company Name> <Admin Contact>@<Sample Company Domain>",
         "Accept-Encoding": "gzip, deflate",
         "Host": "data.sec.gov"}


In [6]:
cik_df = get_cik_list()

In [34]:
from bs4 import BeautifulSoup
from pyrate_limiter import Duration, Limiter, RequestRate
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from weakref import finalize
from typing import Any, Dict, List

In [77]:
# Structure based on sec-edgar-api
JSONType = Dict[str, Any]
SubmissionsType = Dict[str, List[str]]

# Rate limiter
rate = RequestRate(10, Duration.SECOND)
limiter = Limiter(rate)

# Specify max number of request retries
retries = Retry(
    total=10,
    backoff_factor=1/10, # 1/max requests per second
    status_forcelist=[403, 500, 502, 503, 504],
)

session = requests.Session()
session.headers.update(headers)
session.mount("http://", HTTPAdapter(max_retries=retries))
session.mount("https://", HTTPAdapter(max_retries=retries))

@limiter.ratelimit(delay=True)
def rate_limited_get(session, url: str) -> JSONType:
    """Make a rate-limited GET request.

    SEC limits users to a maximum of 10 requests per second.
    Source: https://www.sec.gov/developer
    """
    resp = session.get(url)
    return resp

In [None]:
AAPL_cik = get_ticker_cik('AAPL', cik_df=cik_df)
assert len(AAPL_cik) == 10, "CIK number must be 10 digits long."
# url = f"https://data.sec.gov/submissions/CIK{AAPL_cik}.json"
url = f"https://data.sec.gov/api/xbrl/companyconcept/CIK{AAPL_cik}/us-gaap/AccountsPayableCurrent.json"
submissions = requests.get(url, headers=headers)
submissions.json()

# Download Company Filings

In [71]:
"""Constants used throughout the package."""
from pathlib import Path
from datetime import date

SEC_EDGAR_SEARCH_API_ENDPOINT = "https://efts.sec.gov/LATEST/search-index"
SEC_EDGAR_ARCHIVES_BASE_URL = "https://www.sec.gov/Archives/edgar/data"

# SEC limits users to no more than 10 requests per second
# Sleep 0.1s between each request to prevent rate-limiting
# Source: https://www.sec.gov/developer
SEC_EDGAR_RATE_LIMIT_SLEEP_INTERVAL = 0.1

# Number of times to retry a request to sec.gov
MAX_RETRIES = 10

DATE_FORMAT_TOKENS = "%Y-%m-%d"
DEFAULT_BEFORE_DATE = date.today()
DEFAULT_AFTER_DATE = date(2000, 1, 1)

ROOT_SAVE_FOLDER_NAME = "sec-edgar-filings"
FILING_FULL_SUBMISSION_FILENAME = "full-submission.txt"
# Extension will vary based on form (e.g. form 4 is XML, 8-K is HTML)
FILING_DETAILS_FILENAME_STEM = "filing-details"

In [81]:
download_headers = {"User-Agent": "<Sample Company Name> <Admin Contact>@<Sample Company Domain>",
         "Accept-Encoding": "gzip, deflate",
         "Host": "www.sec.gov"}

url = r"https://sec.gov/Archives/edgar/data/320193/000032019323000064/0000320193-23-000064.txt"
submissions = requests.get(url, headers=download_headers)
save_path = (
        Path.cwd()
        / FILING_FULL_SUBMISSION_FILENAME
    )
# save_path.write_bytes(submissions.content)



Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12,13,14,15,16,17,18,19,20,21
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,Three Months Ended,Three Months Ended,Three Months Ended,Three Months Ended,Three Months Ended,Three Months Ended,Three Months Ended,Three Months Ended,Three Months Ended,...,Nine Months Ended,Nine Months Ended,Nine Months Ended,Nine Months Ended,Nine Months Ended,Nine Months Ended,Nine Months Ended,Nine Months Ended,Nine Months Ended,Nine Months Ended
3,,"July 1, 2017","July 1, 2017","July 1, 2017",,"June 25, 2016","June 25, 2016","June 25, 2016",,Change,...,"July 1, 2017","July 1, 2017","July 1, 2017",,"June 25, 2016","June 25, 2016","June 25, 2016",,Change,Change
4,Net Sales by Operating Segment:,,,,,,,,,,...,,,,,,,,,,
5,Americas,$,20376,,,$,17963,,,13,...,$,73501,,,$,66384,,,11,%
6,Europe,10675,10675,,,9643,9643,,,11,...,41929,41929,,,39110,39110,,,7,%
7,Greater China,8004,8004,,,8848,8848,,,(10,...,34963,34963,,,39707,39707,,,(12,)%
8,Japan,3624,3624,,,3529,3529,,,3,...,13875,13875,,,12604,12604,,,10,%
9,Rest of Asia Pacific,2729,2729,,,2375,2375,,,15,...,12387,12387,,,10982,10982,,,13,%
