In [452]:
import os

path_parent = os.path.dirname(os.getcwd())
os.chdir(path_parent)

In [420]:
from urllib.request import urlopen
import json
import numpy as np
import pandas as pd
from datetime import datetime
from bs4 import BeautifulSoup
from tqdm.notebook import tqdm

#### Auth

In [3]:
with open('app/keys/keys.json', 'r') as key_file:
    keys = json.load(key_file)
    
key = keys['financial_modeling_prep']

#### Functions

In [None]:
def get_jsonparsed_data(url):
    """
    Receive the content of ``url``, parse it as JSON and return the object.

    Parameters
    ----------
    url : str

    Returns
    -------
    dict
    """
    response = urlopen(url)
    data = response.read().decode("utf-8")
    return json.loads(data)

#### Get current S&P500 list

In [389]:
# get current S&P500 list
url = "https://financialmodelingprep.com/api/v3/sp500_constituent?apikey={}".format(key)
sp = get_jsonparsed_data(url)
sp = pd.DataFrame(sp)

sp.head()

Unnamed: 0,symbol,name,sector,subSector,headQuarter,dateFirstAdded,cik,founded
0,FCX,Freeport-McMoRan,Materials,Copper,"Phoenix, Arizona",,831259,1912
1,MMM,3M,Industrials,Industrial Conglomerates,"Saint Paul, Minnesota",1976-08-09,66740,1902
2,AOS,A. O. Smith,Industrials,Building Products,"Milwaukee, Wisconsin",2017-07-26,91142,1916
3,ABT,Abbott,Health Care,Health Care Equipment,"North Chicago, Illinois",1964-03-31,1800,1888
4,ABBV,AbbVie,Health Care,Pharmaceuticals,"North Chicago, Illinois",2012-12-31,1551152,2013


#### Retrieve historical changes of S&P500 list

In [474]:
# get historical changes
url = "https://financialmodelingprep.com/api/v3/historical/sp500_constituent?apikey={}".format(key)
sp_history = get_jsonparsed_data(url)
sp_history = pd.DataFrame(sp_history)
sp_history['dateAdded'] = pd.to_datetime(sp_history['dateAdded'], format="%B %d, %Y")

sp_history = sp_history.loc[sp_history['dateAdded'] >= '2000-01-01']

sp_history.head(4)

Unnamed: 0,dateAdded,addedSecurity,removedTicker,removedSecurity,date,symbol,reason
0,2022-06-21,,UA/UAA,Under Armour,2022-06-21,UA/UAA,Market capitalization change.
1,2022-06-21,Keurig Dr Pepper,,,2022-06-21,KDP,Market capitalization change.
2,2022-06-21,,IPGP,IPG Photonics,2022-06-21,IPGP,Market capitalization change.
3,2022-06-21,ON Semiconductor,,,2022-06-21,ON,Market capitalization change.


#### Create S&P500 list for each time stamp
#### Identify ticker change.

This is needed since the ticker history doesn't match with the current ticker list.<br>
We would have to change the old ticker to the new one.

In [327]:
def get_company_core_info(ticker, key=key):
    url = "https://financialmodelingprep.com/api/v4/company-core-information?symbol={}&apikey={}".format(ticker, key)
    df = get_jsonparsed_data(url)
    df = pd.DataFrame(df)
    return df

Check if all the historical changes can be applied to the S&P500 list

In [397]:
# Check if all the changes in the tickers works properly with the current S&P500 list as it changes
# this is an iterative process

sp500_comp = sp.symbol.tolist().copy()
for i, (_, added, _, removed, d, symbol, _) in sp_history.iterrows():
    try:
        if added == '':
            sp500_comp.append(symbol)
        elif removed == '':
            sp500_comp.remove(symbol)
        else:
            print("Not expected behaviour")
            break
    except:
        print("ERROR - add: {}, remove: {}, d: {}, symbol: {}".format(added, removed, d, symbol))
        break

In [394]:
# add in the company list whenever there is an error from the above process
adjustments = [
    ['QuintilesIMS', 'Q', 'IQV'],
    ['Willis Towers Watson', 'WLTW', 'WTW'],
    ['Discovery Communications', 'DISCK', 'WBD'],
    ['Facebook', 'FB', 'META'],
    ['Michael Kors', 'KORS', 'CPRI'],
    ['Delphi Automotive', 'DLPH', 'APTV'],
    ['Ingersoll-Rand', 'IR', 'TT'],
    ['Priceline.com', 'PCLN', 'BKNG'],
    ['Harris Corporation', 'HRS', 'LHX'],
    ['Cabot Oil & Gas', 'COG', 'CTRA'],
    ['Jacobs Engineering Group', 'JEC', 'J'],
    ['Tesoro Corporation', 'TSO', 'ANDV'],
    ['Leucadia National', 'LUK', 'JEF'],
    ['Kraft Foods', 'KFT', 'MDLZ']
]

adjustments_dict = dict()
for adj in adjustments:
    adjustments_dict[adj[0]] = {"old_ticker":adj[1],
                                "new_ticker":adj[2],
                                "reason":"Changed name"}

for k in adjustments_dict.keys():
    new_ticker = adjustments_dict[k]['new_ticker']
    company_info = get_company_core_info(new_ticker)
    
    if company_info.shape[0] > 0:
        new_comp_name = company_info['registrantName'].values[0]
        adjustments_dict[k]['new_company_name'] = new_comp_name
        adjustments_dict[k]['defunct'] = 'no'
    else:
        adjustments_dict[k]['new_company_name'] = 'N/A'
        adjustments_dict[k]['defunct'] = 'yes'    

In [395]:
# update the existing sp_500 history, so that it has the latest ticker, not the old ones
# after this go to the cell where 'Create ticker list per date'

for adj in adjustments_dict.keys():
    row = sp_history.loc[(sp_history.addedSecurity==adj)].values
    if len(row)>0:
        sp_history.loc[(sp_history.addedSecurity==adj) & 
                       (sp_history.symbol==adjustments_dict[adj]['old_ticker']), 
                       'symbol'] = adjustments_dict[adj]['new_ticker']
    else:
        row = sp_history.loc[(sp_history.removedSecurity==adj)].values
        if len(row)>0:
            sp_history.loc[(sp_history.removedSecurity==adj) & 
                           (sp_history.symbol==adjustments_dict[adj]['old_ticker']),
                           'symbol'] = adjustments_dict[adj]['new_ticker']
            sp_history.loc[(sp_history.removedSecurity==adj) & 
                           (sp_history.symbol==adjustments_dict[adj]['old_ticker']),
                           'removedSecurity'] = adjustments_dict[adj]['new_company_name']
        else:
            print("Error: {}".format(adj))
            break
        

In [396]:
adjustments_dict['Tesoro Corporation']

{'old_ticker': 'TSO',
 'new_ticker': 'ANDV',
 'reason': 'Changed name',
 'new_company_name': 'N/A',
 'defunct': 'yes'}

#### Create a snapshot of the S&P500 list for each change date

In [408]:
today = datetime.strftime(datetime.today(), '%Y-%m-%d')
sp_dict = dict()
sp_dict[today] = [sp.symbol.tolist().copy()]

sp500_comp = sp_dict[today][0].copy()
for i, (_, added, _, removed, d, symbol, _) in sp_history.iterrows():
    try:
        if added == '':
            sp500_comp.append(symbol)
        elif removed == '':
            sp500_comp.remove(symbol)
        else:
            print("Not expected behaviour")
            break
            
        sp_dict[d] = [sp500_comp.copy()]
    except:
        print("ERROR - add: {}, remove: {}, d: {}, symbol: {}".format(added, removed, d, symbol))
        break

In [428]:
sp_historical = pd.DataFrame(sp_dict).T.reset_index()
sp_historical.columns = ['date', 'tickers']
sp_historical['date'] = pd.to_datetime(sp_historical['date'], format="%Y-%m-%d")

In [429]:
sp_historical.head(2)

Unnamed: 0,date,tickers
0,2022-07-17,"[FCX, MMM, AOS, ABT, ABBV, ABMD, ACN, ATVI, AD..."
1,2022-06-08,"[FCX, MMM, AOS, ABT, ABBV, ABMD, ACN, ATVI, AD..."


In [468]:
sp_historical.to_csv('app/data_storage/S&P500/S&P500_history_compositions.csv', index=False)

#### Identify all the tickers I need to extract

In [417]:
ticker_list = set()

for i in sp_historical.tickers:
    ticker_tmp_list = set(i)
    ticker_list.update(ticker_tmp_list)

print("There are {} unique tickers that has been or are in sp500".format(len(ticker_list)))

There are 778 unique tickers that has been or are in sp500


In [460]:
# identify which date each ticker was included in sp500

ticker_dict = dict()
today = datetime.strftime(datetime.today(), '%Y-%m-%d')

# for each ticker, create a column where it indicates whether it is in the ticker list
# 1 if it is, 0 if not
# use this column to indicate the min and max date of the stock

for t in tqdm(ticker_list):
    found = list()
    for i in range(sp_historical.shape[0]):
        found.append(len(np.where(np.array(sp_historical['tickers'][i]) == t)[0]))
    sp_historical['found'] = found
    
    if sp_historical.loc[sp_historical['found']!=0, 'date'].max() == sp_historical['date'].max():
        max_date = today
    else:
        max_date = sp_historical.loc[sp_historical['found']!=0, 'date'].max().strftime("%Y-%m-%d")
        
    ticker_dict[t] = {'min_date' : sp_historical.loc[sp_historical['found']!=0, 'date'].min().strftime("%Y-%m-%d"),
                      'max_date' : max_date,
                      'dates' : sp_historical.loc[sp_historical['found']!=0, 'date'].astype(str).values.tolist()}

  0%|          | 0/778 [00:00<?, ?it/s]

In [461]:
for ticker in tqdm(ticker_dict.keys()):
    test = get_company_core_info(ticker)
    time.sleep(1)
    if len(test)==1:
        ticker_dict[ticker]['defunct'] = 'No'
    else:
        ticker_dict[ticker]['defunct'] = 'Yes'

  0%|          | 0/778 [00:00<?, ?it/s]

In [469]:
print("{} tickers are defunct.".format(len([i for i in ticker_dict.keys() if ticker_dict[i]['defunct']=='Yes'])))

212 tickers are defunct.


In [473]:
ticker_details = pd.DataFrame(ticker_dict).T.reset_index()
ticker_details.to_csv("app/data_storage/S&P500/ticker_details.csv", index=False)

#### Collect data from the ticker details

In [386]:
sp_history.loc[sp_history.symbol=='MDLZ']

Unnamed: 0,dateAdded,addedSecurity,removedTicker,removedSecurity,date,symbol,reason
551,2007-03-30,Kraft Foods,,,2007-03-30,MDLZ,Taken Private


In [378]:
sp_history.head(2)

Unnamed: 0,dateAdded,addedSecurity,removedTicker,removedSecurity,date,symbol,reason
0,2022-06-08,,CERN,Cerner,2022-06-08,CERN,S&P 500 constituent Oracle Corp. acquired Cerner.
1,2022-06-08,Vici,,,2022-06-08,VICI,S&P 500 constituent Oracle Corp. acquired Cerner.


In [365]:
# replace the values in sp_history
adjustments_dict

{'QuintilesIMS': {'old_ticker': 'Q',
  'new_ticker': 'IQV',
  'reason': 'Changed name',
  'new_company_name': 'IQVIA HOLDINGS INC.',
  'defunct': 'no'},
 'Willis Towers Watson': {'old_ticker': 'WLTW',
  'new_ticker': 'WTW',
  'reason': 'Changed name',
  'new_company_name': 'WILLIS TOWERS WATSON PLC',
  'defunct': 'no'},
 'Discovery Communications': {'old_ticker': 'DISCK',
  'new_ticker': 'WBD',
  'reason': 'Changed name',
  'new_company_name': 'Discovery, Inc.',
  'defunct': 'no'},
 'Facebook': {'old_ticker': 'FB',
  'new_ticker': 'META',
  'reason': 'Changed name',
  'new_company_name': 'Meta Platforms, Inc.',
  'defunct': 'no'},
 'Michael Kors': {'old_ticker': 'KORS',
  'new_ticker': 'CPRI',
  'reason': 'Changed name',
  'new_company_name': 'CAPRI HOLDINGS LTD',
  'defunct': 'no'},
 'Delphi Automotive': {'old_ticker': 'DLPH',
  'new_ticker': 'APTV',
  'reason': 'Changed name',
  'new_company_name': 'APTIV PLC',
  'defunct': 'no'},
 'Ingersoll-Rand': {'old_ticker': 'IR',
  'new_ticker

In [None]:
adjustments = [
    ['QuintilesIMS', 'Q', 'IQV'],
    ['Willis Towers Watson', 'WLTW', 'WTW'],
    ['Discovery Communications', 'DISCK', 'WBD'],
    ['Facebook', 'FB', 'META'],
    ['Michael Kors', 'KORS', 'CPRI'],
    ['Delphi Automotive', 'DLPH', 'APTV'],
    ['Ingersoll-Rand', 'IR', 'TT'],
    ['Priceline.com', 'PCLN', 'BKNG'],
    ['Harris Corporation', 'HRS', 'LHX'],
    ['Cabot Oil & Gas', 'COG', 'CTRA'],
    ['Jacobs Engineering Group', 'JEC', 'J'],
    ['Tesoro Corporation', 'TSO', 'ANDV'],
    ['Leucadia National', 'LUK', 'JEF'],
    ['Kraft Foods', 'KFT', 'MDLZ'],
    ['Countrywide Credit Industries', 'CFC', 'MDLZ']
]

In [287]:
def get_company_core_info(ticker, key=key):
    url = "https://financialmodelingprep.com/api/v4/company-core-information?symbol={}&apikey={}".format(ticker, key)
    df = get_jsonparsed_data(url)
    df = pd.DataFrame(df)
    return df

In [288]:
get_company_core_info('IQV')

Unnamed: 0,cik,symbol,exchange,sicCode,sicGroup,sicDescription,stateLocation,stateOfIncorporation,fiscalYearEnd,businessAddress,mailingAddress,taxIdentificationNumber,registrantName
0,1478242,IQV,NYSE,8731,Services,SERVICES-COMMERCIAL PHYSICAL & BIOLOGICAL RESE...,NC,DE,12-31,"4820 EMPEROR BLVD.,DURHAM NC 27703,919-998-2000","4820 EMPEROR BLVD.,DURHAM NC 27703",27-1341991,IQVIA HOLDINGS INC.


In [149]:
def get_company_core_info(ticker, key=key):
    url = "https://financialmodelingprep.com/api/v4/company-core-information?symbol={}&apikey={}".format(ticker, key)
    df = get_jsonparsed_data(url)
    df = pd.DataFrame(df)
    return df

def get_ticker_historical_data()

In [169]:
a = get_company_core_info("WTW")

In [170]:
a

[{'cik': '0001140536',
  'symbol': 'WTW',
  'exchange': 'NASDAQ',
  'sicCode': '6411',
  'sicGroup': 'Finance, Insurance and Real Estate',
  'sicDescription': 'INSURANCE AGENTS BROKERS & SERVICES',
  'stateLocation': 'X0',
  'stateOfIncorporation': 'L2',
  'fiscalYearEnd': '12-31',
  'businessAddress': 'C/O WILLIS GROUP LIMITED,51 LIME STREET,LONDON ENGLAND X0 EC3M 7DQ,44-20-3124-6000',
  'mailingAddress': 'C/O WILLIS GROUP LIMITED,51 LIME STREET,LONDON ENGLAND X0 EC3M 7DQ',
  'taxIdentificationNumber': '98-0352587',
  'registrantName': 'WILLIS TOWERS WATSON PLC'}]

In [475]:
url = "https://financialmodelingprep.com/api/v4/historical-price/WLTW/1/day/2000-01-01/2022-07-16?apikey={}".format(key)
df = get_jsonparsed_data(url)
wltw_historical = pd.DataFrame(df['results'])

In [476]:
url = "https://financialmodelingprep.com/api/v4/historical-price/WTW/1/day/2000-01-01/2022-07-16?apikey={}".format(key)
df = get_jsonparsed_data(url)
wtw_historical = pd.DataFrame(df['results'])

In [477]:
url = "https://financialmodelingprep.com/api/v3/historical-price-full/{}?apikey={}".format('WTW', key)
df = get_jsonparsed_data(url)
wtw = pd.DataFrame(df['historical'])

In [478]:
url = "https://financialmodelingprep.com/api/v3/historical-price-full/{}?from={}&to={}&apikey={}".format('WTW',
                                                                                                         '2000-01-01',
                                                                                                         '2022-07-16',
                                                                                                         key)
df = get_jsonparsed_data(url)
wtw_full = pd.DataFrame(df['historical'])

In [491]:
wltw_historical

Unnamed: 0,o,h,c,l,v,t,formated
0,123.43,126.250,125.84,122.31,2163572,1451970000000,2016-01-05 00:00:00
1,125.24,125.540,119.98,119.94,2386461,1452056400000,2016-01-06 00:00:00
2,116.38,119.740,114.95,114.93,2489451,1452142800000,2016-01-07 00:00:00
3,115.48,117.440,116.62,113.50,2006267,1452229200000,2016-01-08 00:00:00
4,117.01,117.330,114.97,114.09,1408550,1452488400000,2016-01-11 00:00:00
...,...,...,...,...,...,...,...
1510,237.28,237.945,235.57,234.20,848622,1641186000000,2022-01-03 00:00:00
1511,232.61,239.570,238.67,232.61,952150,1641272400000,2022-01-04 00:00:00
1512,239.43,239.650,237.01,236.24,1070457,1641358800000,2022-01-05 00:00:00
1513,236.36,238.100,234.34,234.22,1128116,1641445200000,2022-01-06 00:00:00


In [492]:
wtw_historical

Unnamed: 0,o,h,c,l,v,t,formated
0,44.86,45.18,44.95,44.860,168700,1063252800000,2003-09-11 00:00:00
1,41.61,43.06,42.73,41.520,828600,1064980800000,2003-10-01 00:00:00
2,42.70,42.70,41.89,41.700,739200,1065067200000,2003-10-02 00:00:00
3,42.00,42.33,40.50,40.010,1523900,1065153600000,2003-10-03 00:00:00
4,40.50,40.50,39.40,37.800,1607800,1065412800000,2003-10-06 00:00:00
...,...,...,...,...,...,...,...
4039,199.11,201.32,199.24,198.490,441046,1657512000000,2022-07-11 00:00:00
4040,198.98,201.09,196.84,196.430,518062,1657598400000,2022-07-12 00:00:00
4041,194.11,196.25,193.72,193.020,495835,1657684800000,2022-07-13 00:00:00
4042,190.84,193.33,193.24,188.990,616966,1657771200000,2022-07-14 00:00:00


In [None]:
wtw_historical

In [480]:
wltw_historical.formated.min(), wltw_historical.formated.max()

('2016-01-05 00:00:00', '2022-01-07 00:00:00')

In [483]:
wtw_historical.formated.min(), wtw_historical.formated.max()

('2003-09-11 00:00:00', '2022-07-15 00:00:00')

In [484]:
wtw.date.min(), wtw.date.max()

('2017-07-19', '2022-07-18')

In [485]:
wltw_historical.loc[wltw_historical.formated=='2022-01-07 00:00:00']

Unnamed: 0,o,h,c,l,v,t,formated
1514,233.05,235.5,231.56,231.26,1233519,1641531600000,2022-01-07 00:00:00


In [486]:
wtw_historical.loc[wtw_historical.formated=='2022-01-10 00:00:00']

Unnamed: 0,o,h,c,l,v,t,formated
3915,228.78,235.94,232.62,226.4,1124440,1641790800000,2022-01-10 00:00:00


In [487]:
wtw.loc[(wtw.date=='2022-01-07') | (wtw.date=='2022-01-10')]

Unnamed: 0,date,open,high,low,close,adjClose,volume,unadjustedVolume,change,changePercent,vwap,label,changeOverTime
129,2022-01-10,228.779999,235.940002,226.399994,232.619995,232.619995,1124400.0,1124400.0,3.84,1.678,231.65333,"January 10, 22",0.01678
130,2022-01-07,233.050003,235.5,231.259995,231.559998,231.559998,1233500.0,1233500.0,-1.49,-0.639,232.77333,"January 07, 22",-0.00639


In [488]:
url = "https://financialmodelingprep.com/api/v3/historical-price-full/{}?from={}&to={}&apikey={}".format('Q',
                                                                                                         '2000-01-01',
                                                                                                         '2022-07-16',
                                                                                                         key)
df = get_jsonparsed_data(url)
df = pd.DataFrame(df['historical'])

KeyError: 'historical'

In [None]:
d

In [263]:
url = "https://financialmodelingprep.com/api/v3/historical-price-full/{}?from={}&to={}&apikey={}".format('IQV',
                                                                                                         '2000-01-01',
                                                                                                         '2022-07-16',
                                                                                                         key)
df = get_jsonparsed_data(url)
df = pd.DataFrame(df['historical'])

In [265]:
url = "https://financialmodelingprep.com/api/v3/historical-price-full/{}?apikey={}".format('SUNE', key)
df = get_jsonparsed_data(url)
df = pd.DataFrame(df['historical'])
df

KeyError: 'historical'

In [490]:
url = "https://financialmodelingprep.com/api/v4/historical-price/Q/1/day/2000-01-01/2022-07-16?apikey={}".format(key)
df = get_jsonparsed_data(url)
df = pd.DataFrame(df['results'])
df

Unnamed: 0,o,h,c,l,v,t,formated
0,4.09,4.17,3.99,3.97,6972600,1063252800000,2003-09-11 00:00:00
1,3.43,3.58,3.58,3.40,6119100,1064980800000,2003-10-01 00:00:00
2,3.55,3.58,3.52,3.50,3885100,1065067200000,2003-10-02 00:00:00
3,3.60,3.70,3.61,3.55,4608100,1065153600000,2003-10-03 00:00:00
4,3.62,3.70,3.68,3.61,3819900,1065412800000,2003-10-06 00:00:00
...,...,...,...,...,...,...,...
3025,108.52,108.82,104.72,103.82,3089302,1510117200000,2017-11-08 00:00:00
3026,103.55,103.77,103.51,101.50,2696673,1510203600000,2017-11-09 00:00:00
3027,103.70,104.61,104.05,103.57,1138299,1510290000000,2017-11-10 00:00:00
3028,101.98,103.87,102.91,101.18,1532747,1510549200000,2017-11-13 00:00:00


In [489]:
url = "https://financialmodelingprep.com/api/v4/historical-price/IQV/1/day/2000-01-01/2022-07-16?apikey={}".format(key)
df = get_jsonparsed_data(url)
df = pd.DataFrame(df['results'])
df

Unnamed: 0,o,h,c,l,v,t,formated
0,102.67,104.3800,103.84,100.9001,599962,1510722000000,2017-11-15 00:00:00
1,104.55,104.8363,104.44,103.4400,934937,1510808400000,2017-11-16 00:00:00
2,103.73,105.5400,104.46,103.7300,690796,1510894800000,2017-11-17 00:00:00
3,104.44,104.9900,103.90,103.8700,1127064,1511154000000,2017-11-20 00:00:00
4,103.88,105.0000,104.36,103.3700,1210643,1511240400000,2017-11-21 00:00:00
...,...,...,...,...,...,...,...
1168,217.35,219.6800,217.40,216.4780,517521,1657512000000,2022-07-11 00:00:00
1169,217.18,219.8000,213.53,211.9400,613167,1657598400000,2022-07-12 00:00:00
1170,209.37,213.9300,211.83,208.3800,533077,1657684800000,2022-07-13 00:00:00
1171,209.00,212.4300,212.00,206.2600,877064,1657771200000,2022-07-14 00:00:00


In [220]:
url = "https://financialmodelingprep.com/api/v3/historical-price-full/{}?apikey={}".format('IQV', key)
df = get_jsonparsed_data(url)
df = pd.DataFrame(df['historical'])
df

Unnamed: 0,date,open,high,low,close,adjClose,volume,unadjustedVolume,change,changePercent,vwap,label,changeOverTime
0,2022-07-15,216.01,219.13,214.070,219.08,219.08,986100.0,986100.0,3.07,1.421,217.42667,"July 15, 22",0.01421
1,2022-07-14,209.00,212.43,206.260,212.00,212.00,837856.0,837856.0,3.00,1.435,210.23000,"July 14, 22",0.01435
2,2022-07-13,209.37,213.93,208.380,211.83,211.83,513431.0,513431.0,2.46,1.175,211.38000,"July 13, 22",0.01175
3,2022-07-12,217.18,219.80,211.940,213.53,213.53,592605.0,592605.0,-3.65,-1.681,215.09000,"July 12, 22",-0.01681
4,2022-07-11,217.35,219.68,216.478,217.40,217.40,500177.0,500177.0,0.05,0.023,217.85267,"July 11, 22",0.00023
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1254,2017-07-21,90.40,91.48,90.400,91.15,91.15,753238.0,753238.0,0.75,0.830,91.01000,"July 21, 17",0.00830
1255,2017-07-20,90.89,90.94,89.890,90.30,90.30,760089.0,760089.0,-0.59,-0.649,90.37667,"July 20, 17",-0.00649
1256,2017-07-19,90.39,91.19,90.250,90.96,90.96,649618.0,649618.0,0.57,0.631,90.80000,"July 19, 17",0.00631
1257,2017-07-18,89.94,90.17,89.375,90.07,90.07,641887.0,641887.0,0.13,0.145,89.87167,"July 18, 17",0.00145


In [None]:
https://financialmodelingprep.com/api/v3/stock/list?apikey=YOUR_API_KEY

In [146]:
url = "https://financialmodelingprep.com/api/v4/historical-price/Q/1/day/2000-01-01/2022-07-16?apikey={}".format(key)
df = get_jsonparsed_data(url)
df = pd.DataFrame(df)

In [139]:
## search

keyword = 'QuintilesIMS'
url = "https://financialmodelingprep.com/api/v3/search?query={}&limit=10&exchange=NASDAQ&apikey={}".format(keyword, key)
df = get_jsonparsed_data(url)
df

[]

In [27]:
# get current S&P500 list
url = "https://financialmodelingprep.com/api/v3/sp500_constituent?apikey={}".format(key)
sp = get_jsonparsed_data(url)
sp = pd.DataFrame(sp)

In [36]:
sp.head(2)

Unnamed: 0,symbol,name,sector,subSector,headQuarter,dateFirstAdded,cik,founded
0,FCX,Freeport-McMoRan,Materials,Copper,"Phoenix, Arizona",,831259,1912
1,MMM,3M,Industrials,Industrial Conglomerates,"Saint Paul, Minnesota",1976-08-09,66740,1902


In [67]:
sp_history['dateAdded'].nunique()

233

In [64]:
sp_history.loc[sp_history['dateAdded']==sp_history['dateAdded'].unique()[0]]

Unnamed: 0,dateAdded,addedSecurity,removedTicker,removedSecurity,date,symbol,reason
0,2022-06-08,,CERN,Cerner,2022-06-08,CERN,S&P 500 constituent Oracle Corp. acquired Cerner.
1,2022-06-08,Vici,,,2022-06-08,VICI,S&P 500 constituent Oracle Corp. acquired Cerner.


In [77]:
url = "https://financialmodelingprep.com/api/v4/historical-price/KFT/1/day/2003-01-01/2021-10-16?apikey={}".format(key)
# url = "https://financialmodelingprep.com/api/v3/historical-price-full/AAPL,GOOG,FB?apikey=42573d51ce11e37bf7503838f5c3f6ec"
# url = "https://financialmodelingprep.com/api/v3/historical-price-full/CCI?serietype=line&apikey=42573d51ce11e37bf7503838f5c3f6ec"
df = get_jsonparsed_data(url)
df = pd.DataFrame(df['results'])

In [78]:
df

Unnamed: 0,o,h,c,l,v,t,formated
0,29.74,29.80,29.680,29.50,1607700,1063252800000,2003-09-11 00:00:00
1,29.40,30.00,29.950,29.40,1926200,1064980800000,2003-10-01 00:00:00
2,29.91,30.00,29.930,29.76,1142800,1065067200000,2003-10-02 00:00:00
3,30.18,30.18,29.920,29.82,1328800,1065153600000,2003-10-03 00:00:00
4,30.00,30.08,30.000,29.60,1201900,1065412800000,2003-10-06 00:00:00
...,...,...,...,...,...,...,...
2264,41.50,41.95,41.355,41.31,20446018,1348545600000,2012-09-25 00:00:00
2265,41.52,41.81,41.270,41.18,15226249,1348632000000,2012-09-26 00:00:00
2266,41.16,41.34,41.195,41.00,14123495,1348718400000,2012-09-27 00:00:00
2267,41.21,41.44,41.350,40.90,13989021,1348804800000,2012-09-28 00:00:00


In [75]:
# get current S&P500 list
url = "https://financialmodelingprep.com/api/v4/historical-price/MDLZ/1/day/2003-01-01/2021-10-16?apikey={}".format(key)
# url = "https://financialmodelingprep.com/api/v3/historical-price-full/AAPL,GOOG,FB?apikey=42573d51ce11e37bf7503838f5c3f6ec"
# url = "https://financialmodelingprep.com/api/v3/historical-price-full/CCI?serietype=line&apikey=42573d51ce11e37bf7503838f5c3f6ec"
df = get_jsonparsed_data(url)
df = pd.DataFrame(df['results'])

In [76]:
df

Unnamed: 0,o,h,c,l,v,t,formated
0,28.4200,28.48,28.008,27.390,31575211,1349150400000,2012-10-02 00:00:00
1,28.1800,28.29,27.830,27.730,37901593,1349236800000,2012-10-03 00:00:00
2,27.8700,28.21,28.060,27.870,16877994,1349323200000,2012-10-04 00:00:00
3,28.2499,28.37,27.810,27.740,17569280,1349409600000,2012-10-05 00:00:00
4,27.8500,27.90,27.550,27.480,17364334,1349668800000,2012-10-08 00:00:00
...,...,...,...,...,...,...,...
2270,59.8500,59.85,59.420,59.340,5266371,1633924800000,2021-10-11 00:00:00
2271,59.6300,59.91,59.670,59.420,6343278,1634011200000,2021-10-12 00:00:00
2272,59.9500,60.40,59.960,59.365,5638972,1634097600000,2021-10-13 00:00:00
2273,60.3000,60.74,60.630,60.170,4941470,1634184000000,2021-10-14 00:00:00


In [81]:
# get current S&P500 list
url = "https://financialmodelingprep.com/api/v4/historical-price/FB/1/day/2003-01-01/2022-07-16?apikey={}".format(key)
# url = "https://financialmodelingprep.com/api/v3/historical-price-full/AAPL,GOOG,FB?apikey=42573d51ce11e37bf7503838f5c3f6ec"
# url = "https://financialmodelingprep.com/api/v3/historical-price-full/CCI?serietype=line&apikey=42573d51ce11e37bf7503838f5c3f6ec"
df = get_jsonparsed_data(url)
df = pd.DataFrame(df['results'])

In [82]:
df

Unnamed: 0,o,h,c,l,v,t,formated
0,42.05,45.000,38.2318,38.00,580526476,1337313600000,2012-05-18 00:00:00
1,36.53,36.660,34.0300,33.00,168309331,1337572800000,2012-05-21 00:00:00
2,32.61,33.590,31.0000,30.94,102053826,1337659200000,2012-05-22 00:00:00
3,31.37,32.500,32.0000,31.36,73721135,1337745600000,2012-05-23 00:00:00
4,32.95,33.210,33.0300,31.77,50275879,1337832000000,2012-05-24 00:00:00
...,...,...,...,...,...,...,...
2526,188.45,200.935,198.8600,187.73,31951582,1654142400000,2022-06-02 00:00:00
2527,195.98,196.610,190.7800,189.78,19464993,1654228800000,2022-06-03 00:00:00
2528,193.99,196.920,194.2500,188.40,30574242,1654488000000,2022-06-06 00:00:00
2529,191.93,196.530,195.6500,191.49,18628687,1654574400000,2022-06-07 00:00:00


In [83]:
# get current S&P500 list
url = "https://financialmodelingprep.com/api/v4/historical-price/META/1/day/2003-01-01/2022-07-16?apikey={}".format(key)
# url = "https://financialmodelingprep.com/api/v3/historical-price-full/AAPL,GOOG,FB?apikey=42573d51ce11e37bf7503838f5c3f6ec"
# url = "https://financialmodelingprep.com/api/v3/historical-price-full/CCI?serietype=line&apikey=42573d51ce11e37bf7503838f5c3f6ec"
df = get_jsonparsed_data(url)
df = pd.DataFrame(df['results'])

In [84]:
df

Unnamed: 0,o,h,c,l,v,t,formated
0,15.0800,15.1900,15.12,15.0420,334935,1625025600000,2021-06-30 00:00:00
1,15.1300,15.1300,14.89,14.8400,241629,1625112000000,2021-07-01 00:00:00
2,15.0200,15.1000,15.00,14.9300,388152,1625198400000,2021-07-02 00:00:00
3,15.0800,15.0900,15.01,14.8700,685094,1625544000000,2021-07-06 00:00:00
4,15.0601,15.1500,14.89,14.8400,362538,1625630400000,2021-07-07 00:00:00
...,...,...,...,...,...,...,...
167,169.9800,172.4000,170.88,168.1501,19249243,1657252800000,2022-07-08 00:00:00
168,167.0650,167.4900,162.88,161.9100,21905133,1657512000000,2022-07-11 00:00:00
169,164.8000,165.9107,163.27,162.1000,16639659,1657598400000,2022-07-12 00:00:00
170,160.1600,164.9800,163.49,159.6127,16496895,1657684800000,2022-07-13 00:00:00


# S&P500 list

#### Extract tables from Wiki

In [88]:
URL_NEW = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
# the fixed version of the sp500 is
# https://en.wikipedia.org/w/index.php?title=List_of_S%26P_500_companies&oldid=1095558369
# if the wiki page format changes, some of the codes below might not work.
# in that case, use the fixed version of the wiki page
page_NEW = requests.get(URL_NEW)

soup_NEW = BeautifulSoup(page_NEW.content, "html.parser")

In [89]:
changes = pd.read_html(str(soup_NEW.find(id='changes')))[0]
# concat the multi-headers into single header
changes.columns = ['_'.join(i) for i in list(changes.columns)]
# this is the current list of sp500 companies
constituents = pd.read_html(str(soup_NEW.find(id='constituents')))[0]

Identify tickers that has changed