In [1]:
import numpy as np
import pandas as pd
import pickle
from collections import defaultdict
%matplotlib inline

In [2]:
constituents = pd.read_csv("/Users/dunleavyjason/Documents/Metis/covid_disclosures/constituents.csv")

In [3]:
constituents.rename(columns={"Symbol":"ticker", "Name":"name", "Sector":"sector"}, inplace=True)
constituents

Unnamed: 0,ticker,name,sector
0,MMM,3M Company,Industrials
1,AOS,A.O. Smith Corp,Industrials
2,ABT,Abbott Laboratories,Health Care
3,ABBV,AbbVie Inc.,Health Care
4,ABMD,ABIOMED Inc,Health Care
...,...,...,...
500,YUM,Yum! Brands Inc,Consumer Discretionary
501,ZBRA,Zebra Technologies,Information Technology
502,ZBH,Zimmer Biomet,Health Care
503,ZION,Zions Bancorp,Financials


In [4]:
ciks = pd.read_csv("https://www.sec.gov/include/ticker.txt", header=None, names=["data"])

In [5]:
ciks = ciks.data.str.split("\t", expand=True)
ciks.rename(columns={0:"ticker", 1:"cik"}, inplace=True)
ciks["ticker"] = ciks.ticker.str.upper()
ciks

Unnamed: 0,ticker,cik
0,AAPL,320193
1,MSFT,789019
2,AMZN,1018724
3,GOOG,1652044
4,TCEHY,1293451
...,...,...
11093,ZSL,1415311
11094,ZNTEU,1823652
11095,ZNTEW,1823652
11096,ZNOGW,1131312


In [6]:
sp_500 = (constituents.set_index("ticker").join(ciks.set_index("ticker"), how="inner")).reset_index()
sp_500

Unnamed: 0,ticker,name,sector,cik
0,A,Agilent Technologies Inc,Health Care,1090872
1,AAL,American Airlines Group,Industrials,6201
2,AAP,Advance Auto Parts,Consumer Discretionary,1158449
3,AAPL,Apple Inc.,Information Technology,320193
4,ABBV,AbbVie Inc.,Health Care,1551152
...,...,...,...,...
499,YUM,Yum! Brands Inc,Consumer Discretionary,1041061
500,ZBH,Zimmer Biomet,Health Care,1136869
501,ZBRA,Zebra Technologies,Information Technology,877212
502,ZION,Zions Bancorp,Financials,109380


In [7]:
sp_500[sp_500.name == "American Airlines Group"]

Unnamed: 0,ticker,name,sector,cik
1,AAL,American Airlines Group,Industrials,6201


In [8]:
sp_500_series = pd.Series(sp_500["cik"].values, index=sp_500["ticker"])
sp_500_series

ticker
A       1090872
AAL        6201
AAP     1158449
AAPL     320193
ABBV    1551152
         ...   
YUM     1041061
ZBH     1136869
ZBRA     877212
ZION     109380
ZTS     1555280
Length: 504, dtype: object

In [9]:
sp_500_dict = sp_500_series.to_dict()

In [10]:
sample_dict = sp_500_series.iloc[0:3].to_dict()
sample_dict

{'A': '1090872', 'AAL': '6201', 'AAP': '1158449'}

In [11]:
# package used to execute HTTP POST request to the API
import json
import urllib.request
import time, os
import random

In [12]:
# API endpoint
TOKEN = "4576f2138046df6746527c9675d862c2658ddb99d1c7db6ff99b728c6faf1ebe"
API = "https://api.sec-api.io?token=" + TOKEN

In [13]:
qa_dict = {}
for key in sp_500_dict.keys():
    qa_dict[key] = {}

In [14]:
qa_dict

{'A': {},
 'AAL': {},
 'AAP': {},
 'AAPL': {},
 'ABBV': {},
 'ABC': {},
 'ABMD': {},
 'ABT': {},
 'ACN': {},
 'ADBE': {},
 'ADI': {},
 'ADM': {},
 'ADP': {},
 'ADSK': {},
 'AEE': {},
 'AEP': {},
 'AES': {},
 'AFL': {},
 'AIG': {},
 'AIZ': {},
 'AJG': {},
 'AKAM': {},
 'ALB': {},
 'ALGN': {},
 'ALK': {},
 'ALL': {},
 'ALLE': {},
 'ALXN': {},
 'AMAT': {},
 'AMCR': {},
 'AMD': {},
 'AME': {},
 'AMGN': {},
 'AMP': {},
 'AMT': {},
 'AMZN': {},
 'ANET': {},
 'ANSS': {},
 'ANTM': {},
 'AON': {},
 'AOS': {},
 'APA': {},
 'APD': {},
 'APH': {},
 'APTV': {},
 'ARE': {},
 'ATO': {},
 'ATVI': {},
 'AVB': {},
 'AVGO': {},
 'AVY': {},
 'AWK': {},
 'AXP': {},
 'AZO': {},
 'BA': {},
 'BAC': {},
 'BAX': {},
 'BBY': {},
 'BDX': {},
 'BEN': {},
 'BIIB': {},
 'BIO': {},
 'BK': {},
 'BKNG': {},
 'BKR': {},
 'BLK': {},
 'BLL': {},
 'BMY': {},
 'BR': {},
 'BSX': {},
 'BWA': {},
 'BXP': {},
 'C': {},
 'CAG': {},
 'CAH': {},
 'CARR': {},
 'CAT': {},
 'CB': {},
 'CBOE': {},
 'CBRE': {},
 'CCI': {},
 'CCL': {},


In [20]:
qa_list = []

In [21]:
for cik in sp_500_dict.values():
    
    payload = {
      "query": { "query_string": { "query":"cik:{}".format(cik) + " AND filedAt:{2020-04-01 TO 2021-02-17} AND (formType:\"10-Q\" OR formType:\"10-K\")"}},
      "from": "0",
      "size": "100",
      "sort": [{ "filedAt": { "order": "desc" } }]
    }

    jsondata = json.dumps(payload) # format your payload to JSON bytes
    jsondataasbytes = jsondata.encode('utf-8')   # needs to be bytes

    req = urllib.request.Request(API) # instantiate the request 

    req.add_header('Content-Type', 'application/json; charset=utf-8') # set the correct HTTP header: Content-Type = application/json
    req.add_header('Content-Length', len(jsondataasbytes)) # set the correct length of your request

    response = urllib.request.urlopen(req, jsondataasbytes) # send the request to the API

    res_body = response.read() # read the response 
    filings = json.loads(res_body.decode("utf-8")) # transform the response into JSON

    filing_dict = filings["filings"]
    
    for ident in filing_dict:
        qa = {}
        qa["cik"] = ident["cik"]
        qa["company_name"] = ident["companyName"]
        qa["filed_at"] = ident["filedAt"][:10]
        qa["form_type"] = ident["formType"]
        qa["linkToTxt"] = ident["linkToTxt"]
        qa_list.append(qa)

In [22]:
urls_df = pd.DataFrame(qa_list)
urls_df

Unnamed: 0,cik,company_name,filed_at,form_type,linkToTxt
0,1090872,"AGILENT TECHNOLOGIES, INC.",2020-12-17,10-K,https://www.sec.gov/Archives/edgar/data/109087...
1,1090872,"AGILENT TECHNOLOGIES, INC.",2020-09-01,10-Q,https://www.sec.gov/Archives/edgar/data/109087...
2,1090872,"AGILENT TECHNOLOGIES, INC.",2020-05-29,10-Q,https://www.sec.gov/Archives/edgar/data/109087...
3,6201,American Airlines Group Inc.,2020-10-22,10-Q,https://www.sec.gov/Archives/edgar/data/6201/0...
4,6201,American Airlines Group Inc.,2020-07-23,10-Q,https://www.sec.gov/Archives/edgar/data/6201/0...
...,...,...,...,...,...
1708,109380,"ZIONS BANCORPORATION, NATIONAL ASSOCIATION /UT/",2020-05-06,10-Q,https://www.sec.gov/Archives/edgar/data/109380...
1709,1555280,Zoetis Inc.,2021-02-16,10-K,https://www.sec.gov/Archives/edgar/data/155528...
1710,1555280,Zoetis Inc.,2020-11-05,10-Q,https://www.sec.gov/Archives/edgar/data/155528...
1711,1555280,Zoetis Inc.,2020-08-06,10-Q,https://www.sec.gov/Archives/edgar/data/155528...


In [23]:
urls_df.drop_duplicates(inplace=True)
urls_df

Unnamed: 0,cik,company_name,filed_at,form_type,linkToTxt
0,1090872,"AGILENT TECHNOLOGIES, INC.",2020-12-17,10-K,https://www.sec.gov/Archives/edgar/data/109087...
1,1090872,"AGILENT TECHNOLOGIES, INC.",2020-09-01,10-Q,https://www.sec.gov/Archives/edgar/data/109087...
2,1090872,"AGILENT TECHNOLOGIES, INC.",2020-05-29,10-Q,https://www.sec.gov/Archives/edgar/data/109087...
3,6201,American Airlines Group Inc.,2020-10-22,10-Q,https://www.sec.gov/Archives/edgar/data/6201/0...
4,6201,American Airlines Group Inc.,2020-07-23,10-Q,https://www.sec.gov/Archives/edgar/data/6201/0...
...,...,...,...,...,...
1708,109380,"ZIONS BANCORPORATION, NATIONAL ASSOCIATION /UT/",2020-05-06,10-Q,https://www.sec.gov/Archives/edgar/data/109380...
1709,1555280,Zoetis Inc.,2021-02-16,10-K,https://www.sec.gov/Archives/edgar/data/155528...
1710,1555280,Zoetis Inc.,2020-11-05,10-Q,https://www.sec.gov/Archives/edgar/data/155528...
1711,1555280,Zoetis Inc.,2020-08-06,10-Q,https://www.sec.gov/Archives/edgar/data/155528...


In [28]:
urls_df["filed_at"] = pd.to_datetime(urls_df.filed_at)

In [30]:
urls_df[urls_df["linkToTxt"].isnull()] #check for null urls

Unnamed: 0,cik,company_name,filed_at,form_type,linkToTxt


In [31]:
urls = pd.merge(urls_df, sp_500, how="left", on="cik", )
urls

Unnamed: 0,cik,company_name,filed_at,form_type,linkToTxt,ticker,name,sector
0,1090872,"AGILENT TECHNOLOGIES, INC.",2020-12-17,10-K,https://www.sec.gov/Archives/edgar/data/109087...,A,Agilent Technologies Inc,Health Care
1,1090872,"AGILENT TECHNOLOGIES, INC.",2020-09-01,10-Q,https://www.sec.gov/Archives/edgar/data/109087...,A,Agilent Technologies Inc,Health Care
2,1090872,"AGILENT TECHNOLOGIES, INC.",2020-05-29,10-Q,https://www.sec.gov/Archives/edgar/data/109087...,A,Agilent Technologies Inc,Health Care
3,6201,American Airlines Group Inc.,2020-10-22,10-Q,https://www.sec.gov/Archives/edgar/data/6201/0...,AAL,American Airlines Group,Industrials
4,6201,American Airlines Group Inc.,2020-07-23,10-Q,https://www.sec.gov/Archives/edgar/data/6201/0...,AAL,American Airlines Group,Industrials
...,...,...,...,...,...,...,...,...
1709,109380,"ZIONS BANCORPORATION, NATIONAL ASSOCIATION /UT/",2020-05-06,10-Q,https://www.sec.gov/Archives/edgar/data/109380...,ZION,Zions Bancorp,Financials
1710,1555280,Zoetis Inc.,2021-02-16,10-K,https://www.sec.gov/Archives/edgar/data/155528...,ZTS,Zoetis,Health Care
1711,1555280,Zoetis Inc.,2020-11-05,10-Q,https://www.sec.gov/Archives/edgar/data/155528...,ZTS,Zoetis,Health Care
1712,1555280,Zoetis Inc.,2020-08-06,10-Q,https://www.sec.gov/Archives/edgar/data/155528...,ZTS,Zoetis,Health Care


In [32]:
urls.to_pickle("urls.pkl")