# Downloads and Stores IPO S-1 Filings as .txt Files

* Uses EDGAR search
 * https://www.sec.gov/edgar/searchedgar/companysearch.html
* Uses NASDAQ IPO lists
 * https://www.nasdaq.com/markets/ipos/
* Skips ambiguous company names (do manually)
* Takes largest file if multiple files are available (S-1, S-1/A etc.)

In [1]:
import edgar
from edgar import Edgar
from edgar import Company

import pandas as pd

from pathlib import Path

import datetime
from datetime import timedelta
from collections import OrderedDict

In [2]:
# params
# Scrape window: from the fixed start date through today (inclusive).
date_range = ["2017-01-01", datetime.datetime.today().strftime('%Y-%m-%d')]
start, end = [datetime.datetime.strptime(_, "%Y-%m-%d") for _ in date_range]

# Walk the window one day at a time and collapse the days into unique
# "YYYY-MM" labels; OrderedDict keeps them in chronological order.
# The "+ 1" makes the range include `end` itself, so the current month is
# not dropped when the notebook is run on the first day of a month.
n_days = (end - start).days + 1
# NOTE: despite its name, date_dict is a keys view of month labels, not a dict.
date_dict = OrderedDict.fromkeys(
    (start + timedelta(days=offset)).strftime(r"%Y-%m") for offset in range(n_days)
).keys()
print('date_dict:', date_dict)

date_dict: odict_keys(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06', '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12', '2018-01', '2018-02', '2018-03', '2018-04', '2018-05', '2018-06'])


### Scrape NASDAQ IPO Lists

In [3]:
# Fetch the NASDAQ IPO "pricings" page for every month in date_dict and take
# the first HTML table found on each page.
base_url = 'https://www.nasdaq.com/markets/ipos/activity.aspx?tab=pricings&month='
monthly_frames = [pd.read_html(base_url + x)[0] for x in date_dict]

# Concatenate once instead of growing the frame with DataFrame.append, which
# was deprecated in pandas 1.4 and removed in pandas 2.0 (and re-copied the
# whole frame on every iteration). pd.concat raises on an empty list, so fall
# back to an empty frame when there are no months to scrape.
if monthly_frames:
    df_symbols = pd.concat(monthly_frames, ignore_index=True)
else:
    df_symbols = pd.DataFrame()

# Index the rows by ticker; 'Symbol' also stays available as a regular column.
df_symbols.index = df_symbols['Symbol']

In [4]:
# Preview the first three rows of the scraped IPO table
df_symbols.head(3)

Unnamed: 0_level_0,Company Name,Symbol,Market,Price,Shares,Offer Amount,Date Priced
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
GLDW,WORLD GOLD TRUST,GLDW,NYSE Arca,$119.32,5000000,"$596,600,000",1/30/2017
JAG,JAGGED PEAK ENERGY INC.,JAG,NYSE,$15,31599334,"$473,990,010",1/27/2017
JELD,"JELD-WEN HOLDING, INC.",JELD,NYSE,$23,25000000,"$575,000,000",1/27/2017


### Batch Download

In [6]:
#batch download
# For every scraped IPO row: look the company up on EDGAR by name, fetch up to
# five S-1 filing documents, and save the longest one as ./Data/<symbol>.txt.
# Symbols whose file already exists are skipped, so the cell can be re-run to
# pick up where a previous run stopped.
data_dir = Path("./Data")
# open(..., "w") does not create missing folders, so make sure the target exists
data_dir.mkdir(parents=True, exist_ok=True)

total = df_symbols.shape[0]

# Built on first use and then shared by all iterations instead of being
# re-created per symbol.
# NOTE(review): assumes Edgar lookups do not mutate the instance - confirm.
tmpEdgar = None

# Take symbol and company name from the same row. Looking the name up with
# df_symbols.loc[symbol] returns a Series instead of a string whenever a
# symbol occurs more than once in the index.
rows = zip(df_symbols['Symbol'], df_symbols['Company Name'])

for counter, (x, company_name) in enumerate(rows, start=1):
    print('\n( ' + str(counter) + ' / ' + str(total) + ' ) ' + x)

    #check if exists
    out_path = data_dir / (x + ".txt")
    if out_path.is_file():
        print(x + ' data already exists, skipping...')
        continue

    #create company
    if tmpEdgar is None:
        tmpEdgar = Edgar()
    possible_companies = tmpEdgar.findCompanyName(company_name)
    print('possible_companies:', possible_companies)

    #validate
    if len(possible_companies) == 0:
        print('no possible companies:', x)
        continue
    # NOTE: several name matches are NOT skipped; the first candidate is used.
    # Add a `len(possible_companies) > 1` guard here to skip ambiguous names.

    name = possible_companies[0]
    cik = tmpEdgar.getCikByCompanyName(name)
    company = Company(name, cik)

    #look for S-1
    print(company.getFilingsUrl(filingType = "S-1"))
    tree = company.getAllFilings(filingType = "S-1")
    docs = edgar.getDocuments(tree, noOfDocuments=5)

    #write file
    if len(docs) > 0:
        print([len(doc) for doc in docs])

        # Keep the longest text (first one wins on ties). It is picked before
        # the file is opened so a failure here cannot leave an empty file
        # behind that a later run would treat as "already exists".
        longest_doc = max(docs, key=len)

        with open(out_path, "w", encoding="utf-8") as f:
            f.write(longest_doc)

        print('Got data for ' + x)


( 1 / 314 ) GLDW
GLDW data already exists, skipping...

( 2 / 314 ) JAG
JAG data already exists, skipping...

( 3 / 314 ) JELD
JELD data already exists, skipping...

( 4 / 314 ) JNCE
JNCE data already exists, skipping...

( 5 / 314 ) REVG
REVG data already exists, skipping...

( 6 / 314 ) OBSV
possible_companies: ['OBSEVA SA']
https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001685316&type=S-1&dateb=&owner=include&count=100

( 7 / 314 ) ANAB
ANAB data already exists, skipping...

( 8 / 314 ) FNTEU
FNTEU data already exists, skipping...

( 9 / 314 ) FRAC
FRAC data already exists, skipping...

( 10 / 314 ) GSHTU
GSHTU data already exists, skipping...

( 11 / 314 ) CLPR
CLPR data already exists, skipping...

( 12 / 314 ) FBM
FBM data already exists, skipping...

( 13 / 314 ) SACH
SACH data already exists, skipping...

( 14 / 314 ) METC
METC data already exists, skipping...

( 15 / 314 ) KRP
KRP data already exists, skipping...

( 16 / 314 ) LAUR
LAUR data already exists, s

[8434, 687119, 648631, 645739, 642800]
Got data for VERI

( 64 / 314 ) NESRU
possible_companies: ['NATIONAL ENERGY SERVICES REUNITED CORP.']
https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001698514&type=S-1&dateb=&owner=include&count=100
[10393, 588961, 591896, 588927, 577488]
Got data for NESRU

( 65 / 314 ) FPH
possible_companies: ['FIVE POINT HOLDINGS, LLC']
https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001574197&type=S-1&dateb=&owner=include&count=100
[1359785, 1358706, 1344741]
Got data for FPH

( 66 / 314 ) GNTY
possible_companies: ['GUARANTY BANCSHARES INC /TX/']
https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001058867&type=S-1&dateb=&owner=include&count=100
[23037, 897873, 897882, 9309, 9309]
Got data for GNTY

( 67 / 314 ) NDRAU
possible_companies: ['ENDRA LIFE SCIENCES INC.']
https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001681682&type=S-1&dateb=&owner=include&count=100
[10400, 33462, 446316, 444194, 444145

[1147269, 1147659, 1136543]
Got data for SNNA

( 100 / 314 ) WRLSU
possible_companies: ['PENSARE ACQUISITION CORP']
https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001704760&type=S-1&dateb=&owner=include&count=100
[11810, 556380, 36668, 557733, 531075]
Got data for WRLSU

( 101 / 314 ) INDUU
possible_companies: ['INDUSTREA ACQUISITION CORP.']
https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001703956&type=S-1&dateb=&owner=include&count=100
[773574, 777690]
Got data for INDUU

( 102 / 314 ) RBB
possible_companies: ['RBB BANCORP']
https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001499422&type=S-1&dateb=&owner=include&count=100
[9769, 19446, 968947, 970346, 964253]
Got data for RBB

( 103 / 314 ) OSPRU
possible_companies: ['OSPREY ENERGY ACQUISITION CORP']
https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001703785&type=S-1&dateb=&owner=include&count=100
[8672, 631097, 631802]
Got data for OSPRU

( 104 / 314 ) PETQ
possible_comp

possible_companies: ['I-AM CAPITAL ACQUISITION CO']
https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001708410&type=S-1&dateb=&owner=include&count=100
[742769, 741389, 720441, 712261]
Got data for IAMXU

( 116 / 314 ) CIC'U
possible_companies: ['CAPITOL INVESTMENT CORP. IV']
https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001709682&type=S-1&dateb=&owner=include&count=100
[16805, 619197, 618877, 652865]
Got data for CIC'U

( 117 / 314 ) RNGR
possible_companies: ['RANGER ENERGY SERVICES, INC.']
https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001699039&type=S-1&dateb=&owner=include&count=100
[9840, 19577, 1033225, 1029456, 18155]
Got data for RNGR

( 118 / 314 ) YOGA
possible_companies: ['YOGAWORKS, INC.']
https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001703497&type=S-1&dateb=&owner=include&count=100
[680051, 25904, 678563, 679279, 669381]
Got data for YOGA

( 119 / 314 ) ZEAL
possible_companies: ['ZEALAND PHARMA A/S / ADR',

[17634, 1820973, 16879, 1803044]
Got data for BPMP

( 148 / 314 ) ABLX
possible_companies: ['ABLYNX NV']
https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001617582&type=S-1&dateb=&owner=include&count=100

( 149 / 314 ) HYACU
possible_companies: ['HAYMAKER ACQUISITION CORP.']
https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001707306&type=S-1&dateb=&owner=include&count=100
[700549, 712033, 708364]
Got data for HYACU

( 150 / 314 ) FAT
possible_companies: ['FAT BRANDS, INC']
https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001705012&type=S-1&dateb=&owner=include&count=100

( 151 / 314 ) LYL
possible_companies: ['DRAGON VICTORY INTERNATIONAL LTD']
https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001682241&type=S-1&dateb=&owner=include&count=100

( 152 / 314 ) MOSCU
possible_companies: ['MOSAIC ACQUISITION CORP.']
https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001713952&type=S-1&dateb=&owner=include&count=100
[795


( 156 / 314 ) MDB
possible_companies: ['MONGODB, INC.']
https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001441816&type=S-1&dateb=&owner=include&count=100
[750552, 752034, 745846]
Got data for MDB

( 157 / 314 ) QD
possible_companies: ['QUDIAN INC.']
https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001692705&type=S-1&dateb=&owner=include&count=100

( 158 / 314 ) OPTN
possible_companies: ['OPTINOSE, INC.']
https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001494650&type=S-1&dateb=&owner=include&count=100
[8054, 215632, 9147, 28745, 899702]
Got data for OPTN

( 159 / 314 ) KIDS
possible_companies: ['ORTHOPEDIATRICS CORP']
https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001425450&type=S-1&dateb=&owner=include&count=100
[994170, 993051, 1016528, 990418, 988782]
Got data for KIDS

( 160 / 314 ) HAIR
possible_companies: ['RESTORATION ROBOTICS, INC.']
https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001409269&type=S-1

possible_companies: ['ACM RESEARCH, INC.']
https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001680062&type=S-1&dateb=&owner=include&count=100
[25923, 25931, 746713, 746624, 724264]
Got data for ACMR

( 191 / 314 ) STNLU
possible_companies: ['SENTINEL ENERGY SERVICES INC.']
https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001709768&type=S-1&dateb=&owner=include&count=100
[19873, 740950, 742984, 719090]
Got data for STNLU

( 192 / 314 ) SPRO
possible_companies: ['SPERO THERAPEUTICS, INC.']
https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001701108&type=S-1&dateb=&owner=include&count=100
[899627, 883092]
Got data for SPRO

( 193 / 314 ) ALNA
possible_companies: ['ALLENA PHARMACEUTICALS, INC.']
https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001624658&type=S-1&dateb=&owner=include&count=100
[836313, 825032]
Got data for ALNA

( 194 / 314 ) AQUA
possible_companies: ['EVOQUA WATER TECHNOLOGIES CORP.']
https://www.sec.gov/cgi-bin/br

possible_companies: ['PAGSEGURO DIGITAL LTD.']
https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001712807&type=S-1&dateb=&owner=include&count=100

( 221 / 314 ) NINE
NINE data already exists, skipping...

( 222 / 314 ) ADT
ADT data already exists, skipping...

( 223 / 314 ) COLD
COLD data already exists, skipping...

( 224 / 314 ) OMADU
OMADU data already exists, skipping...

( 225 / 314 ) EAGLU
EAGLU data already exists, skipping...

( 226 / 314 ) ILPT
ILPT data already exists, skipping...

( 227 / 314 ) LBRT
LBRT data already exists, skipping...

( 228 / 314 ) NEBUU
NEBUU data already exists, skipping...

( 229 / 314 ) CUE
CUE data already exists, skipping...

( 230 / 314 ) LTN'U
LTN'U data already exists, skipping...

( 231 / 314 ) DFBHU
DFBHU data already exists, skipping...

( 232 / 314 ) FAMI
possible_companies: ['FARMMI, INC.']
https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001701261&type=S-1&dateb=&owner=include&count=100

( 233 / 314 ) BFRA
poss


( 292 / 314 ) EVOP
EVOP data already exists, skipping...

( 293 / 314 ) PS
PS data already exists, skipping...

( 294 / 314 ) VTIQU
VTIQU data already exists, skipping...

( 295 / 314 ) TMCXU
TMCXU data already exists, skipping...

( 296 / 314 ) HUYA
possible_companies: ['HUYA INC.']
https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001728190&type=S-1&dateb=&owner=include&count=100

( 297 / 314 ) EQH
EQH data already exists, skipping...

( 298 / 314 ) EVLO
EVLO data already exists, skipping...

( 299 / 314 ) OBNK
OBNK data already exists, skipping...

( 300 / 314 ) ROAD
ROAD data already exists, skipping...

( 301 / 314 ) STXB
STXB data already exists, skipping...

( 302 / 314 ) ASLN
possible_companies: ['ASLAN PHARMACEUTICALS LTD']
https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001722926&type=S-1&dateb=&owner=include&count=100

( 303 / 314 ) BCML
BCML data already exists, skipping...

( 304 / 314 ) CBLK
CBLK data already exists, skipping...

( 305 / 314