In [62]:
import pandas as pd
import requests, json, time
from bs4 import BeautifulSoup

# Purpose: To understand how S-1 filings can reveal information about a company's lockup period, and to give insight into how companies have performed after an IPO

We will be using EDGAR's full-text search API.

EDGAR's full-text search UI, linked below, lists all 2024 S-1 filings. Below, we pull these filings from EDGAR programmatically and retrieve each S-1's filing date and filing URL (which leads to the actual S-1 document).


[EDGAR full-text search: 2024 S-1 filings](https://www.sec.gov/edgar/search/#/dateRange=custom&category=custom&startdt=2024-01-01&enddt=2024-12-31&forms=S-1)

## Define Helpers

In [91]:
def get_s1_filings(start_date="2024-01-02", end_date="2024-12-31", timeout=30):
    """Fetch S-1 filing metadata from EDGAR's full-text search endpoint.

    Results are requested page by page and flattened into a list of plain
    dicts, one per search hit.

    Args:
        start_date: First filing date to include, as "YYYY-MM-DD".
        end_date: Last filing date to include, as "YYYY-MM-DD".
        timeout: Seconds to wait for each HTTP response before `requests`
            raises, so a stalled connection cannot hang the notebook.

    Returns:
        A list of dicts with the keys "filing_id", "display_names",
        "file_date", "file_type", "biz_locations", "sequence" and
        "inc_states". A key is None when the hit does not carry that field.
        If a page comes back with a non-200 status, the error is printed and
        whatever was collected so far is returned (possibly an empty list).

    Raises:
        requests.exceptions.RequestException: on connection failures or
            timeouts; these are not caught here.
    """
    base_url = "https://efts.sec.gov/LATEST/search-index"

    # Throttle between page requests.
    # NOTE(review): 10 requests/second is presumably chosen to respect SEC's
    # fair-access limit - confirm against current SEC guidance.
    requests_per_second = 10
    delay = 1.0 / requests_per_second

    # Offset step between pages.
    # NOTE(review): assumes the endpoint returns 100 hits per page - confirm.
    size = 100
    all_filings = []

    headers = {
        "User-Agent": "MyApp/1.0 (ruoyu@lockrmail.com)",
        "Accept": "application/json",
        "Content-Type": "application/json"
    }

    def fetch_page(start_from):
        """Return the decoded JSON for the page at `start_from`, or None on a non-200 reply."""
        params = {
            "dateRange": "custom",
            "category": "custom",
            "startdt": start_date,
            "enddt": end_date,
            "forms": "S-1",
            "page": start_from // size + 1,
            "from": start_from,
        }
        response = requests.get(base_url, params=params, headers=headers, timeout=timeout)
        if response.status_code != 200:
            print(f"Error: Received status code {response.status_code}")
            print(response.text)
            return None
        return response.json()

    data = fetch_page(0)
    if data is None:
        return all_filings

    # `total` is normally {"value": N, ...}; tolerate a bare integer or a
    # missing field instead of raising on `.get`.
    total = (data.get("hits") or {}).get("total") or 0
    total_hits = total.get("value", 0) if isinstance(total, dict) else int(total)
    print(f"Total S-1 filings found: {total_hits}")

    start_from = 0
    while start_from < total_hits:
        # The first page is already in `data`; only later pages need a request.
        if start_from > 0:
            time.sleep(delay)
            data = fetch_page(start_from)
            if data is None:
                break

        for filing in (data.get("hits") or {}).get("hits") or []:
            # A hit without "_source" yields a row of None values rather than
            # an AttributeError.
            source_data = filing.get("_source") or {}
            all_filings.append({
                "filing_id": filing.get("_id"),
                "display_names": source_data.get("display_names"),
                "file_date": source_data.get("file_date"),
                "file_type": source_data.get("file_type"),
                "biz_locations": source_data.get("biz_locations"),
                "sequence": source_data.get("sequence"),
                "inc_states": source_data.get("inc_states"),
            })

        start_from += size

    return all_filings

In [116]:
def create_edgar_url(cik, filing_id):
    """Build the EDGAR Archives URL for one filing document.

    Args:
        cik: Company identifier, inserted into the path exactly as given.
        filing_id: String of the form "<accession-number>:<file-name>".
            It must contain exactly one ":"; otherwise the unpacking below
            raises ValueError.

    Returns:
        The URL string
        "https://www.sec.gov/Archives/edgar/data/<cik>/<accession>/<file-name>",
        where <accession> is the accession number with its dashes removed.
    """
    accession, document = filing_id.split(":")
    # The directory segment is the accession number without dashes.
    folder = accession.replace("-", "")
    return f"https://www.sec.gov/Archives/edgar/data/{cik}/{folder}/{document}"

def clean_s1_filings(dataframe_original):
    """Return an exploded copy of the filings frame with CIK and URL columns.

    The input frame is not modified. The list-valued columns
    "display_names", "biz_locations" and "inc_states" are exploded together,
    so a filing that names several entities becomes several rows that share
    the original index label. "CIK" is the digit run following "CIK " in
    "display_names"; "URL" is built per row by `create_edgar_url`.
    """
    list_columns = ["display_names", "biz_locations", "inc_states"]
    exploded = dataframe_original.copy().explode(list_columns)

    # Pull the numeric identifier out of text such as "Name (CIK 0001234567)".
    exploded["CIK"] = exploded["display_names"].str.extract(r"CIK (\d+)")

    exploded["URL"] = exploded.apply(
        lambda row: create_edgar_url(cik=row["CIK"], filing_id=row["filing_id"]),
        axis=1,
    )
    return exploded

## Initialize Edgar request

In [92]:
# Runs the paginated EDGAR query; this issues live HTTP requests, so it is the slow cell to re-run.
list_of_filings = get_s1_filings()

Total S-1 filings found: 2663


## Clean Data

In [93]:
# One row per search hit; columns are the dict keys produced by get_s1_filings().
df = pd.DataFrame(list_of_filings)

In [117]:
# NOTE(review): rebinding `df` makes this cell non-idempotent - re-running it
# without first re-running the cell that builds `df` from `list_of_filings`
# feeds already-cleaned data back into clean_s1_filings().
df = clean_s1_filings(df)

In [118]:
# Display the cleaned frame, now including the derived CIK and URL columns.
df

Unnamed: 0,filing_id,display_names,file_date,file_type,biz_locations,sequence,inc_states,CIK,URL
0,0001640334-24-001974:aen_s1.htm,AEN Group Ltd. (CIK 0002033750),2024-12-31,S-1,"Calgary, A0",1,,0002033750,https://www.sec.gov/Archives/edgar/data/000203...
1,0001213900-24-114111:ea0214433-04.htm,Capstone Holding Corp. (CIK 0000887151),2024-12-31,S-1,"Alsip, IL",1,DE,0000887151,https://www.sec.gov/Archives/edgar/data/000088...
2,0001493152-24-052669:forms-1a.htm,Thoughtful Media Group Inc. (TMGX) (CIK 0001...,2024-12-31,S-1/A,"Carson City, NV",1,NY,0001991879,https://www.sec.gov/Archives/edgar/data/000199...
3,0001213900-24-113992:ea0225566-s1a1_dmint.htm,"DMINT, Inc. (DMNT) (CIK 0001996450)",2024-12-31,S-1/A,"New York, NY",1,,0001996450,https://www.sec.gov/Archives/edgar/data/000199...
4,0001213900-24-114064:ea0217603-04.htm,Alpha Cognition Inc. (ACOG) (CIK 0001655923),2024-12-31,S-1,"Vancouver, A1",1,A1,0001655923,https://www.sec.gov/Archives/edgar/data/000165...
...,...,...,...,...,...,...,...,...,...
2658,0001753926-24-000019:g194593_s1.htm,MEMBERS Life Insurance Co (CIK 0001562577),2024-01-02,S-1,"Waverly, IA",1,IA,0001562577,https://www.sec.gov/Archives/edgar/data/000156...
2659,0001193125-24-000734:d251968ds1.htm,"BrightSpring Health Services, Inc. (BTSG, BTS...",2024-01-02,S-1,"Louisville, KY",1,DE,0001865782,https://www.sec.gov/Archives/edgar/data/000186...
2660,0001753926-24-000025:g194594_s-1.htm,MEMBERS Life Insurance Co (CIK 0001562577),2024-01-02,S-1,"Waverly, IA",1,IA,0001562577,https://www.sec.gov/Archives/edgar/data/000156...
2661,0001437749-24-000191:govx20231222_s1.htm,"GeoVax Labs, Inc. (GOVX, GOVXW) (CIK 0000832...",2024-01-02,S-1,"Smyrna, GA",1,DE,0000832489,https://www.sec.gov/Archives/edgar/data/000083...


In [119]:
# Spot-check one generated URL. This is a label lookup (index label 0), not a
# positional one; clean_s1_filings() explodes rows without resetting the index,
# so a label shared by several exploded rows would return a Series here.
df.URL[0]

'https://www.sec.gov/Archives/edgar/data/0002033750/000164033424001974/aen_s1.htm'