In [191]:
import requests
import pandas as pd
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup


## Pulling all SEC 13F filing links

In [192]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

# Constants
CIK = "1536411"
# The submissions endpoint is keyed by the CIK left-padded with zeros to
# 10 digits, so pad by length rather than prepending a fixed "000".
SEC_API_URL = f"https://data.sec.gov/submissions/CIK{CIK.zfill(10)}.json"

# Headers for the SEC API request
headers = {'User-Agent': "bfassnacht17@gmail.com"}

# Host that relative links scraped from filing index pages are joined onto.
base_url = "https://sec.gov"

# Function to get 13F filings
def get_13f_filings(cik, timeout=30):
    """Return the 13F-HR filings listed in a filer's recent submissions.

    Parameters
    ----------
    cik : str or int
        The filer's CIK, with or without leading zeros.
    timeout : float, optional
        Seconds to wait for the submissions request before giving up.

    Returns
    -------
    pandas.DataFrame or None
        One row per 13F-HR filing with the columns 'Form', 'Filing Date',
        'Filing Index URL' and 'Form 13F URL' (the last is None when
        ``get_form_13f_url`` finds no matching link). The columns are present
        even when no 13F-HR filing exists. None is returned when the
        submissions request does not answer with HTTP 200.
    """
    cik_number = int(cik)
    # The endpoint expects a 10-digit, zero-padded CIK; padding by width works
    # for CIKs of any length instead of only 7-digit ones.
    url = f"https://data.sec.gov/submissions/CIK{cik_number:010d}.json"
    response = requests.get(url, headers=headers, timeout=timeout)

    if response.status_code != 200:
        print("Failed to retrieve data.")
        return None

    # 'recent' holds parallel lists: position i in each list is one filing.
    recent = response.json()['filings']['recent']

    filings_data = []
    for form, accession_number, filing_date in zip(
        recent['form'], recent['accessionNumber'], recent['filingDate']
    ):
        if form != "13F-HR":
            continue

        # Construct the link to the filing's index page
        filing_url = (
            f"https://www.sec.gov/Archives/edgar/data/{cik_number}/"
            f"{accession_number}/{accession_number}-index.htm"
        )

        filings_data.append({
            'Form': form,
            'Filing Date': filing_date,
            'Filing Index URL': filing_url,
            # One extra HTTP request per filing to locate the table link.
            'Form 13F URL': get_form_13f_url(filing_url),
        })

    # Naming the columns keeps the schema stable when nothing matched, so
    # callers can still select 'Form 13F URL' on an empty result.
    return pd.DataFrame(
        filings_data,
        columns=['Form', 'Filing Date', 'Filing Index URL', 'Form 13F URL'],
    )

# Function to extract the Form 13F XML file URL from the index page
def get_form_13f_url(index_url, timeout=30):
    """Find the Form 13F table link on a filing index page.

    The page is fetched and its anchors are scanned in document order. The
    first href that contains 'slform13f', ends in '.xml' and does not contain
    'primary_doc' (all compared case-insensitively) is returned, joined onto
    ``base_url``.

    Parameters
    ----------
    index_url : str
        URL of the filing's index page.
    timeout : float, optional
        Seconds to wait for the index page before giving up.

    Returns
    -------
    str or None
        The absolute URL of the matching link, or None when the request does
        not answer with HTTP 200 or no anchor matches.
    """
    response = requests.get(index_url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        return None

    soup = BeautifulSoup(response.content, 'html.parser')

    for link in soup.find_all('a'):
        href = link.get('href')
        if not href:
            continue

        # Lower-case once so all three checks, including the extension, are
        # case-insensitive.
        href_lower = href.lower()
        if (
            'slform13f' in href_lower
            and href_lower.endswith('.xml')
            and 'primary_doc' not in href_lower
        ):
            # Root-relative hrefs already start with '/'; strip it so the
            # joined URL does not contain a double slash after the host.
            return f"{base_url}/{href.lstrip('/')}"

    return None

# Get 13F filings
df_13f = get_13f_filings(CIK)

# Jupyter renders only a cell's final top-level expression; a bare name
# nested inside an `if` shows nothing. A None result renders as no output.
df_13f


In [193]:
# Keep only the filings whose 'Form 13F URL' lookup produced a link.
df_13f_filtered = df_13f.loc[df_13f["Form 13F URL"].notna()]

## Pulling holdings into DataFrames

In [194]:
import requests
import pandas as pd
import xml.etree.ElementTree as ET


def _parse_holdings_table(page_content):
    """Turn the holdings table of one fetched filing page into a DataFrame.

    The table is located by its ``summary`` attribute. Its first two rows are
    discarded, the third row supplies the column names and every later row is
    a data row. Non-breaking spaces are removed from all values.

    Returns None when the table is absent or has no column-name row.
    """
    soup = BeautifulSoup(page_content, 'html.parser')
    table = soup.find('table', summary="Form 13F-NT Header Information")
    if table is None:
        return None

    # Text of every cell, row by row, skipping the table's first row.
    table_rows = [
        [cell.text.strip() for cell in table_row.find_all('td')]
        for table_row in table.find_all('tr')[1:]
    ]
    if len(table_rows) < 2:
        return None

    # table_rows[0] is a second header row that is not needed;
    # table_rows[1] carries the column names.
    holdings = pd.DataFrame(table_rows[2:], columns=table_rows[1])

    # Handle non-breaking space characters (if present)
    return holdings.replace('\xa0', '', regex=True)


# One [filing_date, holdings DataFrame] pair per successfully parsed filing.
dfs = []

for _, filing in df_13f_filtered.iterrows():
    url = filing["Form 13F URL"]
    filing_date = filing["Filing Date"]

    # Fetch the filing's table page; a timeout stops one stalled request
    # from hanging the whole cell.
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        print(f"Skipping {filing_date}: HTTP {response.status_code} for {url}")
        continue

    holdings = _parse_holdings_table(response.content)
    if holdings is None:
        print(f"Skipping {filing_date}: no holdings table found at {url}")
        continue

    dfs.append([filing_date, holdings])



## Building a DataFrame of historical holdings

In [235]:
# Columns that together identify one position across filings.
HOLDING_KEYS = ["NAME OF ISSUER", "CUSIP", "CALL"]

filing_dates = [filing_date for filing_date, _ in dfs]
columns = filing_dates

# Every distinct position that appears in any filing, one row each.
all_holdings = pd.concat(
    [holdings[HOLDING_KEYS] for _, holdings in dfs]
).drop_duplicates()

for filing_date, holdings in dfs:
    position = holdings[HOLDING_KEYS + ["PRN AMT"]].copy()

    # The scraped amounts are text; convert them so they can be summed and so
    # a later fillna(0) does not mix strings with integers. Unparseable values
    # become NaN.
    position["PRN AMT"] = pd.to_numeric(
        position["PRN AMT"].astype(str).str.replace(",", "", regex=False),
        errors="coerce",
    )

    # A filing may list the same position on several rows. Collapse them to a
    # single total per key first; otherwise each left merge would duplicate
    # the matching rows and the duplication would compound across filings.
    position = (
        position
        .groupby(HOLDING_KEYS, as_index=False, dropna=False, sort=False)["PRN AMT"]
        .sum()
        .rename(columns={"PRN AMT": filing_date})
    )

    all_holdings = all_holdings.merge(position, on=HOLDING_KEYS, how='left')

In [236]:
# Positions missing from a filing have no value after the left merges;
# show those gaps as 0 (the result is displayed, not stored).
all_holdings.fillna(value=0)

Unnamed: 0,NAME OF ISSUER,CUSIP,CALL,2024-08-14,2024-05-15,2024-02-14,2023-11-14,2023-08-14,2023-05-15,2023-02-14,...,2015-11-13,2015-08-14,2015-05-15,2015-02-12,2014-11-14,2014-08-14,2014-05-14,2014-02-14,2013-11-14,2013-08-14
0,Adobe Inc,00724F101,,36685,0,17130,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Aes Corp,00130H105,,314750,314750,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Apple Inc,037833100,,24400,114700,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Arcellx Inc,03940C100,,74100,74100,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Arista Networks Inc,040413106,,52275,428252,234185,0,93980,32000,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
698,Select Sector SPDR TR,81369Y605,,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,10616
699,Procter & Gamble Co,742718109,,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,43599
700,Select Sector SPDR TR,81369Y803,,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6256
701,US Airways Group Inc.,90341W108,,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,38817
