In [1]:
import requests
import json
import pprint
import pandas as pd
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup
import re

CIK_FULL = "CIK0000763212"
CIK_STRIPPED = "763212"
submissions_url = f"https://data.sec.gov/submissions/{CIK_FULL}.json"
filing_baseurl = f"https://www.sec.gov/Archives/edgar/data/{CIK_STRIPPED}"

In [2]:
headers = {
    # "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.35"
    "User-Agent": "My Name my.email@gmail.com"
}

response = requests.get(submissions_url, headers=headers)
print(response)

if response.status_code == 200:
    submissions_json = json.loads(response.text)
    pprint.pprint(submissions_json)
else:
    print("Error:", response.status_code)

# accession number for the filing
# Go to filing index.json to list all attached files.
# Pull the relevant exhibit (XML for 13F, text/HTML for 13G/A). Parse + extract.



<Response [200]>
{'addresses': {'business': {'city': 'PASADENA',
                            'country': None,
                            'countryCode': None,
                            'foreignStateTerritory': None,
                            'isForeignLocation': None,
                            'stateOrCountry': 'CA',
                            'stateOrCountryDescription': 'CA',
                            'street1': '177 EAST COLORADO BLVD.',
                            'street2': '11TH FLOOR',
                            'zipCode': '91105'},
               'mailing': {'city': 'PASADENA',
                           'country': None,
                           'countryCode': None,
                           'foreignStateTerritory': None,
                           'isForeignLocation': 0,
                           'stateOrCountry': 'CA',
                           'stateOrCountryDescription': 'CA',
                           'street1': '177 EAST COLORADO BLVD.',
                  

In [3]:
recent_filings_df = pd.DataFrame(submissions_json['filings']['recent'])
recent_filings_df.head()

Unnamed: 0,accessionNumber,filingDate,reportDate,acceptanceDateTime,act,form,fileNumber,filmNumber,items,core_type,size,isXBRL,isInlineXBRL,primaryDocument,primaryDocDescription
0,0001085146-25-005483,2025-08-25,2025-06-30,2025-08-25T21:04:05.000Z,34.0,N-PX,028-01469,251251792.0,,N-PX,235384,0,0,xslN-PX_X01/primary_doc.xml,
1,0001085146-25-005195,2025-08-14,,2025-08-14T17:29:35.000Z,,SCHEDULE 13G/A,,,,SCHEDULE 13G/A,7294,0,0,xslSCHEDULE_13G_X01/primary_doc.xml,
2,0001085146-25-005194,2025-08-14,,2025-08-14T17:28:54.000Z,,SCHEDULE 13G/A,,,,SCHEDULE 13G/A,6970,0,0,xslSCHEDULE_13G_X01/primary_doc.xml,
3,0001085146-25-005193,2025-08-14,,2025-08-14T17:27:09.000Z,,SCHEDULE 13G/A,,,,SCHEDULE 13G/A,6996,0,0,xslSCHEDULE_13G_X01/primary_doc.xml,
4,0001085146-25-005192,2025-08-14,,2025-08-14T17:25:49.000Z,,SCHEDULE 13G/A,,,,SCHEDULE 13G/A,7052,0,0,xslSCHEDULE_13G_X01/primary_doc.xml,


In [4]:
def get_f_data(acc_numbers: list):
    data = []
    num_of_filings = 5
    try:
        for i in range(num_of_filings):
            acc_stripped = acc_numbers[i].replace('-', '')
            # Get index.json - items table to list the files available in the filing
            index_response = requests.get(f"{filing_baseurl}/{acc_stripped}/index.json", headers=headers)
            items_json = json.loads(index_response.text)['directory']['item']

            # For 13F forms: find infotable file name
            info_file = ""
            for item in items_json:
                if "infotable" in item["name"]:
                    info_file = item["name"]
                    print("Info table file:", info_file)
                    break
            if not info_file:
                print("Info table not found")
            # Get infotable XML file 
            xml_response = requests.get(f"{filing_baseurl}/{acc_stripped}/{info_file}", headers=headers) # xml
            root = ET.fromstring(xml_response.content)
            # extract namespace
            if root.tag.startswith('{'):
                ns_uri = root.tag.split("}")[0].strip("{")
                ns = {"ns1": ns_uri}  # namespace prefix is ns1:
                infotables = root.findall(".//ns1:infoTable", ns)

            # one <infoTable> entry per holding
            for infotable in infotables: # .// XPath expression “search anywhere under the current node, recursively.”
                entry = {
                    "accession_number": acc_numbers[i],
                    "issuer": infotable.findtext("ns1:nameOfIssuer", namespaces=ns), #* findtext: Find text for first matching element by tag name or path.
                    "class": infotable.findtext("ns1:titleOfClass", namespaces=ns),
                    "cusip": infotable.findtext("ns1:cusip", namespaces=ns),
                    "value": int(infotable.findtext("ns1:value", default=-1, namespaces=ns)),
                    "shares": int(infotable.findtext("ns1:shrsOrPrnAmt/ns1:sshPrnamt", default=-1, namespaces=ns)),
                    "share_type": infotable.findtext("ns1:shrsOrPrnAmt/ns1:sshPrnamtType", namespaces=ns),
                    "discretion": infotable.findtext("ns1:investmentDiscretion", namespaces=ns),
                    "voting_sole": infotable.findtext("ns1:votingAuthority/ns1:Sole", namespaces=ns),
                    "voting_shared": infotable.findtext("ns1:votingAuthority/ns1:Shared", namespaces=ns),
                    "voting_none": infotable.findtext("ns1:votingAuthority/ns1:None", namespaces=ns),
                }
                data.append(entry)
        return data
    
    except Exception as e:
        print("Error:", e)

In [5]:
thirteen_f = recent_filings_df.loc[recent_filings_df["form"] == "13F-HR"]
acc_f = list(thirteen_f["accessionNumber"])
data_f = get_f_data(acc_f)
df_f = pd.DataFrame(data_f)
df_f.head()

Info table file: infotable.xml
Info table file: infotable.xml
Info table file: infotable.xml
Info table file: infotable.xml
Info table file: infotable.xml


Unnamed: 0,accession_number,issuer,class,cusip,value,shares,share_type,discretion,voting_sole,voting_shared,voting_none
0,0001085146-25-004804,ABBOTT LABS,COM,002824100,119075397,875490,SH,SOLE,836220,0,39270
1,0001085146-25-004804,ACCURAY INC,COM,004397105,46169,33700,SH,SOLE,33700,0,0
2,0001085146-25-004804,ADOBE INC,COM,00724F101,1608467527,4157536,SH,SOLE,4056927,0,100609
3,0001085146-25-004804,ADVANCED MICRO DEVICES INC,COM,007903107,31317330,220700,SH,SOLE,220700,0,0
4,0001085146-25-004804,AECOM,COM,00766T100,1079308621,9563252,SH,SOLE,9239052,0,324200


In [11]:
thirteen_g = recent_filings_df.loc[recent_filings_df["form"].isin(["SC 13G/A", "SC 13G"])]
acc_numbers = list(thirteen_g["accessionNumber"])

data = []
num_of_filings = 5
try:
    for i in range(num_of_filings):
        acc_stripped = acc_numbers[i].replace('-', '')
        # Get index.json - items table to list the files available in the filing
        index_response = requests.get(f"{filing_baseurl}/{acc_stripped}/index.json", headers=headers)
        items_json = json.loads(index_response.text)['directory']['item']

        # Get name of primary filing document
        doc_name = "nxt_111124.htm"

        response = requests.get(f"{filing_baseurl}/{acc_stripped}/{doc_name}", headers=headers)
        soup = BeautifulSoup(response.content, "html.parser")
        text = soup.get_text(" ", strip=True)
        # print(text)
        # Regex extractions
        cusip_match = re.search(r"CUSIP\s+No\.\s+([0-9A-Z]{9})", text)
        cusip = cusip_match.group(1) if cusip_match else None

        # Extract shares
        shares_match = re.search(r"Amount Beneficially Owned:\s*([\d,]+)", text)
        shares = shares_match.group(1) if shares_match else None

        # Extract percent of class
        pct_match = re.search(r"Percent of Class:\s*([\d\.]+%)", text)
        percent = pct_match.group(1) if pct_match else None
        print("CUSIP:", cusip, "Shares:", shares, "Percent of Class:", percent)
        
            
except Exception as e:
    print("Error:", e)

  k = self.parse_starttag(i)


CUSIP: None Shares: None Percent of Class: None
CUSIP: None Shares: None Percent of Class: None
CUSIP: None Shares: None Percent of Class: None
CUSIP: None Shares: None Percent of Class: None
CUSIP: None Shares: None Percent of Class: None
