In [25]:
import requests
import json
import os
import time
import pandas as pd

In [27]:
# Load the list of CIKs prepared by "Step1_getCIKma_SEC.ipynb".
cik_csv_path = r"C:\Users\barid\Documents\Papers\Muni_adv_PBF\ma_filings\sec_cik_ma.csv"
df = pd.read_csv(cik_csv_path, index_col=False)
df

Unnamed: 0,Registrant Name,File Number,CIK,Year,Month,yearMth,File No
0,&PARTNERS,867-01682,107136,2025,4,2025-04,
1,"30 Three Sixty Public Finance, Inc.",867-02350,1733578,2025,4,2025-04,
2,A. M. Peche & Associates LLC,867-00111,1613201,2025,4,2025-04,
3,A.BRIDGE REALVEST SECURITIES CORPORATION,867-01291,1005399,2025,4,2025-04,
4,A&C Galvan Group LLC,867-02679,2013292,2025,4,2025-04,
...,...,...,...,...,...,...,...
66076,"YOUNG AMERICA CAPITAL, LLC",,1463911,2015,4,,867-01657
66077,Yuba Group LLC,,1612952,2015,4,,867-00105
66078,ZIONS FIRST NATIONAL BANK /MSD,,797595,2015,4,,867-00724
66079,"Zions Public Finance, Inc.",,1628261,2015,4,,867-01453


In [29]:
# Keep one entry per distinct CIK and renumber the index from 0.
uniq_vals = df['CIK'].drop_duplicates().reset_index(drop=True)
# Wrap the de-duplicated Series in a single-column frame named 'cik'.
df_cik2 = uniq_vals.to_frame(name='cik')
df_cik2

Unnamed: 0,cik
0,107136
1,1733578
2,1613201
3,1005399
4,2013292
...,...
920,1620617
921,899227
922,776831
923,1622487


In [31]:
# Left-pad every CIK with zeros to a fixed width of 10 characters and store it
# as the string column 'cik2' (the conversion to a list happens where the CIKs
# are iterated, not in this cell).
cik_as_text = df_cik2['cik'].astype(str)
df_cik2['cik2'] = cik_as_text.str.zfill(10)
df_cik2

Unnamed: 0,cik,cik2
0,107136,0000107136
1,1733578,0001733578
2,1613201,0001613201
3,1005399,0001005399
4,2013292,0002013292
...,...,...
920,1620617,0001620617
921,899227,0000899227
922,776831,0000776831
923,1622487,0001622487


In [45]:
def download_filing_content(download_url, filename, headers, timeout=30):
    """Download ``download_url`` and write the response body to ``filename``.

    Args:
        download_url: URL to fetch with an HTTP GET.
        filename: Path of the file to create (opened in binary write mode).
        headers: Dict of HTTP headers sent with the request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` waits indefinitely, so a single stalled
            connection would hang the whole batch run.

    Returns:
        True when the content was fetched and saved; False when the request
        failed (connection error, timeout, non-2xx status) or the file could
        not be written. Failures are printed, never raised.
    """
    print(f"Downloading: {filename}")
    try:
        response = requests.get(download_url, headers=headers, timeout=timeout)
        # Turn 4xx/5xx answers into an exception so error pages are not saved.
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error downloading {filename}: {e}")
        return False

    try:
        with open(filename, 'wb') as f:
            f.write(response.content)
    except OSError as e:
        # Disk/path problems should fail this one file, not abort the caller's loop.
        print(f"Error saving {filename}: {e}")
        return False

    print(f"Saved to: {filename}")
    return True

In [47]:
'''
Why download_ma_filing_documents saves two files for every filing:
- The primary document (e.g., the XML file) has the complete, structured data.
- The .txt file offers a more readable, though potentially less detailed, summary.
The two files are saved under slightly different names, so both versions of each filing are kept side by side.
'''

def download_ma_filing_documents(cik, accession_number, primary_document, base_download_folder="ma_filings"):
    """Download the primary document and the .txt file of one filing into a CIK-specific subfolder.

    Args:
        cik: CIK as a string (leading zeros allowed). The string is used as-is
            for the subfolder name and with leading zeros stripped in the URLs.
        accession_number: Accession number with hyphens, e.g. "0000107136-25-000003".
        primary_document: Path of the primary document relative to the filing
            folder, e.g. "xslFormMA_X01/primary_doc.xml". May be empty, in which
            case only the .txt file is requested.
        base_download_folder: Folder under which the per-CIK subfolder is created.

    Returns:
        True if both files were saved, the string "partial" if exactly one was
        saved, and False if neither was. Note that "partial" is truthy, so
        callers must compare explicitly rather than test truthiness.
    """
    cik_no_zeros = cik.lstrip('0')
    accession_no_hyphens = accession_number.replace('-', '')
    accession_underscored = accession_number.replace('-', '_')
    filing_base_url = f"https://www.sec.gov/Archives/edgar/data/{cik_no_zeros}/{accession_no_hyphens}"

    cik_subfolder = os.path.join(base_download_folder, cik)
    os.makedirs(cik_subfolder, exist_ok=True)

    headers = {
        'User-Agent': 'edgar_ma/1 (bmalakar3@gatech.edu)'  # Ensure your User-Agent is correct
    }

    if primary_document:
        primary_download_url = f"{filing_base_url}/{primary_document}"
        # The document path may contain a subfolder; flatten it into the file name.
        sanitized_filename = primary_document.replace('/', '_')
        primary_filename = os.path.join(cik_subfolder, f"MA_{accession_underscored}_{sanitized_filename}")
        primary_success = download_filing_content(primary_download_url, primary_filename, headers)
    else:
        # With no document name the URL would point at the filing directory itself
        # and the result would be saved under a name ending in "_"; skip instead.
        print(f"No primary document listed for {accession_number}; skipping primary download")
        primary_success = False

    txt_download_url = f"{filing_base_url}/{accession_number}.txt"
    txt_filename = os.path.join(cik_subfolder, f"MA_{accession_underscored}.txt")
    txt_success = download_filing_content(txt_download_url, txt_filename, headers)

    if primary_success and txt_success:
        return True
    elif primary_success or txt_success:
        return "partial"  # Indicate partial success
    else:
        return False


In [None]:
def filing_rows(columns):
    """Turn a column-oriented filings table into a list with one dict per filing.

    The submissions JSON stores filings as a dict of parallel lists
    (``accessionNumber``, ``form``, ``filingDate``, ``primaryDocument``, ...),
    not as a list of records. This helper converts that layout to rows so that
    several tables can simply be concatenated and iterated.

    Args:
        columns: Dict mapping column name to a list of values. Anything that is
            not a dict, or has no ``accessionNumber`` list, yields no rows.

    Returns:
        A list of dicts, one per entry of ``accessionNumber``. A column that is
        shorter than ``accessionNumber`` is simply absent from the later rows,
        so callers should read fields with ``.get``.
    """
    if not isinstance(columns, dict):
        return []
    accession_numbers = columns.get('accessionNumber')
    if not isinstance(accession_numbers, list):
        return []

    rows = []
    for i in range(len(accession_numbers)):
        rows.append({
            key: values[i]
            for key, values in columns.items()
            if isinstance(values, list) and i < len(values)
        })
    return rows


if __name__ == "__main__":
    cik_list = df_cik2['cik2'].tolist()
    # For a quick test run, restrict the list to one CIK, e.g. cik_list = ["0001614240"]
    print(f"Beginnig iterations for {len(cik_list)} items")
    storage_path = r"D:\PhD_Fin\Muni_adv_PBF\Data2"
    base_download_folder = storage_path
    os.makedirs(base_download_folder, exist_ok=True)

    headers = {
        'User-Agent': 'edgar_ma/2 (bmalakar3@gatech.edu)'  # Ensure your User-Agent is correct
    }

    # Base URL for SEC submissions
    SEC_BASE_URL = "https://data.sec.gov/submissions/"
    REQUEST_TIMEOUT = 30   # seconds; without it a stalled connection blocks forever
    REQUEST_PAUSE = 0.1    # seconds between requests, to respect SEC rate limits

    for cik in cik_list:
        print(f"\n--- Processing CIK: {cik} ---")
        url = f"{SEC_BASE_URL}CIK{cik}.json"

        time.sleep(REQUEST_PAUSE)
        try:
            response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
            data = response.json()
        except requests.exceptions.RequestException as e:
            print(f"Error fetching filings data for CIK {cik}: {e}")
            continue
        except json.JSONDecodeError:
            print(f"Error decoding JSON response for CIK {cik}.")
            continue

        filings_section = data.get('filings', {})

        # 'recent' is a dict of parallel column lists; convert it to rows so the
        # older pages can be appended (a dict has no .extend, which previously
        # made every older page fail silently).
        filings = filing_rows(filings_section.get('recent', {}))

        # 'files' lists additional JSON pages holding older filings.
        for file_ref in filings_section.get('files', []):
            name = file_ref.get('name') if isinstance(file_ref, dict) else None
            if not name:
                continue
            file_url = f"{SEC_BASE_URL}{name}"

            time.sleep(REQUEST_PAUSE)
            try:
                older_response = requests.get(file_url, headers=headers, timeout=REQUEST_TIMEOUT)
                older_response.raise_for_status()
                # ValueError covers an undecodable JSON body.
                older_data = older_response.json()
            except (requests.exceptions.RequestException, ValueError) as e:
                print(f"Failed to fetch {file_url}: {str(e)}")
                continue

            # Older pages are expected to carry the same column lists at the top level.
            filings.extend(filing_rows(older_data))

        # Now 'filings' contains both recent and historical filings
        if filings:
            print(f"Form MA and related filings for CIK {cik}:")
            for filing in filings:
                accession_number = filing.get('accessionNumber') or ''
                filing_type = filing.get('form') or ''
                filing_date = filing.get('filingDate') or ''
                primary_document = filing.get('primaryDocument') or ''

                print(f"- [{filing_type}] - {accession_number} ({filing_date}) - Primary Document: {primary_document}")

                # Only the MA form family is downloaded; other forms are just listed.
                if filing_type.startswith("MA"):
                    download_ma_filing_documents(cik, accession_number, primary_document, base_download_folder)
                    time.sleep(0.1)  # Be respectful of the API
        else:
            print(f"No filings found for CIK: {cik}")

    print("\nScript finished.")


Beginnig iterations for 925 items

--- Processing CIK: 0000107136 ---
Form MA and related filings for CIK 0000107136:
- [13F-HR] - 0001214659-25-007673 (2025-05-15) - Primary Document: xslForm13F_X02/primary_doc.xml
- [N-PX] - 0000107136-25-000006 (2025-04-09) - Primary Document: xslN-PX_X01/primary_doc.xml
- [X-17A-5] - 0000107136-25-000005 (2025-03-18) - Primary Document: xslX-17A-5_X01/primary_doc.xml
- [MA-A] - 0000107136-25-000003 (2025-03-05) - Primary Document: xslFormMA_X01/primary_doc.xml
Downloading: D:\PhD_Fin\Muni_adv_PBF\Data2\0000107136\MA_0000107136_25_000003_xslFormMA_X01_primary_doc.xml
Saved to: D:\PhD_Fin\Muni_adv_PBF\Data2\0000107136\MA_0000107136_25_000003_xslFormMA_X01_primary_doc.xml
Downloading: D:\PhD_Fin\Muni_adv_PBF\Data2\0000107136\MA_0000107136_25_000003.txt
Saved to: D:\PhD_Fin\Muni_adv_PBF\Data2\0000107136\MA_0000107136_25_000003.txt
- [13F-HR] - 0001214659-25-002647 (2025-02-14) - Primary Document: xslForm13F_X02/primary_doc.xml
- [13F-HR] - 0001214659-2