In [1]:
# 02_fetch_recent_filings.ipynb

import requests
import json
import pandas as pd
import os

# Make sure the output directory is present (no-op when it already exists)
os.makedirs('../data', exist_ok=True)

# Read the entity list and render every CIK as a zero-padded, 10-character
# string, which is the form interpolated into the submissions URL below
cik_df = pd.read_csv('../data/masterworks_entity_list.csv').assign(
    CIK=lambda frame: frame['CIK'].astype(str).str.zfill(10)
)

# HTTP headers sent with every request made further down
user_agent = 'Ahmet Besiroglu (abesiroglu@masterworks.com)'
headers = {'User-Agent': user_agent}

# Without an explicit timeout, requests waits indefinitely on a stalled server.
REQUEST_TIMEOUT_SECONDS = 30

# Form types for which the single most recent filing is kept per entity.
TARGET_FORMS = ('1-K', '1-SA')


def _latest_filings_by_form(recent_filings, cik, company_name):
    """Return the most recent filing record for each form in TARGET_FORMS.

    Args:
        recent_filings: dict holding the parallel lists 'form',
            'accessionNumber', 'filingDate' and 'primaryDocument'; a missing
            key is treated as an empty list.
        cik: CIK string interpolated into the generated archive URLs.
        company_name: name stored alongside each record.

    Returns:
        A list with at most one record dict per target form, in TARGET_FORMS
        order. Forms with no filing are omitted.
    """
    latest = dict.fromkeys(TARGET_FORMS)
    entries = zip(
        recent_filings.get('form', []),
        recent_filings.get('accessionNumber', []),
        recent_filings.get('filingDate', []),
        recent_filings.get('primaryDocument', []),
    )
    for form, accession_number, filing_date, primary_document in entries:
        if form not in latest:
            continue
        current = latest[form]
        # Dates are compared as strings; this assumes the YYYY-MM-DD format
        # seen in the data, for which string order equals date order.
        # Strict '>' keeps the first entry seen when dates tie.
        if current is None or filing_date > current['filing_date']:
            # The archive folder name is the accession number without dashes.
            archive_dir = (
                f"https://www.sec.gov/Archives/edgar/data/{cik}/"
                f"{accession_number.replace('-', '')}"
            )
            latest[form] = {
                'CIK': cik,
                'Company Name': company_name,
                'form': form,
                'accession_number': accession_number,
                'filing_date': filing_date,
                'primary_document': primary_document,
                'document_url': f"{archive_dir}/{primary_document}",
                'txt_file_url': f"{archive_dir}/{accession_number}.txt",
            }
    return [filing for filing in latest.values() if filing]


all_filings_data = []

# A single Session reuses the underlying connection across the per-CIK calls.
# NOTE(review): no client-side throttling is applied here; confirm the request
# rate stays within the SEC's published fair-access limits.
with requests.Session() as session:
    for _, row in cik_df.iterrows():
        cik = row['CIK']
        company_name = row['Company Name']
        url = f'https://data.sec.gov/submissions/CIK{cik}.json'

        # A network failure for one entity must not discard what was already
        # collected for the others: report it and move on.
        try:
            response = session.get(url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)
        except requests.RequestException as exc:
            print(f"Failed to download data for CIK {cik}. Error: {exc}")
            continue

        if response.status_code != 200:
            print(f"Failed to download data for CIK {cik}. Status code: {response.status_code}")
            continue

        # A 200 response with a non-JSON body raises a ValueError subclass.
        try:
            submission_history = response.json()
        except ValueError as exc:
            print(f"Failed to parse data for CIK {cik}. Error: {exc}")
            continue

        recent_filings = submission_history.get('filings', {}).get('recent', {})
        if isinstance(recent_filings, dict):
            all_filings_data.extend(
                _latest_filings_by_form(recent_filings, cik, company_name)
            )

# Keys of every record appended to all_filings_data, in output column order.
# Passing them explicitly keeps the schema intact even when nothing was
# collected; a column-less empty frame would raise KeyError on
# 'Company Name' below.
FILING_COLUMNS = [
    'CIK',
    'Company Name',
    'form',
    'accession_number',
    'filing_date',
    'primary_document',
    'document_url',
    'txt_file_url',
]

# Collect all filings data into a DataFrame
df_filings = pd.DataFrame(all_filings_data, columns=FILING_COLUMNS)
print(df_filings)
if df_filings.empty:
    print("Warning: no 1-K or 1-SA filings were collected; the output file will only contain a header row.")

# Load the active SPV entities list used to filter the output DataFrame
active_spv_df = pd.read_csv('../data/active_spv_entities.csv')

# Case-insensitive filter (not a merge): both name columns are upper-cased in
# place, so the names written to the CSV are upper-cased as well.
df_filings['Company Name'] = df_filings['Company Name'].str.upper()
active_spv_df['Entity'] = active_spv_df['Entity'].str.upper()

# Keep only filings whose company name appears in the active entity list
filtered_df_filings = df_filings[df_filings['Company Name'].isin(active_spv_df['Entity'])]

# Save the filtered filings data to a CSV file
output_path = '../data/recent_filings.csv'
filtered_df_filings.to_csv(output_path, index=False)
print(f"Most recent filings data has been written to '{output_path}'")


            CIK              Company Name  form      accession_number  \
0    0001738134      Masterworks 001, LLC   1-K  0001493152-24-016270   
1    0001738134      Masterworks 001, LLC  1-SA  0001493152-23-034648   
2    0001750148      Masterworks 002, LLC   1-K  0001493152-22-010885   
3    0001750148      Masterworks 002, LLC  1-SA  0001493152-22-027066   
4    0001781737      Masterworks 003, LLC   1-K  0001493152-20-007362   
..          ...                       ...   ...                   ...   
575  0001979634  Masterworks Vault 2, LLC   1-K  0001493152-24-016086   
576  0001979634  Masterworks Vault 2, LLC  1-SA  0001493152-23-034567   
577  0001983146  Masterworks Vault 3, LLC   1-K  0001493152-24-015020   
578  0001983146  Masterworks Vault 3, LLC  1-SA  0001493152-23-040594   
579  0001989522  Masterworks Vault 4, LLC   1-K  0001493152-24-014042   

    filing_date            primary_document  \
0    2024-04-26  xsl1-K_X01/primary_doc.xml   
1    2023-09-28              