In [3]:
import csv
import itertools
import requests
from io import BytesIO
from zipfile import ZipFile
import os

from urllib.request import urlretrieve

base_url = "https://www.sec.gov/Archives/edgar/full-index/{year}/{qtr}/master.zip"
output_directory = "./edgar_files"  # You can change this to your desired directory

# Ensure the output directory exists
os.makedirs(output_directory, exist_ok=True)

headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0", 'Accept': 'application/json'}
query_parameters = {"downloadformat": "zip"}

# Define the range of years and quarters
years = range(2021, 2022)  # Adjust the end year as needed
quarters = ['QTR1', 'QTR2', 'QTR3', 'QTR4']

# Download and unzip files
for year, qtr in itertools.product(years, quarters):
    url = base_url.format(year=year, qtr=qtr)
    print(url)
    response = requests.get(url, headers=headers, params=query_parameters)
    
    print(response)
    if response.status_code == 200:
        # Open the ZIP file without extracting
        with ZipFile(BytesIO(response.content)) as zip_file:
            # Read the contents of master.idx
            with zip_file.open("master.idx") as master_file:
                # Convert bytes to string and split by lines
                content = master_file.read().decode("utf-8").splitlines()
                
                # Find the index where the table starts
                table_start_index = content.index("--------------------------------------------------------------------------------")
                
                # Use csv.reader to parse the table
                table_data = list(csv.reader(content[table_start_index + 2:], delimiter='|'))
                
                # Print the first few rows of the table
                print(f"\nContents of {year} {qtr} master.idx:")
                for row in table_data[:5]:
                    print(row)
        
        print(f"Downloaded and processed {year} {qtr} master.idx")
    else:
        print(f"Failed to download {year} {qtr}. Status code: {response.status_code}")

https://www.sec.gov/Archives/edgar/full-index/2021/QTR1/master.zip
<Response [200]>

Contents of 2021 QTR1 master.idx:
['1000045', 'NICHOLAS FINANCIAL INC', '4/A', '2021-02-12', 'edgar/data/1000045/0001398344-21-003309.txt']
['1000045', 'NICHOLAS FINANCIAL INC', '4', '2021-02-08', 'edgar/data/1000045/0001496701-21-000001.txt']
['1000045', 'NICHOLAS FINANCIAL INC', '4', '2021-02-09', 'edgar/data/1000045/0001398344-21-002769.txt']
['1000045', 'NICHOLAS FINANCIAL INC', '8-K', '2021-01-25', 'edgar/data/1000045/0001564590-21-002004.txt']
['1000045', 'NICHOLAS FINANCIAL INC', '8-K', '2021-02-03', 'edgar/data/1000045/0001564590-21-003940.txt']
Downloaded and processed 2021 QTR1 master.idx
https://www.sec.gov/Archives/edgar/full-index/2021/QTR2/master.zip
<Response [200]>

Contents of 2021 QTR2 master.idx:
['1000045', 'NICHOLAS FINANCIAL INC', '4', '2021-06-07', 'edgar/data/1000045/0001398344-21-012753.txt']
['1000045', 'NICHOLAS FINANCIAL INC', '4', '2021-06-10', 'edgar/data/1000045/000100004