In [1]:
from sec_edgar_api import EdgarClient
import json
import os
from tqdm import tqdm

In [2]:
def get_filings(ciks, target_year, industry):
    edgar = EdgarClient(user_agent="<Sample Company Name> <Admin Contact>@<Sample Company Domain>")

    # List to collect all 10-K filings for the target year across companies
    all_filings_for_year = []

    error_ciks = []

    # Loop through each CIK (company)
    for cik in ciks:
        try:
            # Get the submission data for the CIK
            data = edgar.get_submissions(cik)

            # Extract the company name (tickers) from the submission data
            company = data.get('tickers', [])[0] if 'tickers' in data else "Unknown_Company"
            
            recent_filings = data.get('filings', {}).get('recent', {})
            forms = recent_filings.get('form', [])
            accession_numbers = recent_filings.get('accessionNumber', [])
            filing_dates = recent_filings.get('filingDate', [])

            # Filter for 10-K filings from the specific year
            ten_k_filings = [
                {"company": company, "accessionNumber": accession, "filingDate": filing_date}
                for form, accession, filing_date in zip(forms, accession_numbers, filing_dates)
                if form == "10-K" and filing_date.startswith(str(target_year))
            ]

            # Add the filtered 10-K filings to the all_filings_for_year list
            all_filings_for_year.extend(ten_k_filings)

            # Prepare formatted URLs for each 10-K filing
            for filing in ten_k_filings:
                accession_number = filing.get("accessionNumber", "")
                filing_date = filing.get("filingDate", "")
                company = filing.get("company", "")
                
                if accession_number and filing_date:
                    year = filing_date[:4]  # Extract the year from the filing date
                    formatted_accession = accession_number.replace("-", "")
                    url = f"https://www.sec.gov/Archives/edgar/data/{cik}/{formatted_accession}/{accession_number}.txt"
                    filing["url"] = url  # Add the URL to the filing

        except Exception as e:
            error_ciks.append(cik)
            # print(f"Error getting submissions for CIK: {cik}")
            # print(e)
            continue

    print(f"Error CIKs: {error_ciks}")
    # Save all 10-K filings for the target year in a single JSON file
    output_file_path = f'./10K_URL_{industry}/All_10K_Filings_{target_year}.json'
    os.makedirs(os.path.dirname(output_file_path), exist_ok=True)  # Create directory if it doesn't exist
    
    # Write the accumulated filings to a JSON file
    with open(output_file_path, 'w') as output_file:
        json.dump(all_filings_for_year, output_file, indent=4)

In [3]:
# Industries: TECH, OIL, HEALTH

target_years = [2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024]
industry = "TECH"
# ciks = ["320193", "1018724", "1045810", "1065280", "1341439", "858877", "1326801", "804328", "1645590", "1535527", "1633917", "1477333", "712515"]
# ciks = ["320193", "1018724", "1045810", "1065280", "1341439", "858877", "1326801", "804328", "1645590", "1535527", "1633917", "1477333", "712515"]

with open("./ciks.json", 'r') as file:
        ciks_data = json.load(file)

ciks = ciks_data[industry]

for year in tqdm(target_years):
    print(f"Getting 10-K filings for the year {year}...")
    get_filings(ciks, year, industry)
    print(f"10-K filings for the year {year} have been saved!")


# "796343", "0000789019, "0001108524",  "0001318605", "51143", "50863", "796343", "2488", "1571996", "1321655",

  0%|          | 0/11 [00:00<?, ?it/s]

Getting 10-K filings for the year 2014...


  9%|▉         | 1/11 [00:31<05:18, 31.85s/it]

Error CIKs: ['353569', '1288776', '70878', '356028', '1439404', '1463172', '17275', '1171012', '1087423', '1627610', '944957', '1124610', '1459987', '1512157', '1695933', '1659165', '1353283', '1126225', '1068875', '1380846', '1460800', '1563962', '1502074', '1324272', '1652525', '1637243', '1568378', '1477603', '1547752', '1546363', '1402043', '1579984', '1578981', '1600357', '1629636', '1356104', '1509432', '1664974', '1367623', '1640755', '1171023', '1525169']
10-K filings for the year 2014 have been saved!
Getting 10-K filings for the year 2015...


 18%|█▊        | 2/11 [00:55<04:05, 27.28s/it]

Error CIKs: ['353569', '1288776', '70878', '356028', '1439404', '1463172', '17275', '1171012', '1087423', '1627610', '944957', '1124610', '1459987', '1512157', '1695933', '1659165', '1353283', '1126225', '1068875', '1380846', '1460800', '1563962', '1502074', '1324272', '1652525', '1637243', '1568378', '1477603', '1547752', '1546363', '1402043', '1579984', '1578981', '1600357', '1629636', '1356104', '1509432', '1664974', '1367623', '1640755', '1171023', '1525169']
10-K filings for the year 2015 have been saved!
Getting 10-K filings for the year 2016...


 27%|██▋       | 3/11 [01:22<03:36, 27.12s/it]

Error CIKs: ['353569', '1288776', '70878', '356028', '1439404', '1463172', '17275', '1171012', '1087423', '1627610', '944957', '1124610', '1459987', '1512157', '1695933', '1659165', '1353283', '1126225', '1068875', '1380846', '1460800', '1563962', '1502074', '1324272', '1652525', '1637243', '1568378', '1477603', '1547752', '1546363', '1402043', '1579984', '1578981', '1600357', '1629636', '1356104', '1509432', '1664974', '1367623', '1640755', '1171023', '1525169']
10-K filings for the year 2016 have been saved!
Getting 10-K filings for the year 2017...


 36%|███▋      | 4/11 [01:55<03:24, 29.28s/it]

Error CIKs: ['353569', '1288776', '70878', '356028', '1439404', '1463172', '17275', '1171012', '1087423', '1627610', '944957', '1124610', '1459987', '1512157', '1695933', '1659165', '1353283', '1126225', '1068875', '1380846', '1460800', '1563962', '1502074', '1324272', '1652525', '1637243', '1568378', '1477603', '1547752', '1546363', '1402043', '1579984', '1578981', '1600357', '1629636', '1356104', '1509432', '1664974', '1367623', '1640755', '1171023', '1525169']
10-K filings for the year 2017 have been saved!
Getting 10-K filings for the year 2018...


 45%|████▌     | 5/11 [02:23<02:53, 28.97s/it]

Error CIKs: ['353569', '1288776', '70878', '356028', '1439404', '1463172', '17275', '1171012', '1087423', '1627610', '944957', '1124610', '1459987', '1512157', '1695933', '1659165', '1353283', '1126225', '1068875', '1380846', '1460800', '1563962', '1502074', '1324272', '1652525', '1637243', '1568378', '1477603', '1547752', '1546363', '1402043', '1579984', '1578981', '1600357', '1629636', '1356104', '1509432', '1664974', '1367623', '1640755', '1171023', '1525169']
10-K filings for the year 2018 have been saved!
Getting 10-K filings for the year 2019...


 55%|█████▍    | 6/11 [03:02<02:41, 32.24s/it]

Error CIKs: ['353569', '1288776', '70878', '356028', '1439404', '1463172', '17275', '1171012', '1087423', '1627610', '944957', '1124610', '1459987', '1512157', '1695933', '1659165', '1353283', '1126225', '1068875', '1380846', '1460800', '1563962', '1502074', '1324272', '1652525', '1637243', '1568378', '1477603', '1547752', '1546363', '1402043', '1579984', '1578981', '1600357', '1629636', '1356104', '1509432', '1664974', '1367623', '1640755', '1171023', '1525169']
10-K filings for the year 2019 have been saved!
Getting 10-K filings for the year 2020...


 64%|██████▎   | 7/11 [03:30<02:03, 30.91s/it]

Error CIKs: ['353569', '1288776', '70878', '356028', '1439404', '1463172', '17275', '1171012', '1087423', '1627610', '944957', '1124610', '1459987', '1512157', '1695933', '1659165', '1353283', '1126225', '1068875', '1380846', '1460800', '1563962', '1502074', '1324272', '1652525', '1637243', '1568378', '1477603', '1547752', '1546363', '1402043', '1579984', '1578981', '1600357', '1629636', '1356104', '1509432', '1664974', '1367623', '1640755', '1171023', '1525169']
10-K filings for the year 2020 have been saved!
Getting 10-K filings for the year 2021...


 73%|███████▎  | 8/11 [03:56<01:28, 29.35s/it]

Error CIKs: ['353569', '1288776', '70878', '356028', '1439404', '1463172', '17275', '1171012', '1087423', '1627610', '944957', '1124610', '1459987', '1512157', '1695933', '1659165', '1353283', '1126225', '1068875', '1380846', '1460800', '1563962', '1502074', '1324272', '1652525', '1637243', '1568378', '1477603', '1547752', '1546363', '1402043', '1579984', '1578981', '1600357', '1629636', '1356104', '1509432', '1664974', '1367623', '1640755', '1171023', '1525169']
10-K filings for the year 2021 have been saved!
Getting 10-K filings for the year 2022...


 82%|████████▏ | 9/11 [04:26<00:58, 29.40s/it]

Error CIKs: ['353569', '1288776', '70878', '356028', '1439404', '1463172', '17275', '1171012', '1087423', '1627610', '944957', '1124610', '1459987', '1512157', '1695933', '1659165', '1353283', '1126225', '1068875', '1380846', '1460800', '1563962', '1502074', '1324272', '1652525', '1637243', '1568378', '1477603', '1547752', '1546363', '1402043', '1579984', '1578981', '1600357', '1629636', '1356104', '1509432', '1664974', '1367623', '1640755', '1171023', '1525169']
10-K filings for the year 2022 have been saved!
Getting 10-K filings for the year 2023...


 91%|█████████ | 10/11 [04:50<00:27, 28.00s/it]

Error CIKs: ['353569', '1288776', '70878', '356028', '1439404', '1463172', '17275', '1171012', '1087423', '1627610', '944957', '1124610', '1459987', '1512157', '1695933', '1659165', '1353283', '1126225', '1068875', '1380846', '1460800', '1563962', '1502074', '1324272', '1652525', '1637243', '1568378', '1477603', '1547752', '1546363', '1402043', '1579984', '1578981', '1600357', '1629636', '1356104', '1509432', '1664974', '1367623', '1640755', '1171023', '1525169']
10-K filings for the year 2023 have been saved!
Getting 10-K filings for the year 2024...


100%|██████████| 11/11 [05:18<00:00, 28.96s/it]

Error CIKs: ['353569', '1288776', '70878', '356028', '1439404', '1463172', '17275', '1171012', '1087423', '1627610', '944957', '1124610', '1459987', '1512157', '1695933', '1659165', '1353283', '1126225', '1068875', '1380846', '1460800', '1563962', '1502074', '1324272', '1652525', '1637243', '1568378', '1477603', '1547752', '1546363', '1402043', '1579984', '1578981', '1600357', '1629636', '1356104', '1509432', '1664974', '1367623', '1640755', '1171023', '1525169']
10-K filings for the year 2024 have been saved!



