In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import zipfile
import os
import json
import re

In [2]:
from data_fetching.edgar_helpers import parse_companyfact_dict

In [3]:
# Load the company-facts index CSV; its `filepath` column holds per-company
# JSON file names of the form CIK<10 digits>.json.
# NOTE(review): the name `data` is rebound to a parsed JSON dict by the
# archive-reading cell further down, so this DataFrame is lost after that cell
# runs -- consider a more specific name here.
data = pd.read_csv("../../data/companyfacts.csv")

In [4]:
data['filepath']

0        CIK0000001750.json
1        CIK0000001800.json
2        CIK0000001961.json
3        CIK0000002034.json
4        CIK0000002098.json
                ...        
18533    CIK0001984014.json
18534    CIK0001985139.json
18535    CIK0002018462.json
18536    CIK0001927578.json
18537    CIK0001979610.json
Name: filepath, Length: 18538, dtype: object

In [3]:
# Index the per-company JSON members of the bulk submissions archive, then
# extract one member to disk and parse it into `data`.
zip_path = "../../data/submissions.zip"
extraction_dir = '../../data'
with zipfile.ZipFile(zip_path) as z:
    # Keep only member names shaped exactly like CIK<digits>.json.
    file_list = list(filter(re.compile(r'^CIK\d+\.json$').match, z.namelist()))

    # The extracted copy is written under `extraction_dir`; it is read back
    # from that location rather than streamed out of the archive.
    json_path = os.path.join(extraction_dir, 'CIK0000315189.json')
    z.extract('CIK0000315189.json', extraction_dir)
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

In [9]:
len(file_list)

908843

In [4]:
# Tag the loaded record with a source file name under the 'filepath' key.
# NOTE(review): the record was read from 'CIK0000315189.json' (and its 'cik'
# is '315189'), but the name assigned here is 'CIK0000002809.json' -- confirm
# which file name is intended.
data['filepath'] = 'CIK0000002809.json'

In [5]:
data

{'cik': '315189',
 'entityType': 'operating',
 'sic': '3523',
 'sicDescription': 'Farm Machinery & Equipment',
 'ownerOrg': '06 Technology',
 'insiderTransactionForOwnerExists': 1,
 'insiderTransactionForIssuerExists': 1,
 'name': 'DEERE & CO',
 'tickers': ['DE'],
 'exchanges': ['NYSE'],
 'ein': '362382580',
 'description': '',
 'website': '',
 'investorWebsite': '',
 'category': 'Large accelerated filer',
 'fiscalYearEnd': '1102',
 'stateOfIncorporation': 'DE',
 'stateOfIncorporationDescription': 'DE',
 'addresses': {'mailing': {'street1': 'ONE JOHN DEERE PLACE',
   'street2': None,
   'city': 'MOLINE',
   'stateOrCountry': 'IL',
   'zipCode': '61265-8098',
   'stateOrCountryDescription': 'IL'},
  'business': {'street1': 'ONE JOHN DEERE PLACE',
   'street2': None,
   'city': 'MOLINE',
   'stateOrCountry': 'IL',
   'zipCode': '61265-8098',
   'stateOrCountryDescription': 'IL'}},
 'phone': '(309) 765-8000',
 'flags': '',
 'formerNames': [],
 'filings': {'recent': {'accessionNumber': ['0

In [6]:
parse_companyfact_dict(data)

{'stateOfIncorporation': 'DE',
 'phone': '(309) 765-8000',
 'website': None,
 'description': None,
 'ein': '362382580',
 'formerNames': None,
 'cik': '315189',
 'investorWebsite': None,
 'ownerOrg': '06 Technology',
 'entityType': 'operating',
 'sicDescription': 'Farm Machinery & Equipment',
 'filepath': 'CIK0000002809.json',
 'category': 'Large accelerated filer',
 'name': 'DEERE & CO',
 'exchanges': 'NYSE',
 'fiscalYearEnd': '1102',
 'tickers': 'DE',
 'sic': '3523',
 'stateorcountry_mailing': 'ONE JOHN DEERE PLACE, MOLINE, IL, 61265-8098',
 'stateorcountry_business': 'ONE JOHN DEERE PLACE, MOLINE, IL, 61265-8098'}

In [363]:
def process_eps_df(eps_df, look_back=3):
    """Turn cumulative full-year rows into a residual by removing prior rows.

    Steps:
      1. Drop rows whose ``frame`` is missing.
      2. Rename frames of the form ``CYyyyy`` (no quarter suffix; presumably
         full-year figures -- confirm against the data source) to ``CYyyyyQQ``.
      3. Sort by ``end`` (stable, so rows sharing an ``end`` keep input order).
      4. For every ``CYyyyyQQ`` row, subtract from its ``val`` the sum of
         ``val`` over the ``look_back`` rows immediately preceding it in the
         sorted order. Rows with fewer than ``look_back`` predecessors are
         left unchanged.

    Adjustments are applied in sorted order, so if an earlier ``CYyyyyQQ`` row
    falls inside a later row's look-back window, its already-adjusted value is
    the one that gets subtracted.

    Parameters
    ----------
    eps_df : pandas.DataFrame
        Must contain the columns ``frame``, ``end`` and ``val``. It is not
        modified.
    look_back : int, default 3
        Number of immediately preceding rows whose ``val`` is subtracted.

    Returns
    -------
    pandas.DataFrame
        A new frame sorted by ``end``. Its index labels are the row positions
        in the NaN-filtered input (i.e. they are not renumbered after sorting).
    """
    framed = eps_df[eps_df["frame"].notna()].reset_index(drop=True).copy()
    if framed.empty:
        # Nothing to adjust; also avoids the .str accessor on a non-string column.
        return framed

    bare_year = framed["frame"].str.match(r"CY\d{4}$", na=False)
    framed.loc[bare_year, "frame"] += "QQ"
    framed = framed.sort_values(by="end", kind="stable")

    # After sorting, index labels no longer equal row positions, so both the
    # look-back window and the write-back are done positionally.
    is_full_year = framed["frame"].str.match(r"CY\d{4}QQ$", na=False).to_numpy()
    val_col = framed.columns.get_loc("val")

    for pos, flagged in enumerate(is_full_year):
        if not flagged:
            continue
        start = pos - look_back
        if start < 0:
            # Not enough history in front of this row.
            continue
        adjustment = framed.iloc[start:pos, val_col].sum()
        framed.iloc[pos, val_col] = framed.iloc[pos, val_col] - adjustment

    return framed
    

In [385]:
# NOTE(review): `df` is not defined in any cell shown in this notebook -- it
# must be loaded explicitly for a fresh-kernel run to succeed.
processed_df = process_eps_df(df, look_back=1)

# Sum `val` over the last four rows and round to two decimals; `.item()`
# turns the NumPy scalar into a plain Python number.
eps_ttm = round(processed_df['val'].iloc[-4:].sum(), 2).item()
eps_ttm

19.19

16.44

In [None]:
# P/E ratio = Current stock price / Earnings per share

In [63]:
# Tabulate the entries stored under
# facts -> us-gaap -> AssetsCurrent -> units -> USD of `data`, one row per entry.
# NOTE(review): the `data` dict dumped earlier in this notebook is a
# submissions record whose visible keys do not include 'facts'; this cell
# presumably ran against a different (company-facts) JSON -- confirm and load
# that file explicitly before this cell.
pd.DataFrame(data['facts']['us-gaap']['AssetsCurrent']['units']['USD'])

Unnamed: 0,end,val,accn,fy,fp,form,filed,frame
0,2008-09-27,32311000000,0001193125-09-153165,2009,Q3,10-Q,2009-07-22,
1,2008-09-27,32311000000,0001193125-09-214859,2009,FY,10-K,2009-10-27,
2,2008-09-27,30006000000,0001193125-10-012091,2009,FY,10-K/A,2010-01-25,CY2008Q3I
3,2009-06-27,35170000000,0001193125-09-153165,2009,Q3,10-Q,2009-07-22,CY2009Q2I
4,2009-09-26,36265000000,0001193125-09-214859,2009,FY,10-K,2009-10-27,
...,...,...,...,...,...,...,...,...
127,2024-03-30,128416000000,0000320193-24-000069,2024,Q2,10-Q,2024-05-03,CY2024Q1I
128,2024-06-29,125435000000,0000320193-24-000081,2024,Q3,10-Q,2024-08-02,CY2024Q2I
129,2024-09-28,152987000000,0000320193-24-000123,2024,FY,10-K,2024-11-01,
130,2024-09-28,152987000000,0000320193-25-000008,2025,Q1,10-Q,2025-01-31,CY2024Q3I


In [None]:
# Underscore digit separators keep this a single integer; written with commas,
# Python parses the expression as the tuple (364, 980, 0, 0).
364_980_000_000