In [21]:
import sqlite3
import pandas as pd

# Query that loads every column of the 'reports' table.
query = "SELECT * FROM reports"

# Connect to the SQLite database and read the query result into a DataFrame
# indexed by report_id. The connection is closed explicitly once the data is
# in memory: sqlite3 connections are not closed automatically (using one as a
# context manager only commits or rolls back), so leaving it open would hold
# the database file for the rest of the kernel session.
conn = sqlite3.connect('reports_v5.db')
try:
    df = pd.read_sql(query, conn, index_col='report_id')
finally:
    conn.close()

# Define a function to calculate total debt
def calculate_total_debt(row):
    """Return the total debt for one report row.

    The first non-zero source, in this order, decides the result:

    1. ``DebtNoncurrent`` -> ``DebtNoncurrent + DebtCurrent``
    2. ``LongTermDebtNoncurrent`` -> ``LongTermDebtNoncurrent + DebtCurrent``
    3. otherwise the sum of the lease and line-of-credit items
       (``CapitalLeaseObligationsNoncurrent``, ``FinanceLeaseLiabilityNoncurrent``,
       ``CapitalLeaseObligationsCurrent``, ``LinesOfCreditCurrent``,
       ``FinanceLeaseLiabilityCurrent``).

    Note that ``DebtCurrent`` is not part of the fallback sum in case 3.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row passed by ``DataFrame.apply``)
        Must support ``row[name]`` lookup for the fields listed above.

    Returns
    -------
    The summed value, of whatever numeric type the row fields hold.
    """
    debt_noncurrent = row['DebtNoncurrent']
    if debt_noncurrent != 0:
        return debt_noncurrent + row['DebtCurrent']

    long_term_noncurrent = row['LongTermDebtNoncurrent']
    if long_term_noncurrent != 0:
        return long_term_noncurrent + row['DebtCurrent']

    # Neither aggregate non-current figure is populated: fall back to the
    # individual lease / line-of-credit items, added in a fixed order.
    capital_lease_noncurrent = row['CapitalLeaseObligationsNoncurrent']
    finance_lease_noncurrent = row['FinanceLeaseLiabilityNoncurrent']
    capital_lease_current = row['CapitalLeaseObligationsCurrent']
    lines_of_credit_current = row['LinesOfCreditCurrent']
    finance_lease_current = row['FinanceLeaseLiabilityCurrent']
    return (
        capital_lease_noncurrent
        + finance_lease_noncurrent
        + capital_lease_current
        + lines_of_credit_current
        + finance_lease_current
    )

# Evaluate calculate_total_debt once per row and store the result as the
# new 'total_debt' column.
df['total_debt'] = df.apply(func=calculate_total_debt, axis='columns')

# Show the freshly computed column.
print(df['total_debt'])


report_id
52        2.872000e+06
93        0.000000e+00
98        0.000000e+00
155       0.000000e+00
157       1.375088e+09
              ...     
620429    0.000000e+00
630393    0.000000e+00
656021    4.400000e+06
657474    0.000000e+00
663874    0.000000e+00
Name: total_debt, Length: 29372, dtype: float64


In [22]:
# Number of reports whose computed total_debt is exactly 0
num_zero_debt_entries = df['total_debt'].eq(0).sum()

# Total number of reports in the DataFrame
total_entries = df.shape[0]

# Share of zero-debt reports, expressed as a percentage
percent_zero_debt_entries = (num_zero_debt_entries / total_entries) * 100

# Report the result
print("Percentage of entries with total_debt equal to 0:", percent_zero_debt_entries, "%")

Percentage of entries with total_debt equal to 0: 70.19950973716465 %


In [23]:
# Display the DataFrame as the cell output to inspect it together with the
# 'total_debt' column added above.
df

Unnamed: 0_level_0,sec_url,Assets,AssetsCurrent,LongTermDebtCurrent,LongTermDebtNoncurrent,ShortTermBorrowings,OtherShortTermBorrowings,ConstructionLoan,BridgeLoan,ShortTermBankLoansAndNotesPayable,...,PensionAndOtherPostretirementDefinedBenefitPlansLiabilitiesNoncurrent,StockholdersEquity,LiabilitiesAndStockholdersEquity,MinorityInterest,StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest,Revenue,Revenues,NetIncomeLoss,ProfitLoss,total_debt
report_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
52,http://www.sec.gov/Archives/edgar/data/1000683...,2.587100e+07,1.368500e+07,235000.0,2.872000e+06,0.0,0.0,0.0,0.0,0.0,...,0.0,8.000000e+03,2.661200e+07,0.0,0.000000e+00,0.0,0.0,0.0,0.0,2.872000e+06
93,http://www.sec.gov/Archives/edgar/data/1001082...,8.688986e+09,3.774997e+09,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,...,0.0,-1.850705e+09,8.688986e+09,451000.0,-1.850254e+09,0.0,0.0,0.0,230915000.0,0.000000e+00
98,http://www.sec.gov/Archives/edgar/data/1001082...,1.303549e+10,5.423292e+09,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,...,0.0,1.017510e+08,1.303549e+10,1584000.0,1.033350e+08,0.0,0.0,0.0,318978000.0,0.000000e+00
155,http://www.sec.gov/Archives/edgar/data/1001601...,1.147600e+07,4.204000e+06,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,...,0.0,-1.646000e+06,4.433000e+06,7961000.0,-2.800270e+08,0.0,0.0,0.0,-4549000.0,0.000000e+00
157,http://www.sec.gov/Archives/edgar/data/1001604...,2.089940e+09,1.391420e+08,21324000.0,1.375088e+09,0.0,0.0,0.0,0.0,0.0,...,0.0,3.120820e+08,2.089940e+09,6816000.0,4.000000e+03,0.0,898732000.0,0.0,0.0,1.375088e+09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
620429,https://www.sec.gov/Archives/edgar/data/108193...,1.143300e+05,2.953800e+04,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,...,0.0,6.833625e+07,1.143300e+05,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.000000e+00
630393,https://www.sec.gov/Archives/edgar/data/50292/...,3.048153e+07,2.358870e+07,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,...,0.0,2.791572e+07,3.048153e+07,0.0,0.000000e+00,0.0,13843521.0,0.0,0.0,0.000000e+00
656021,https://www.sec.gov/Archives/edgar/data/183960...,2.054190e+08,7.451100e+07,1200000.0,4.400000e+06,0.0,0.0,0.0,0.0,0.0,...,0.0,-8.284000e+06,2.269350e+08,0.0,6.950000e+07,0.0,63067000.0,0.0,0.0,4.400000e+06
657474,https://www.sec.gov/Archives/edgar/data/146897...,7.075920e+05,7.041440e+05,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,...,0.0,6.098117e+06,7.075920e+05,0.0,0.000000e+00,0.0,1390258.0,0.0,-470583.0,0.000000e+00


In [24]:
import pandas as pd
import json

# 'df' is expected to exist already (built in an earlier cell) with the
# report id as its index.

# Parse the JSON file: a mapping from a group key to a list of report
# records, each of which carries a 'report_id' entry.
with open('report_data_v2.json', 'r') as f:
    report_data = json.load(f)

# Invert that structure into a flat report_id -> key lookup. If a report id
# is listed under several keys, the key encountered last is kept.
report_id_to_key = {
    report['report_id']: group_key
    for group_key, reports in report_data.items()
    for report in reports
}

# Attach the key to every row by looking up its index value; report ids that
# are absent from the lookup get a missing value.
df['key'] = df.index.map(report_id_to_key)

# Show the DataFrame including the new 'key' column
df

Unnamed: 0_level_0,sec_url,Assets,AssetsCurrent,LongTermDebtCurrent,LongTermDebtNoncurrent,ShortTermBorrowings,OtherShortTermBorrowings,ConstructionLoan,BridgeLoan,ShortTermBankLoansAndNotesPayable,...,StockholdersEquity,LiabilitiesAndStockholdersEquity,MinorityInterest,StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest,Revenue,Revenues,NetIncomeLoss,ProfitLoss,total_debt,key
report_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
52,http://www.sec.gov/Archives/edgar/data/1000683...,2.587100e+07,1.368500e+07,235000.0,2.872000e+06,0.0,0.0,0.0,0.0,0.0,...,8.000000e+03,2.661200e+07,0.0,0.000000e+00,0.0,0.0,0.0,0.0,2.872000e+06,366_4Q2011
93,http://www.sec.gov/Archives/edgar/data/1001082...,8.688986e+09,3.774997e+09,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,...,-1.850705e+09,8.688986e+09,451000.0,-1.850254e+09,0.0,0.0,0.0,230915000.0,0.000000e+00,484_1Q2010
98,http://www.sec.gov/Archives/edgar/data/1001082...,1.303549e+10,5.423292e+09,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,...,1.017510e+08,1.303549e+10,1584000.0,1.033350e+08,0.0,0.0,0.0,318978000.0,0.000000e+00,484_3Q2011
155,http://www.sec.gov/Archives/edgar/data/1001601...,1.147600e+07,4.204000e+06,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,...,-1.646000e+06,4.433000e+06,7961000.0,-2.800270e+08,0.0,0.0,0.0,-4549000.0,0.000000e+00,737_4Q2011
157,http://www.sec.gov/Archives/edgar/data/1001604...,2.089940e+09,1.391420e+08,21324000.0,1.375088e+09,0.0,0.0,0.0,0.0,0.0,...,3.120820e+08,2.089940e+09,6816000.0,4.000000e+03,0.0,898732000.0,0.0,0.0,1.375088e+09,805_4Q2010
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
620429,https://www.sec.gov/Archives/edgar/data/108193...,1.143300e+05,2.953800e+04,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,...,6.833625e+07,1.143300e+05,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.000000e+00,283_4Q2022
630393,https://www.sec.gov/Archives/edgar/data/50292/...,3.048153e+07,2.358870e+07,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,...,2.791572e+07,3.048153e+07,0.0,0.000000e+00,0.0,13843521.0,0.0,0.0,0.000000e+00,367_4Q2022
656021,https://www.sec.gov/Archives/edgar/data/183960...,2.054190e+08,7.451100e+07,1200000.0,4.400000e+06,0.0,0.0,0.0,0.0,0.0,...,-8.284000e+06,2.269350e+08,0.0,6.950000e+07,0.0,63067000.0,0.0,0.0,4.400000e+06,751_4Q2022
657474,https://www.sec.gov/Archives/edgar/data/146897...,7.075920e+05,7.041440e+05,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,...,6.098117e+06,7.075920e+05,0.0,0.000000e+00,0.0,1390258.0,0.0,-470583.0,0.000000e+00,737_4Q2015


In [26]:
import pandas as pd

# Assuming 'df' is your DataFrame containing the necessary data
# (it must provide 'total_debt', 'Assets', 'StockholdersEquity' and 'key').

# Keep only rows where total debt, Assets and StockholdersEquity are all
# non-zero, so both ratios below have a non-zero numerator and denominator.
# The explicit .copy() makes filtered_df an independent frame; adding columns
# to it then cannot trigger SettingWithCopyWarning or write through to 'df'.
filtered_df = df[df['total_debt'] != 0]
filtered_df = filtered_df[filtered_df['Assets'] != 0]
filtered_df = filtered_df[filtered_df['StockholdersEquity'] != 0].copy()

# Calculate leverage ratios.
# Note: a negative StockholdersEquity yields a negative debt_to_equity value.
filtered_df['debt_to_assets'] = filtered_df['total_debt'] / filtered_df['Assets']
filtered_df['debt_to_equity'] = filtered_df['total_debt'] / filtered_df['StockholdersEquity']

# Group by key and calculate the median of each ratio
# (groupby drops rows whose key is missing by default).
grouped_df = filtered_df.groupby('key')
median_debt_to_assets_ratio = grouped_df['debt_to_assets'].median()
median_debt_to_equity_ratio = grouped_df['debt_to_equity'].median()

# Count the non-null ratio values in each group (the sample size behind each median)
count_debt_to_assets = grouped_df['debt_to_assets'].count()
count_debt_to_equity = grouped_df['debt_to_equity'].count()

# Create DataFrame for median ratios and counts
median_ratios_df = pd.DataFrame({
    'median_debt_to_assets_ratio': median_debt_to_assets_ratio,
    'median_debt_to_equity_ratio': median_debt_to_equity_ratio,
    'n_debt_to_assets': count_debt_to_assets,
    'n_debt_to_equity': count_debt_to_equity
}).reset_index()

# Split the key into its three captured parts with a regular expression:
# leading digits (SIC), the quarter token ('1Q'..'4Q') and a 4-digit year.
pattern = r'(\d+)_([1-4]Q)(\d{4})'
median_ratios_df[['SIC', 'Quarter', 'Year']] = median_ratios_df['key'].str.extract(pattern)

# Drop the original 'key' column (reassign instead of mutating in place)
median_ratios_df = median_ratios_df.drop(columns=['key'])

# Convert Year and Quarter to integers so they sort numerically. The year is
# already a 4-digit string, so a plain integer cast is enough; no datetime
# round trip is needed.
median_ratios_df['Year'] = median_ratios_df['Year'].astype(int)
median_ratios_df['Quarter'] = median_ratios_df['Quarter'].str.replace('Q', '').astype(int)

# Sort in reverse chronological order (latest year first, then latest quarter)
median_ratios_df = median_ratios_df.sort_values(by=['Year', 'Quarter'], ascending=[False, False])

print(median_ratios_df)



     median_debt_to_assets_ratio  median_debt_to_equity_ratio  \
54                      0.083439                     0.013485   
96                      0.126533                     0.222683   
124                     0.306787                     0.536835   
220                     0.148095                     0.084721   
230                     0.012652                    -0.291954   
..                           ...                          ...   
214                     0.111341                     0.232891   
111                     0.050485                     0.084740   
222                     0.011523                     0.015685   
40                      0.231302                     0.525619   
87                      0.259774                     0.630666   

     n_debt_to_assets  n_debt_to_equity  SIC  Quarter  Year  
54                235               235  283        4  2022  
96                 87                87  367        4  2022  
124                10            

In [27]:
# Write the median-ratio table to an Excel workbook, leaving out the
# DataFrame index.
excel_filename = "median_ratios.xlsx"
median_ratios_df.to_excel(excel_writer=excel_filename, index=False)

# Confirm where the file was written
print("DataFrame successfully saved to Excel file:", excel_filename)

DataFrame successfully saved to Excel file: median_ratios.xlsx
