# Fit Autogluon Tabular Only Local

Use the AutoGluon AutoML library to predict credit ratings locally from tabular data only (mostly financial statement variables).

In [3]:
# Packages
import pandas as pd
from autogluon.tabular import TabularDataset, TabularPredictor
import os

## Load Data

In [4]:
# Directory containing the parquet files (relative to this notebook's working directory).
data_dir = r'../../../Data/All_Data/All_Data_Fixed_Quarter_Dates'
# os.listdir returns entries in arbitrary order, so sort the names to keep the
# row order of the concatenated dataframe reproducible between runs and machines.
file_list = sorted(f for f in os.listdir(data_dir) if f.endswith('.parquet'))
# Read every parquet file and stack them into a single dataframe.
# Each file keeps its own row index here; the index is reset later when the training subset is taken.
df = pd.concat([pd.read_parquet(os.path.join(data_dir, f)) for f in file_list])
print('dataframe')
print(df)

dataframe
    ticker fixed_quarter_date earnings_call_date Rating  \
0     AAPL         2014-07-01         2014-04-23     AA   
1     AAPL         2014-10-01         2014-07-22     AA   
2     AAPL         2015-01-01         2014-10-20     AA   
3     AAPL         2015-04-01         2015-01-27     AA   
4     AAPL         2015-07-01         2015-04-27     AA   
..     ...                ...                ...    ...   
912    ZTS         2015-10-01         2015-08-04    BBB   
913    ZTS         2016-01-01         2015-11-03    BBB   
914    ZTS         2016-04-01         2016-02-16    BBB   
915    ZTS         2016-07-01         2016-05-04    BBB   
916    ZTS         2016-10-01         2016-08-03    BBB   

                     Rating Agency Name rating_date      CR_source  \
0    Standard & Poor's Ratings Services  2014-05-27  Supplementary   
1    Standard & Poor's Ratings Services  2014-05-27  Supplementary   
2    Standard & Poor's Ratings Services  2014-05-27  Supplementary   
3

In [5]:
# Show a header followed by every column label of df, one label per line.
column_report = ['column names', *(str(column_name) for column_name in df.columns)]
print('\n'.join(column_report))

column names
ticker
fixed_quarter_date
earnings_call_date
Rating
Rating Agency Name
rating_date
CR_source
Rating Rank AAA is 10
Next Rating
Next Rating Date
Previous Rating
Previous Rating Date
next_rating_date_or_end_of_data
credit_rating_year
previous_fixed_quarter_date
days_since_call_on_fixed_quarter
quarter
calls_year
transcript
Calls_source
date
symbol
reportedCurrency
cik
fillingDate
acceptedDate
calendarYear
period
cashAndCashEquivalents
shortTermInvestments
cashAndShortTermInvestments
netReceivables
inventory
otherCurrentAssets
totalCurrentAssets
propertyPlantEquipmentNet
goodwill
intangibleAssets
goodwillAndIntangibleAssets
longTermInvestments
taxAssets
otherNonCurrentAssets
totalNonCurrentAssets
otherAssets
totalAssets
accountPayables
shortTermDebt
taxPayables
deferredRevenue
otherCurrentLiabilities
totalCurrentLiabilities
longTermDebt
deferredRevenueNonCurrent
deferredTaxLiabilitiesNonCurrent
otherNonCurrentLiabilities
totalNonCurrentLiabilities
otherLiabilities
capitalLeas

In [6]:
# Columns removed before modelling.
# NOTE(review): presumably these are alternative encodings of the target, forward-looking
# rating fields, and the raw transcript text — confirm the reason for each exclusion.
excluded_columns = [
    'Rating Rank AAA is 10',
    'transcript',
    'Investment_Grade',
    'Change Direction Since Last Fixed Quarter Date',
    'Change Since Last Fixed Quarter Date',
    'Next Rating',
    'Next Rating Date',
    'next_rating_date_or_end_of_data',
]
# drop raises KeyError when a listed column is absent, so re-running this cell
# on the already-trimmed df will fail; re-run the load cell first.
df = df.drop(columns=excluded_columns)

In [None]:
# Keep only the rows flagged 'train' in the 'train_test_80_20' column,
# then renumber them from 0 so the index is contiguous.
is_train_row = df['train_test_80_20'] == 'train'
train_df = df.loc[is_train_row].reset_index(drop=True)

## Fit AutoGluon

In [8]:
# Wrap the pandas training frame in AutoGluon's TabularDataset; the result is the
# train_data object passed to TabularPredictor.fit in the next cell.
train_data = TabularDataset(train_df)

In [9]:
# Train AutoGluon models that predict the 'Rating' column from all other columns in train_data.
# No save path or presets are passed, so AutoGluon falls back to its defaults
# (the run log reports a timestamped folder under AutogluonModels and recommends choosing a preset).
label_column = 'Rating'
predictor = TabularPredictor(label=label_column).fit(train_data=train_data)

No path specified. Models will be saved in: "AutogluonModels\ag-20240321_064453"
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets.
	Recommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):
	presets='best_quality'   : Maximize accuracy. Default time_limit=3600.
	presets='high_quality'   : Strong accuracy with fast inference speed. Default time_limit=3600.
	presets='good_quality'   : Good accuracy with very fast inference speed. Default time_limit=3600.
	presets='medium_quality' : Fast training time, ideal for initial prototyping.
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels\ag-20240321_064453"
AutoGluon Version:  1.0.0
Python Version:     3.11.8
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.19045
CPU Count:          8
Memory Avail:       7.38 GB / 15.68 GB (47.1%)
Disk Space Avail:   18.61