# Create Combined All Data - Fixed Quarter Dates

In [1]:
# Sample/debug switch for this notebook.
# When True, only the first 10 rows of calls.csv are read and the
# sample CSV/XLSX files at the end of the notebook are not written.
DEBUG = False

In [2]:
# Packages
import pandas as pd
import openpyxl

In [3]:
# Credit ratings on fixed quarter dates, each row already paired with an earnings call date
cr_and_dates_path = r'~\Box\STAT 222 Capstone\Intermediate Data\Credit_Rating\credit_ratings_on_fixed_quarter_dates_with_earnings_call_date.csv'
cr_and_dates = pd.read_csv(cr_and_dates_path)
print(cr_and_dates)

     ticker          fixed_quarter_date earnings_call_date Rating  \
0      ABBV  2014-10-01 00:00:00.000000         2014-07-25      A   
1      ABBV  2015-01-01 00:00:00.000000         2014-10-31      A   
2      ABBV  2015-04-01 00:00:00.000000         2015-01-30      A   
3      ABBV  2015-07-01 00:00:00.000000         2015-04-23      A   
4      ABBV  2015-10-01 00:00:00.000000         2015-07-24      A   
...     ...                         ...                ...    ...   
4393    ZTS  2015-10-01 00:00:00.000000         2015-08-04    BBB   
4394    ZTS  2016-01-01 00:00:00.000000         2015-11-03    BBB   
4395    ZTS  2016-04-01 00:00:00.000000         2016-02-16    BBB   
4396    ZTS  2016-07-01 00:00:00.000000         2016-05-04    BBB   
4397    ZTS  2016-10-01 00:00:00.000000         2016-08-03    BBB   

                      Rating Agency Name rating_date         Source  \
0     Standard & Poor's Ratings Services  2014-07-18       Original   
1     Standard & Poor's Ratin

In [4]:
# Earnings call transcripts
calls_path = r'~\Box\STAT 222 Capstone\Intermediate Data\Calls\calls.csv'
# DEBUG runs read only the first 10 rows; nrows=None (the read_csv default) reads the whole file
calls = pd.read_csv(calls_path, nrows=10 if DEBUG else None)
# The first column is a saved index, so keep everything after it
calls = calls.iloc[:, 1:]
# Keep only the first 10 characters of date (the YYYY-MM-DD part)
calls['date'] = calls['date'].str.slice(stop=10)
print(calls)

      company       sector  year  quarter        date  \
0        ADNT  automobiles  2017        2  2017-04-28   
1        ADNT  automobiles  2017        3  2017-07-27   
2        ADNT  automobiles  2017        4  2017-11-05   
3        ADNT  automobiles  2018        1  2018-01-29   
4        ADNT  automobiles  2018        2  2018-05-03   
...       ...          ...   ...      ...         ...   
62069     KMX       retail  2022        3  2021-12-22   
62070     KMX       retail  2022        4  2022-04-12   
62071     KMX       retail  2023        1  2022-06-24   
62072     KMX       retail  2023        2  2022-09-29   
62073     KSS       retail  2007        3  2007-08-17   

                                              transcript  
0      Operator: Welcome, and thank you all for stand...  
1      Operator: Welcome, and thank for standing by. ...  
2      Operator: Welcome, and thank you for standing ...  
3      Operator: Welcome, and thank you for joining t...  
4      Operator: Wel

### Earnings Call Data Merge

In [5]:
# Attach each transcript to its rating row: keep only (ticker, earnings_call_date)
# pairs that have a matching (company, date) row in calls
merged = cr_and_dates.merge(
    calls,
    how='inner',
    left_on=['ticker', 'earnings_call_date'],
    right_on=['company', 'date'],
)

# Smallest and largest call date that survived the join
print('date range')
print(merged['date'].min(), merged['date'].max(), sep='\n')

# Smallest and largest value of each year column
for variable in ('year', 'credit_rating_year'):
    print(variable, merged[variable].min(), merged[variable].max(), sep='\n')

# Rows whose earnings call `year` is after 2016.
# These come from inconsistencies in the earnings call data year variable.
late_year_columns = ['earnings_call_date', 'ticker', 'company', 'date', 'year', 'credit_rating_year']
print('years greater than 2016')
print(merged.loc[merged['year'] > 2016, late_year_columns])

# calls is not used after this point; drop the reference to free memory
del calls

date range
2010-07-31
2016-09-30
year
2010
2019
credit_rating_year
2010
2016
years greater than 2016
     earnings_call_date ticker company        date  year  credit_rating_year
139          2016-08-25   AMCR    AMCR  2016-08-25  2019                2016
244          2016-07-27    ASH     ASH  2016-07-27  2017                2016
318          2016-09-22    AZO     AZO  2016-09-22  2017                2016
323          2015-10-27   BABA    BABA  2015-10-27  2017                2015
324          2016-01-28   BABA    BABA  2016-01-28  2017                2016
...                 ...    ...     ...         ...   ...                 ...
4234         2016-02-11   VSTO    VSTO  2016-02-11  2017                2016
4235         2016-05-12   VSTO    VSTO  2016-05-12  2018                2016
4236         2016-08-11   VSTO    VSTO  2016-08-11  2018                2016
4308         2016-06-30    WOR     WOR  2016-06-30  2017                2014
4309         2016-09-28    WOR     WOR  2016-09-28  

### Financial Statements Data Merge

In [6]:
# Tabular financial statement data
fin_statements_path = r'~\Box\STAT 222 Capstone\Intermediate Data\Tabular_Fin\combined_corrected_tabular_financial_statements_data.parquet'
df = pd.read_parquet(fin_statements_path)
print(df)

# Inner join: earnings call (company, year, quarter) against
# financial statement (symbol, calendarYear, period)
# NOTE(review): both inputs appear to carry a `date` column, so pandas is expected
# to rename them with its default _x/_y suffixes - confirm before relying on merged['date'].
merged = merged.merge(
    df,
    how='inner',
    left_on=['company', 'year', 'quarter'],
    right_on=['symbol', 'calendarYear', 'period'],
)
print(merged)

             date symbol reportedCurrency     cik fillingDate  \
0      2023-09-30    BCE              CAD  718940  2023-09-30   
1      2023-06-30    BCE              CAD  718940  2023-06-30   
2      2023-03-31    BCE              CAD  718940  2023-03-31   
3      2022-12-31    BCE              CAD  718940  2022-12-31   
4      2022-09-30    BCE              CAD  718940  2022-09-30   
...           ...    ...              ...     ...         ...   
54213  2008-03-31   YORW              USD  108985  2008-05-09   
54214  2007-12-31   YORW              USD  108985  2008-03-11   
54215  2007-09-30   YORW              USD  108985  2007-11-08   
54216  2007-06-30   YORW              USD  108985  2007-08-09   
54217  2007-03-31   YORW              USD  108985  2007-05-09   

              acceptedDate  calendarYear  period  cashAndCashEquivalents  \
0      2023-09-29 20:00:00          2023       3             619000000.0   
1      2023-06-30 00:00:00          2023       2             900000

### Rating Changes

In [7]:
# Rating carried over from the previous row of the same ticker
# Order rows by ticker, then by fixed_quarter_date within each ticker
merged = merged.sort_values(['ticker', 'fixed_quarter_date'])
# Within each ticker, take the Rating of the row just before (first row per ticker gets NaN)
# NOTE(review): if a ticker has a gap in its quarters, this is the previous available
# row, not necessarily the immediately preceding quarter - confirm this is intended.
merged['rating_on_previous_fixed_quarter_date'] = merged.groupby('ticker')['Rating'].shift()
merged.loc[:, ['ticker', 'fixed_quarter_date', 'Rating', 'rating_on_previous_fixed_quarter_date']].head(10)

Unnamed: 0,ticker,fixed_quarter_date,Rating,rating_on_previous_fixed_quarter_date
0,ABBV,2014-10-01 00:00:00.000000,A,
1,ABBV,2015-01-01 00:00:00.000000,A,A
2,ABBV,2015-04-01 00:00:00.000000,A,A
3,ABBV,2015-07-01 00:00:00.000000,A,A
4,ABBV,2015-10-01 00:00:00.000000,A,A
5,ABBV,2016-01-01 00:00:00.000000,A,A
6,ABBV,2016-04-01 00:00:00.000000,A,A
7,ABBV,2016-07-01 00:00:00.000000,A,A
8,ABBV,2016-10-01 00:00:00.000000,A,A
9,ABC,2011-04-01 00:00:00.000000,A,


In [8]:
# Ordinal encoding for rating letters, used for the rating change variables
# Ratings listed from worst to best, so the position in the list gives the rank
rating_order = ['D', 'C', 'CC', 'CCC', 'B', 'BB', 'BBB', 'A', 'AA', 'AAA']
# Rank starts at 1 for D and ends at 10 for AAA
rating_dict = dict(zip(rating_order, range(1, len(rating_order) + 1)))
# Numeric rank column -> rating letter column it is derived from
rank_source_columns = {
    'Rating Rank AAA is 10': 'Rating',
    'rating_on_previous_fixed_quarter_date AAA is 10': 'rating_on_previous_fixed_quarter_date',
}
# Map the letters to ranks with rating_dict
for rank_column, rating_column in rank_source_columns.items():
    merged[rank_column] = merged[rating_column].map(rating_dict)

# Cross tab each letter column against its rank column to check the mapping
for rank_column, rating_column in rank_source_columns.items():
    print(pd.crosstab(merged[rating_column], merged[rank_column]))

Rating Rank AAA is 10  2   3    4    5     6     7    8    9   10
Rating                                                           
A                       0   0    0    0     0     0  904    0   0
AA                      0   0    0    0     0     0    0  215   0
AAA                     0   0    0    0     0     0    0    0  83
B                       0   0    0  514     0     0    0    0   0
BB                      0   0    0    0  1088     0    0    0   0
BBB                     0   0    0    0     0  1403    0    0   0
C                      15   0    0    0     0     0    0    0   0
CC                      0   5    0    0     0     0    0    0   0
CCC                     0   0  145    0     0     0    0    0   0
rating_on_previous_fixed_quarter_date AAA is 10  2.0   3.0   4.0   5.0   6.0   \
rating_on_previous_fixed_quarter_date                                           
A                                                   0     0     0     0     0   
AA                             

In [9]:
# Direction of the rating change since the previous fixed quarter date
def change(row):
    """Label how a row's rating rank compares with its previous-quarter rank.

    Reads 'Rating Rank AAA is 10' and
    'rating_on_previous_fixed_quarter_date AAA is 10' from ``row`` and returns
    the matching 'Downgrade ...', 'Upgrade ...' or 'Same As ...' label.
    Returns None when none of the comparisons is true (for example when a
    rank is NaN).
    """
    current_rank = row['Rating Rank AAA is 10']
    previous_rank = row['rating_on_previous_fixed_quarter_date AAA is 10']

    if current_rank == previous_rank:
        return 'Same As Last Fixed Quarter Date'
    if current_rank > previous_rank:
        return 'Upgrade Since Last Fixed Quarter Date'
    if current_rank < previous_rank:
        return 'Downgrade Since Last Fixed Quarter Date'
    # No comparison held (e.g. NaN on either side)
    return None

direction_column = 'Change Direction Since Last Fixed Quarter Date'
delta_column = 'Change Since Last Fixed Quarter Date'

# Label each row with the direction of its rating change
merged[direction_column] = merged.apply(change, axis=1)

# Numeric version: current rank minus previous rank
merged[delta_column] = merged['Rating Rank AAA is 10'].sub(
    merged['rating_on_previous_fixed_quarter_date AAA is 10']
)

# Cross tab the direction label against the numeric change
print(pd.crosstab(merged[direction_column], merged[delta_column]))

Change Since Last Fixed Quarter Date            -2.0  -1.0   0.0   1.0   2.0
Change Direction Since Last Fixed Quarter Date                              
Downgrade Since Last Fixed Quarter Date           10    92     0     0     0
Same As Last Fixed Quarter Date                    0     0  3824     0     0
Upgrade Since Last Fixed Quarter Date              0     0     0   120    13


### Some Basic Stats

In [11]:
# Print range of dates
# print('date range')
# print(merged['date'].min())
# print(merged['date'].max())

# Range of year, credit_rating_year, calendarYear
# for variable in ['year', 'credit_rating_year', 'calendarYear']:
#     print(variable)
#     print(merged[variable].min())
#     print(merged[variable].max())

In [12]:
# Summarize columns.
# DataFrame.info() writes the summary itself and returns None, so it is called
# directly; wrapping it in print() emitted a stray "None" after the summary.
merged.info()
# Print column names
print(merged.columns)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4372 entries, 0 to 4371
Columns: 152 entries, ticker to Change Since Last Fixed Quarter Date
dtypes: float64(104), int32(1), int64(9), object(38)
memory usage: 5.1+ MB
None
Index(['ticker', 'fixed_quarter_date', 'earnings_call_date', 'Rating',
       'Rating Agency Name', 'rating_date', 'Source', 'Rating Rank AAA is 10',
       'Next Rating', 'Next Rating Date',
       ...
       'eps', 'epsdiluted', 'weightedAverageShsOut',
       'weightedAverageShsOutDil', 'link_income_statement',
       'finalLink_income_statement', 'rating_on_previous_fixed_quarter_date',
       'rating_on_previous_fixed_quarter_date AAA is 10',
       'Change Direction Since Last Fixed Quarter Date',
       'Change Since Last Fixed Quarter Date'],
      dtype='object', length=152)


In [13]:
# Write the combined dataset to parquet, leaving the DataFrame index out of the file
all_data_path = r'~\Box\STAT 222 Capstone\Intermediate Data\All_Data\all_data_fixed_quarter_dates.parquet'
merged.to_parquet(all_data_path, index=False)

In [None]:
# Sample up to 100 rows and write them to all_data_fixed_quarter_dates_sample.csv / .xlsx
# Skipped when DEBUG is True.
SAMPLE_SIZE = 100
SAMPLE_SEED = 222  # fixed seed so a rerun reproduces the same sample files
if not DEBUG:
    # Cap at the number of rows available: sample() raises ValueError when asked
    # for more rows than the frame has (and replace=False).
    all_data_sample = merged.sample(n=min(SAMPLE_SIZE, len(merged)), random_state=SAMPLE_SEED)
    all_data_sample.to_csv(r'~\Box\STAT 222 Capstone\Intermediate Data\All_Data\all_data_fixed_quarter_dates_sample.csv', index=False)
    # also save to xlsx
    all_data_sample.to_excel(r'~\Box\STAT 222 Capstone\Intermediate Data\All_Data\all_data_fixed_quarter_dates_sample.xlsx', index=False)