In [2]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
from openai import OpenAI
import ast
import tqdm
import os
from Py_Files import credentials
from Py_Files import factset_api
from Py_Files import financial_modeling_prep as fmp
from Py_Files import qml_equity_ratios

import statsmodels.api as sm
import matplotlib.pyplot as plt
import plotly.express as px



In [None]:
data_dir = '/Users/joeybortfeld/Documents/QML Solutions Data/'

# Month-end dates from 2021-01-31 through 2021-10-31, formatted MMDDYYYY
# because that is how the constituent files are named.
date_range = pd.date_range(start='2021-01-21', end='2021-10-31', freq='ME')
date_range = [d.strftime('%m%d%Y') for d in date_range]

collection = []
for date in tqdm.tqdm(date_range):
    # skiprows=1 skips the first sheet row before the header row is read
    temp = pd.read_excel(data_dir + f'ice_data/history/old_constituents/C0A0-{date}.xlsx', skiprows=1)

    # Keep only rows that carry an ISIN (presumably drops non-bond rows such as
    # footers -- TODO confirm). Copy so the column assignment below writes to an
    # independent frame instead of a slice (avoids SettingWithCopyWarning).
    temp = temp[temp['ISIN number'].notnull()].copy()

    # Normalise headers to lower snake case, e.g. 'ISIN number' -> 'isin_number'
    temp.columns = [c.lower().replace(' ', '_') for c in temp.columns]
    temp['date'] = pd.to_datetime(date, format='%m%d%Y')

    collection.append(temp)

# ignore_index gives the stacked panel a unique row index; without it every
# monthly file contributes its own overlapping labels.
df = pd.concat(collection, axis=0, ignore_index=True)

print(df.columns)

In [None]:

# Ordinal score for each rating label: AAA = 21 down to D = 0.
rating_to_num_dict = {'AAA': 21, 'AA1': 20, 'AA2': 19, 'AA3': 18,
                      'A1': 17, 'A2': 16, 'A3': 15,
                      'BBB1': 14, 'BBB2': 13, 'BBB3': 12,
                      'BB1': 11, 'BB2': 10, 'BB3': 9,
                      'B1': 8, 'B2': 7, 'B3': 6,
                      'CCC1': 5, 'CCC2': 4, 'CCC3': 3,
                      'CC': 2, 'C': 1, 'D': 0}

df['rating_num'] = df['rating'].map(rating_to_num_dict)

# Previous month-end OAS = current OAS minus its month-to-date change
df['oas_prevmend'] = df['oas'] - df['oas_mtd_change']

# rename() ignores labels that are not present, so this is a no-op when the
# source column is missing.
df = df.rename(columns={'%_mktval-prevmend': '%_mkt_value_prevmend'})

suffix_dict = {'cusip':'_by_bond', 'ticker':'_by_ticker', 'description':'_by_description'}

# calculate aggregations by bond (cusip), ticker and description
for group in ['cusip', 'ticker', 'description']:

    this_suffix = suffix_dict[group]
    group_keys = [group, 'date']

    # weight of each row inside its (group, date) bucket
    df[f'total_weight{this_suffix}'] = df.groupby(by=group_keys)['%_mkt_value'].transform('sum')
    df[f'weight{this_suffix}'] = df['%_mkt_value'] / df[f'total_weight{this_suffix}']

    # calculate market-weighted metrics by group
    # - excess return
    # - oas
    for m in ['excess_return_%_mtd', 'oas']:
        df[f'{m}{this_suffix}'] = df[m] * df[f'weight{this_suffix}']
        df[f'{m}{this_suffix}'] = df.groupby(by=group_keys)[f'{m}{this_suffix}'].transform('sum')

    # size by group: the same (group, date) sum of '%_mkt_value' as total_weight,
    # kept under its own name because the size factor below reads it
    df[f'total_mkt_value{this_suffix}'] = df[f'total_weight{this_suffix}']

# calculate factors (decile ranks within each date)
for group in ['cusip', 'ticker', 'description']:

    this_suffix = suffix_dict[group]

    for factor_name, factor_col in [
        ('size_factor', 'total_mkt_value'),
        ('carry_factor', 'oas'),
        ]:

        factor_out = f'{factor_name}{this_suffix}'
        factor_in = f'{factor_col}{this_suffix}'

        # One row per (date, group) so each group is ranked once per date.
        # Copy so the new column is written to an independent frame.
        temp = df.drop_duplicates(subset=['date', group], keep='first').copy()
        temp[factor_out] = temp.groupby(by=['date'])[factor_in].transform(
            lambda x: pd.qcut(x, q=10, labels=False, duplicates='drop'))
        temp = temp[[group, 'date', factor_out]]

        # Drop a factor column left by an earlier run of this cell so the merge
        # does not produce '_x' / '_y' duplicates.
        df = df.drop(columns=[factor_out], errors='ignore').merge(temp, on=[group, 'date'], how='left')


    

In [None]:
this_fsym = 'MH33D6-R'

# Quarterly fundamentals: keep the FF_COM_SHS_OUT rows, rename the four columns
# of interest, and tag each row with the month and year of its report date.
temp1 = pd.read_csv(f'/Users/joeybortfeld/Documents/QML Solutions Data/factset_data/factset_fundamentals/quarterly/{this_fsym}.csv')
temp1 = (
    temp1.loc[temp1['metric'] == 'FF_COM_SHS_OUT',
              ['fsymId', 'fiscalEndDate', 'epsReportDate', 'value']]
    .set_axis(['fsym_id', 'fiscal_end_date', 'report_date', 'ff_com_shs_out'], axis=1)
    .assign(
        report_month=lambda frame: pd.to_datetime(frame['report_date']).dt.month,
        report_year=lambda frame: pd.to_datetime(frame['report_date']).dt.year,
    )
)

# Shares file: reduce to year, month and totalOutstanding
temp2 = pd.read_csv(f'/Users/joeybortfeld/Documents/QML Solutions Data/factset_data/factset_equity/shares/{this_fsym}.csv')
temp2 = temp2.assign(
    report_year=lambda frame: pd.to_datetime(frame['date']).dt.year,
    report_month=lambda frame: pd.to_datetime(frame['date']).dt.month,
)[['report_year', 'report_month', 'totalOutstanding']]

# Match each fundamentals row to the shares rows of the same calendar month
# and plot the two share counts together.
temp = temp1.merge(temp2, on=['report_year', 'report_month'], how='left')
temp.set_index('fiscal_end_date')[['ff_com_shs_out', 'totalOutstanding']].plot(figsize=(10, 5))








In [None]:
temp.groupby(by='size_factor_by_ticker')['excess_return_%_mtd_by_ticker'].median().plot(kind='bar')

In [None]:
temp

In [None]:
df.columns

In [None]:
df.columns

In [None]:
# bond-level analysis

# description-level analysis

# ticker    

In [None]:
def calc_groupby_return(data, groupby_col, return_col, calc_method='mean'):
    """Aggregate ``return_col`` within each value of ``groupby_col``.

    Parameters
    ----------
    data : DataFrame holding both ``groupby_col`` and ``return_col``.
    groupby_col : column whose values define the cohorts.
    return_col : column that is aggregated.
    calc_method : handed to ``GroupBy.apply``; defaults to ``'mean'``.

    Returns
    -------
    DataFrame with columns ``grouping``, ``metric``, ``calc_method``,
    ``cohort`` and ``value`` -- one row per cohort.
    """
    summary = data.groupby(groupby_col, as_index=False)[return_col].apply(calc_method)
    summary.columns = ['cohort', 'value']

    # Label every row with what was grouped, how, and on which metric
    labels = (
        ('grouping', groupby_col),
        ('calc_method', calc_method),
        ('metric', return_col),
    )
    for column, label in labels:
        summary[column] = label

    ordered_columns = ['grouping', 'metric', 'calc_method', 'cohort', 'value']
    return summary[ordered_columns]
calc_groupby_return(df, 'rating_num', 'excess_return_%_mtd')
# calc_groupby_return(df, 'rating_num', 'total_return_%_mtd_loc')

In [None]:
def size_factor(df):
    """Unfinished placeholder: performs a single column lookup and returns None.

    NOTE(review): the name suggests this was meant to compute a size factor,
    but no such logic exists here yet -- confirm intent before relying on it.
    """

    # Looks up a column whose name is the empty string and discards the result
    df['']

In [None]:
print('bond count', len(temp))
print('description count', len(temp['description'].unique()))
print('ticker count', len(temp['ticker'].unique()))

In [None]:
temp

In [None]:
equity_fsyms[30]

In [None]:
category = 'liquidity'
# metric = 'P-value'
# metric = 'Coefficient'
metric ='AUROC - Train'

# Display precision per metric. Applied only AFTER sorting: formatting first
# turns the values into strings, and sorting strings ranks them
# lexicographically (e.g. '10.00' before '9.00', negatives out of order).
display_formats = {
    'P-value': "{:.3f}",
    'AUROC - Train': "{:.2f}",
    'Coefficient': "{:.2f}",
}

# Stack the per-horizon result files, tagging each row with its horizon t (1..5)
collection = []
for i in [1,2,3,4,5]:
    temp = pd.read_csv(f'/Users/joeybortfeld/Downloads/univariate_reg_{i}y-pct_split.csv')
    temp['t'] = i
    collection.append(temp)

df = pd.concat(collection, axis=0)

print(df['Category'].unique())

df = df[df['Category'] == category]

# One row per variable, one column per horizon, ranked on the t=5 value
df = df.pivot(index='Variable', columns='t', values=metric)
df = df.sort_values(by=5, ascending=False)

# Format for display; missing cells stay NaN
if metric in display_formats:
    df = df.apply(lambda col: col.map(display_formats[metric].format, na_action='ignore'))
df


In [None]:

import numpy as np
import pandas as pd
from scipy.stats import norm
from scipy.optimize import fsolve

def merton_distance_to_default(market_cap, debt, equity_vol, risk_free_rate=0.03, time_horizon=1):
    """Compute Distance to Default (DD) and default probability with Merton's structural model.

    Solves two equations for the unobserved asset value ``A`` and asset
    volatility ``sigma_A``:

    * ``market_cap = A * N(d1) - exp(-r * T) * debt * N(d2)``
    * ``equity_vol * market_cap = N(d1) * A * sigma_A``

    Parameters
    ----------
    market_cap : scalar equity value; must be > 0.
    debt : scalar face value of debt; must be >= 0.
    equity_vol : scalar equity volatility; must be > 0.
    risk_free_rate : rate ``r`` used as drift and for discounting the debt.
    time_horizon : horizon ``T``; must be > 0.

    Returns
    -------
    (distance_to_default, probability_of_default)
        ``distance_to_default`` is ``d2`` at the solved ``(A, sigma_A)`` and
        ``probability_of_default`` is ``N(-d2)``.
        Returns ``(nan, nan)`` when an input is outside the ranges above (or
        is NaN), and ``(inf, 0.0)`` when ``debt`` is zero.
    """
    # Written as a positive test so NaN inputs also land in this branch.
    inputs_ok = market_cap > 0 and equity_vol > 0 and time_horizon > 0 and debt >= 0
    if not inputs_ok:
        return np.nan, np.nan

    # No debt means nothing to default on; also avoids dividing by zero in log(A / debt).
    if debt == 0:
        return np.inf, 0.0

    sqrt_t = np.sqrt(time_horizon)
    discounted_debt = np.exp(-risk_free_rate * time_horizon) * debt

    def d1_d2(A, sigma_A):
        """Return (d1, d2) for asset value A and asset volatility sigma_A."""
        d1 = (np.log(A / debt) + (risk_free_rate + 0.5 * sigma_A ** 2) * time_horizon) / (sigma_A * sqrt_t)
        return d1, d1 - sigma_A * sqrt_t

    def equations(vars):
        A, sigma_A = vars
        d1, d2 = d1_d2(A, sigma_A)

        eq1 = market_cap - (A * norm.cdf(d1) - discounted_debt * norm.cdf(d2))
        eq2 = equity_vol * market_cap - norm.cdf(d1) * A * sigma_A

        return [eq1, eq2]

    # Initial guess: assets = equity + discounted debt, with equity volatility
    # de-levered by the equity share of assets. This is the exact solution when
    # N(d1) and N(d2) are both 1, so the solver starts close to the root.
    asset_value = market_cap + discounted_debt
    asset_vol = equity_vol * market_cap / asset_value

    # Solve for asset value (A) and asset volatility (sigma_A)
    A, sigma_A = fsolve(equations, [asset_value, asset_vol])

    # Distance to Default is d2 at the solved asset value and volatility
    _, distance_to_default = d1_d2(A, sigma_A)
    probability_of_default = norm.cdf(-distance_to_default)

    return distance_to_default, probability_of_default

# Example Firm Data
market_cap = 36_702_000_000  # $36.702 billion
debt = 3_000_000_000        # $3 billion in debt
equity_vol = 0.30           # 30% annualized volatility

# Compute DD and PD.
# The probability is bound to `prob_default`, not `pd`: unpacking into `pd`
# would replace the pandas module alias and break every later `pd.` call.
dd, prob_default = merton_distance_to_default(market_cap, debt, equity_vol)
print(f"Distance to Default: {dd:.7f}")
print(f"Probability of Default: {prob_default:.7%}")


In [None]:
temp[temp['date'] < pd.to_datetime('2000-01-01')].set_index('date')['price'].plot()

In [None]:
equity_dir = '/Users/joeybortfeld/Documents/QML Solutions Data/factset_data/factset_equity/'


def _list_fsym_stems(subdir, sep='.'):
    """Return the text before the first ``sep`` of every entry name in ``equity_dir + subdir``."""
    return [name.split(sep)[0] for name in os.listdir(equity_dir + subdir)]


split_fsyms = _list_fsym_stems('prices SPLIT/')
unsplit_fsyms = _list_fsym_stems('prices UNSPLIT/')
# Entries in the Excel add-in folder are split on '_' rather than '.'
excel_fsyms = _list_fsym_stems('excel_addin_download/', sep='_')
share_fsyms = _list_fsym_stems('shares/')

print('split', len(split_fsyms))
print('unsplit', len(unsplit_fsyms))
print('excel', len(excel_fsyms))
print('shares', len(share_fsyms))

# Excel-download ids with no SPLIT price file; a set makes each membership test O(1)
split_fsym_set = set(split_fsyms)
[f for f in excel_fsyms if f not in split_fsym_set]



In [None]:
# Request daily price, volume and trade-count history for `this_fsym`
# (2006-01-06 through 2024-12-31) with split='SPLIT' and verbose=True.
response, temp1 = factset_api.get_stock_prices(
    id_list=[this_fsym],
    field_list=['price', 'volume', 'tradeCount'],
    start_date='2006-01-06',
    end_date='2024-12-31',
    frequency='D',
    split='SPLIT',
    verbose=True,
    authorization=credentials.factset_api_authorization,
)

temp1


In [None]:
this_fsym = 'MH33D6-R'

# Daily price history for this_fsym
response, temp1 = factset_api.get_stock_prices(
    id_list=[this_fsym],
    field_list=['price', 'volume', 'tradeCount'],
    start_date='2006-01-06',
    end_date='2024-12-31',
    frequency='D',
    split='SPLIT',
    authorization=credentials.factset_api_authorization,
)

fig, axes = plt.subplots(figsize=(10, 5), ncols=2)

# Plot temp1 (the prices downloaded above), not the unrelated global `temp`
# that other cells leave behind.
temp1.set_index('date')['price'].plot(ax=axes[0])
axes[0].set_title('Price')

# Monthly shares outstanding for the same id.
# NOTE(review): `get_shares_outanding` is spelled without the first 't' of
# "outstanding" -- confirm it matches the function name in Py_Files/factset_api.
response, temp2 = factset_api.get_shares_outanding(
    id_list=[this_fsym],
    start_date='2006-03-31',
    end_date='2024-12-31',
    frequency='M',
    verbose=False,
    authorization=credentials.factset_api_authorization,
)

print(response)
temp2.set_index('date')['totalOutstanding'].plot(ax=axes[1])
axes[1].set_title('Total Outstanding')

In [None]:
# Outer-join prices and shares on date. Sort chronologically so the forward
# fill and the keep='last' year-end selection below follow calendar order.
temp = (
    temp1.merge(temp2, on='date', how='outer')
    .sort_values('date')
    .reset_index(drop=True)
)

# Carry the most recent share count forward onto the following rows.
# (.ffill() replaces the deprecated fillna(method='ffill').)
temp['totalOutstanding'] = temp['totalOutstanding'].ffill()
temp['market_cap'] = temp['totalOutstanding'] * temp['price']

fig, ax = plt.subplots(figsize=(10, 5), ncols=2)
by_date = temp.set_index('date')
by_date[['totalOutstanding', 'price']].plot(secondary_y='totalOutstanding', ax=ax[0])
# Plot market cap against date so both panels share the same x-axis meaning
by_date['market_cap'].plot(ax=ax[1])
ax[0].set_title('Total Outstanding and Price')
ax[1].set_title('Market Cap')
plt.show()

# Keep the last row of each calendar year; copy so the in-place scaling below
# writes to an independent frame.
temp['year'] = pd.to_datetime(temp['date']).dt.year
temp = temp.drop_duplicates(subset=['year'], keep='last').copy()
temp['market_cap'] /= 1_000
temp[['date', 'market_cap', 'price']]


# Price Download using SPLIT


In [None]:
# Request annual ('ANN') FF_COM_SHS_OUT values for MH33D6-R, 1990-01-01 through
# 2024-12-31, then draw `value` as a bar chart indexed by `reportDate`.
response, temp = factset_api.download_fundamentals(
    id_list=['MH33D6-R'],
    # field_list=['FF_IS_MULTI_SHARE', 'FF_IS_ADR'],
    field_list=['FF_COM_SHS_OUT'],
    periodicity='ANN',
    start_date='1990-01-01',
    end_date='2024-12-31',
    currency='LOCAL',
    update_type='RP',
    verbose=True,
    authorization=credentials.factset_api_authorization,
)
reported_values = temp.set_index('reportDate')['value']
reported_values.plot(kind='bar')


In [None]:
import requests

# Direct call to the FactSet global-prices endpoint, bypassing the factset_api wrapper
url = 'https://api.factset.com/content/factset-global-prices/v1/prices?ids=CGF31Z-R&fields=price,priceOpen,priceHigh,priceLow,volume&startDate=2024-01-01&endDate=2024-08-27&frequency=D&calendar=FIVEDAY&currency=EUR&adjust=SPLIT&batch=N'

headers = {'Accept': 'application/json','Content-Type': 'application/json'}

# requests applies no timeout by default, so an unresponsive server would hang
# the kernel indefinitely; fail after 30 seconds instead.
response = requests.get(url, headers=headers, auth=credentials.factset_api_authorization, timeout=30)

print(response.status_code)
print(response.text)

In [None]:
def calculate_cagr(initial, final, years):
    """Return the compound annual growth rate that takes ``initial`` to ``final`` over ``years``.

    Computed as ``(final / initial) ** (1 / years) - 1``.
    """
    growth_multiple = final / initial
    annual_exponent = 1 / years
    return growth_multiple ** annual_exponent - 1

# Parameters
initial_revenue = 1.00
initial_expenses = 0.30
initial_net_income = initial_revenue - initial_expenses

years = 5

# Growth rates to iterate over: 1% to 9% in one-point steps.
# Built from integers so float rounding in the step cannot add or drop the
# last grid point (a known hazard of np.arange with a fractional step).
revenue_growth_rates = np.arange(1, 10) / 100
expense_growth_rates = np.arange(1, 10) / 100


def _pct_label(rate):
    """Format a growth rate as a whole-percent row/column label, e.g. 0.03 -> '3%'."""
    return f"{rate*100:.0f}%"


# Create table: rows = expense growth, columns = revenue growth
cagr_table = pd.DataFrame(index=[_pct_label(e) for e in expense_growth_rates],
                          columns=[_pct_label(r) for r in revenue_growth_rates])

for e_growth in expense_growth_rates:
    for r_growth in revenue_growth_rates:
        # Compound revenue and expenses forward `years` periods
        final_revenue = initial_revenue * (1 + r_growth) ** years
        final_expenses = initial_expenses * (1 + e_growth) ** years
        final_net_income = final_revenue - final_expenses

        # Compute CAGR of net income and store it as a percentage string
        cagr_net_income = calculate_cagr(initial_net_income, final_net_income, years)
        cagr_table.loc[_pct_label(e_growth), _pct_label(r_growth)] = f"{cagr_net_income*100:.2f}%"

# Save the result
cagr_table.to_csv('/Users/joeybortfeld/Downloads/cagr_table.csv')
