In [None]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
from openai import OpenAI
import ast
import tqdm
import os
from Py_Files import credentials
from Py_Files import factset_api
from Py_Files import financial_modeling_prep as fmp
from Py_Files import qml_equity_ratios

import statsmodels.api as sm
import matplotlib.pyplot as plt
import plotly.express as px



In [None]:
# get benchmark data
def _load_benchmark(ticker, start_date='1990-01-01'):
    """Download one index via fmp and reshape it to the shared benchmark layout.

    Parameters
    ----------
    ticker : str
        Symbol passed to ``fmp.download_stock_returns``; also stored in the
        ``benchmark`` column of the result.
    start_date : str
        First date requested from the download helper.

    Returns
    -------
    pandas.DataFrame
        Columns ``date``, ``benchmark_return``, ``benchmark_price``, ``benchmark``.
    """
    raw = fmp.download_stock_returns(ticker=ticker, start_date=start_date)
    # assign/rename return new frames, so no column is ever set on a slice
    # (the pattern that can trigger SettingWithCopyWarning).
    return (
        raw.assign(date=pd.to_datetime(raw['date']))
        .loc[:, ['date', 'total_return', 'adjClose']]
        .rename(columns={'total_return': 'benchmark_return', 'adjClose': 'benchmark_price'})
        .assign(benchmark=ticker)
    )


df_sp500 = _load_benchmark('SP500')
df_stoxx = _load_benchmark('STOXX')
df_nikkei = _load_benchmark('NIKKEI')

# Long format: one row per (date, benchmark).
df_benchmarks = pd.concat([df_sp500, df_stoxx, df_nikkei])

# Price level of every benchmark on one chart; STOXX is drawn on the secondary y-axis.
for bench in df_benchmarks['benchmark'].unique():
    bench_prices = df_benchmarks[df_benchmarks['benchmark'] == bench].set_index('date')['benchmark_price']
    bench_prices.plot(secondary_y=(bench == 'STOXX'), label=bench, legend=True)

df_benchmarks


In [None]:
# Return series of the STOXX benchmark, indexed by date.
stoxx_rows = df_benchmarks.loc[df_benchmarks['benchmark'] == 'STOXX']
stoxx_rows.set_index('date')['benchmark_return'].plot()

In [None]:
import requests

# Fetch the list of index symbols offered by Financial Modeling Prep and save it as CSV.
url = 'https://financialmodelingprep.com/api/v3/symbol/available-indexes'
params = {
    # Read the key from the environment so it is never stored in the notebook.
    # NOTE(review): the key that used to be hard-coded here has been exposed in the
    # notebook history and should be rotated.
    "apikey": os.environ['FMP_API_KEY'],
    "from": '2000-01-01',  # start date sent to the API
}

# Send GET request to the API; the timeout keeps the cell from hanging forever.
response = requests.get(url, params=params, timeout=30)
print(response.status_code)
if response.status_code != 200:
    # Halt the cell with an explicit error instead of relying on an undefined name.
    raise RuntimeError(f'failed to get data (HTTP {response.status_code})')

data = response.json()
pd.DataFrame(data).to_csv('/Users/joeybortfeld/Downloads/fmp_indexes.csv', index=False)

In [None]:

from IPython.display import display

# Single-security walkthrough: compute every ratio for one fsym_id and plot the results.
# NOTE: fsym_to_country_dict, country_to_region_dict and region_to_benchmark_dict are
# defined in the batch-processing cell further down the notebook; that cell has to be
# run before this one.
this_fsym = 'K5JZYK-R'
make_plots = True
temp = qml_equity_ratios.merge_equity_data(fsym_id=this_fsym)
temp['fsym_id'] = this_fsym
# Resolve fsym -> exchange country -> region -> benchmark.
this_country = fsym_to_country_dict[this_fsym]
this_region = country_to_region_dict[this_country]
this_benchmark = region_to_benchmark_dict[this_region]
print(this_benchmark)

temp = qml_equity_ratios.combine_benchmark_data(temp, df_benchmarks, benchmark=this_benchmark)

# CAPM statistics on all observations (temp1a) and with downside_only=True (temp1b).
temp1a = qml_equity_ratios.calc_capm(temp, trailing_periods_list=[182,365], frequency='ME', outlier_drops=4, downside_only=False, exponential_weighting=(False, 0.99))
temp1b = qml_equity_ratios.calc_capm(temp, trailing_periods_list=[182,365], frequency='ME', outlier_drops=4, downside_only=True, exponential_weighting=(False, 0.99))
temp2 = qml_equity_ratios.calc_rolling_returns(temp)
temp3 = qml_equity_ratios.calc_drawdown(temp)
temp4 = qml_equity_ratios.calc_downside_volatility(temp)
temp5 = qml_equity_ratios.calc_ulcer_index(temp)


if make_plots and temp.shape[0] > 0:

    # 3x3 grid; the top-right panel (axes[0][2]) is intentionally left unused.
    fig, axes = plt.subplots(figsize=(15, 14), ncols=3, nrows=3)
    temp.set_index('date')[['price', 'price_split']].plot(ax=axes[0][0])
    temp.set_index('date')[['market_cap', 'market_cap_split']].plot(ax=axes[0][1])

    temp1a.set_index('date')[['capm_idio_vol_182', 'capm_return_vol_182']].plot(ax=axes[1][0], title='CAPM')
    temp1b.set_index('date')[['capm_idio_vol_down_182', 'capm_return_vol_down_182']].plot(ax=axes[1][1], title='Downside Only CAPM')

    temp2.set_index('date')[['return_6', 'return_12']].plot(ax=axes[1][2])

    temp3.set_index('date')[['drawdown_128', 'drawdown_252']].plot(ax=axes[2][0])
    temp4.set_index('date')[['downside_vol_128', 'downside_vol_252']].plot(ax=axes[2][1])
    temp5.set_index('date')[['ulcer_index_128', 'ulcer_index_252']].plot(ax=axes[2][2])

    # The layout has to be adjusted before the figure is rendered to have any effect.
    plt.tight_layout()
    plt.show()

    # A bare expression inside an `if` is not echoed by the notebook, so display it explicitly.
    display(
        temp[temp['date'] > pd.to_datetime('2005-12-25')][['date', 'price', 'price_split', 'market_cap', 'market_cap_split', 'total_outstanding']].tail(40)
    )


In [None]:
# Raw shares-outstanding file for one security; show only the rows that carry a value.
shares_csv_path = '/Users/joeybortfeld/Documents/QML Solutions Data/factset_data/factset_equity/shares/K5JZYK-R.csv'
temp = pd.read_csv(shares_csv_path)
temp.loc[temp['totalOutstanding'].notna()]

In [None]:
# iterate over all applicable fsym_ids

def _visible_files(folder):
    """Return the entries of `folder`, skipping hidden ones (names starting with '.')."""
    return [name for name in os.listdir(folder) if not name.startswith('.')]


# excel add-in data (the id is the part of the file name before the first '_')
excel_fsyms = [name.split('_')[0] for name in _visible_files('/Users/joeybortfeld/Documents/QML Solutions Data/factset_data/factset_equity/excel_addin_download/')]
print(len(excel_fsyms))

# api data (the id is the part of the file name before the first '.')
split_fsyms = [name.split('.')[0] for name in _visible_files('/Users/joeybortfeld/Documents/QML Solutions Data/factset_data/factset_equity/prices SPLIT/')]
print(len(split_fsyms))

# combined equity fsym universe; sorted so the processing order is the same on every run
equity_fsyms = sorted(set(excel_fsyms) | set(split_fsyms))
print('total fsyms', len(equity_fsyms))

completed_fsyms = [name.split('.')[0] for name in _visible_files('/Users/joeybortfeld/Documents/QML Solutions Data/factset_data/factset_equity/processed/')]

# set membership keeps this filter linear in the number of ids
completed_lookup = set(completed_fsyms)
equity_fsyms = [f for f in equity_fsyms if f not in completed_lookup]
print('--remaining', len(equity_fsyms))

# build a dictionary with mappings from exchange country to region
temp = pd.read_csv('/Users/joeybortfeld/Documents/QML Solutions Data/universe_and_traits/country_to_region_mapping.csv')
country_to_region_dict = temp.set_index('exchange_country')['region'].to_dict()

# build a dictionary with mappings from fsym_id to exchange country
temp = pd.read_csv('/Users/joeybortfeld/Documents/QML Solutions Data/universe_and_traits/qml_universe_ids.csv')
fsym_to_country_dict = temp.set_index('fsym_id')['exchange_country'].to_dict()

# regions mapped to the string 'None' have no benchmark and are skipped below
region_to_benchmark_dict = {
    'North America': 'SP500',
    'Europe': 'STOXX',
    'Japan': 'NIKKEI',
    'Asia ex-Japan': 'None',
    'Africa': 'None',
    'Middle East': 'None',
    'South America': 'None',
    '@NA': 'None',
    np.nan: 'None'
}

processed_frames = []
skipped_fsyms = []
for this_fsym in tqdm.tqdm(equity_fsyms):

    # Resolve the benchmark before loading any data so skipped ids cost nothing.
    # `.get` is used on purpose: a NaN only matches a dict key by object identity, so a
    # missing country/region read from a CSV would raise KeyError with a plain [] lookup
    # instead of falling through to 'None'. Unknown fsym_ids are treated the same way.
    this_country = fsym_to_country_dict.get(this_fsym)
    this_region = country_to_region_dict.get(this_country)
    this_benchmark = region_to_benchmark_dict.get(this_region, 'None')

    # skip if fsym_id is from a region that we do not map to a benchmark
    if this_benchmark == 'None':
        skipped_fsyms.append(this_fsym)
        continue

    temp = qml_equity_ratios.merge_equity_data(fsym_id=this_fsym)
    temp['fsym_id'] = this_fsym
    temp = qml_equity_ratios.combine_benchmark_data(temp, df_benchmarks, benchmark=this_benchmark)

    temp1a = qml_equity_ratios.calc_capm(temp, trailing_periods_list=[182,365], frequency='ME', outlier_drops=4, downside_only=False, exponential_weighting=(False, 0.99))
    temp1b = qml_equity_ratios.calc_capm(temp, trailing_periods_list=[182,365], frequency='ME', outlier_drops=4, downside_only=True, exponential_weighting=(False, 0.99))
    temp2 = qml_equity_ratios.calc_rolling_returns(temp)
    temp3 = qml_equity_ratios.calc_drawdown(temp)
    temp4 = qml_equity_ratios.calc_downside_volatility(temp)
    temp5 = qml_equity_ratios.calc_ulcer_index(temp)

    # outer-join every ratio table onto the base columns by date
    df = temp[['fsym_id', 'date', 'market_cap', 'price', 'volume']]
    for ratio_frame in (temp1a, temp1b, temp2, temp3, temp4, temp5):
        df = df.merge(ratio_frame, on=['date'], how='outer')

    # fill in missing
    # - for the monthly calculations (CAPM, etc) we assume exact month end dates (10/31)
    # - but for the daily data (price, market cap, returns, etc) the last trading date may not be true month end (10/29)
    for c in ['market_cap', 'price', 'volume', 'return_1', 'return_2', 'return_3', 'return_6', 'return_12']:
        df[c] = df[c].ffill(limit=10)
    df['fsym_id'] = this_fsym

    # date range actually covered by price data (measured after the forward fill)
    priced_dates = df.loc[df['price'].notnull(), 'date']
    min_date = priced_dates.min()
    max_date = priced_dates.max()

    # reduce to monthly: keep the last available date of each calendar month
    dates = pd.to_datetime(df['date'])
    month_last_date = df.groupby([dates.dt.year, dates.dt.month])['date'].transform('max')
    df = df[df['date'] == month_last_date]

    # drop obs out of bounds of the available data date range
    df = df[df['date'] >= min_date]
    df = df[df['date'] <= max_date]

    df.to_csv(f'/Users/joeybortfeld/Documents/QML Solutions Data/factset_data/factset_equity/processed/{this_fsym}.csv', index=False)
    processed_frames.append(df)

if skipped_fsyms:
    print('--skipped (no benchmark mapping)', len(skipped_fsyms))

# NOTE(review): the combined file only contains the ids processed in THIS run, because ids
# that already have a file in processed/ are filtered out above. Confirm that overwriting
# the consolidated file with a partial set is intended.
if processed_frames:
    collection = pd.concat(processed_frames, axis=0)
    collection.to_csv('/Users/joeybortfeld/Documents/QML Solutions Data/factset_data/factset_consolidated/equity_ratios_combined.csv', index=False)
else:
    # pd.concat([]) raises; leave the consolidated file untouched when there is nothing new.
    collection = pd.DataFrame(columns=['fsym_id'])
    print('nothing new to process; combined file left untouched')







In [None]:
# Date, price and benchmark label of the frame currently held in `temp`.
temp.loc[:, ['date', 'price', 'benchmark']]

In [None]:
# Number of distinct fsym_ids present in `collection`.
fsym_column = collection['fsym_id']
fsym_column.nunique()

In [None]:
# Spot check: show the id at position 30 of `equity_fsyms`.
equity_fsyms[30]

In [None]:
# Pivot one metric of the univariate regression results: variables in rows, horizon t in columns.
category = 'liquidity'
# metric = 'P-value'
# metric = 'Coefficient'
metric ='AUROC - Train'

# Display precision per metric. Formatting is applied only AFTER sorting: sorting the
# formatted strings would order values lexicographically (e.g. '-0.50' before '10.00'
# is right by accident, '9.00' after '10.00' is not).
metric_formats = {
    'P-value': '{:.3f}',
    'AUROC - Train': '{:.2f}',
    'Coefficient': '{:.2f}',
}

# One results file per horizon; `t` records which horizon a row came from.
# Local names are used so the notebook-level `collection` and `temp` are not clobbered.
horizon_frames = []
for i in [1,2,3,4,5]:
    horizon_df = pd.read_csv(f'/Users/joeybortfeld/Downloads/univariate_reg_{i}y-pct_split.csv')
    horizon_df['t'] = i
    horizon_frames.append(horizon_df)

df = pd.concat(horizon_frames, axis=0)

print(df['Category'].unique())

df = df[df['Category'] == category]

df = df.pivot(index='Variable', columns='t', values=metric)
# numeric sort on the 5-period column, best first
df = df.sort_values(by=5, ascending=False)

# format for display; cells that are missing stay missing
if metric in metric_formats:
    df = df.apply(lambda col: col.map(metric_formats[metric].format, na_action='ignore'))
df


In [None]:

import numpy as np
import pandas as pd
from scipy.stats import norm
from scipy.optimize import fsolve

def merton_distance_to_default(market_cap, debt, equity_vol, risk_free_rate=0.03, time_horizon=1):
    """Distance to default and default probability from Merton's structural model.

    The unobserved asset value and asset volatility are found by numerically solving
    the two model equations with ``fsolve``, seeded with the equity values.

    Parameters
    ----------
    market_cap : equity value
    debt : debt level used as the default barrier
    equity_vol : equity volatility
    risk_free_rate : risk-free rate used for drift and discounting (default 0.03)
    time_horizon : horizon in the same time unit as the rate and volatility (default 1)

    Returns
    -------
    tuple
        ``(distance_to_default, probability_of_default)`` where the distance is d2 and
        the probability is ``norm.cdf(-d2)``.
    """

    def _d1_d2(assets, assets_vol):
        # d1/d2 terms shared by the solver residuals and the final result
        first = (np.log(assets / debt) + (risk_free_rate + 0.5 * assets_vol ** 2) * time_horizon) / (assets_vol * np.sqrt(time_horizon))
        second = first - assets_vol * np.sqrt(time_horizon)
        return first, second

    def _residuals(unknowns):
        assets, assets_vol = unknowns
        first, second = _d1_d2(assets, assets_vol)

        # equity priced as a call option on the assets
        value_gap = market_cap - (assets * norm.cdf(first) - np.exp(-risk_free_rate * time_horizon) * debt * norm.cdf(second))
        # link between equity volatility and asset volatility
        vol_gap = equity_vol * market_cap - norm.cdf(first) * assets * assets_vol

        return [value_gap, vol_gap]

    # Starting point: assets equal to market cap, asset vol equal to equity vol
    start = [market_cap, equity_vol]
    solved_assets, solved_vol = fsolve(_residuals, start)

    # Distance to default is d2 at the solved asset value / volatility
    _, distance_to_default = _d1_d2(solved_assets, solved_vol)
    probability_of_default = norm.cdf(-distance_to_default)

    return distance_to_default, probability_of_default

# Example Firm Data
market_cap = 36_702_000_000  # $36.702 billion market capitalization
debt = 3_000_000_000        # $3 billion in debt
equity_vol = 0.30           # 30% annualized volatility

# Compute DD and PD
# The probability is bound to `prob_default`, not `pd`: reusing `pd` would overwrite the
# pandas alias and break every later cell that calls `pd.`.
dd, prob_default = merton_distance_to_default(market_cap, debt, equity_vol)
print(f"Distance to Default: {dd:.7f}")
print(f"Probability of Default: {prob_default:.7%}")


In [None]:
# Price history of `temp`, restricted to dates before 2000-01-01.
cutoff_date = pd.to_datetime('2000-01-01')
temp.loc[temp['date'] < cutoff_date].set_index('date')['price'].plot()

In [None]:
def _fsyms_in_folder(folder, sep='.'):
    """Return the id prefix (text before the first `sep`) of every visible file in `folder`.

    Hidden entries (names starting with '.') are skipped so they are not counted as ids.
    """
    return [name.split(sep)[0] for name in os.listdir(folder) if not name.startswith('.')]


# Inventory of the ids available in each raw-data folder.
split_fsyms = _fsyms_in_folder('/Users/joeybortfeld/Documents/QML Solutions Data/factset_data/factset_equity/prices SPLIT/')
unsplit_fsyms = _fsyms_in_folder('/Users/joeybortfeld/Documents/QML Solutions Data/factset_data/factset_equity/prices UNSPLIT/')
# excel add-in files separate the id from the rest of the name with '_'
excel_fsyms = _fsyms_in_folder('/Users/joeybortfeld/Documents/QML Solutions Data/factset_data/factset_equity/excel_addin_download/', sep='_')
share_fsyms = _fsyms_in_folder('/Users/joeybortfeld/Documents/QML Solutions Data/factset_data/factset_equity/shares/')

print('split', len(split_fsyms))
print('unsplit', len(unsplit_fsyms))
print('excel', len(excel_fsyms))
print('shares', len(share_fsyms))

# ids that have an excel add-in download but no SPLIT price file
split_lookup = set(split_fsyms)
[f for f in excel_fsyms if f not in split_lookup]



In [None]:
# Daily price / volume / trade count for the current `this_fsym`, requested with split='SPLIT'.
price_request = {
    'id_list': [this_fsym],
    'field_list': ['price', 'volume', 'tradeCount'],
    'start_date': '2006-01-06',
    'end_date': '2024-12-31',
    'frequency': 'D',
    'split': 'SPLIT',
    'verbose': True,
    'authorization': credentials.factset_api_authorization,
}
response, temp1 = factset_api.get_stock_prices(**price_request)

temp1


In [None]:
# Download prices and shares outstanding for one security and plot them side by side.
this_fsym = 'MH33D6-R'

response, temp1 = factset_api.get_stock_prices(id_list=[this_fsym], 
field_list=['price', 'volume', 'tradeCount'], 
start_date='2006-01-06', 
end_date='2024-12-31', 
frequency='D',
split='SPLIT',
authorization=credentials.factset_api_authorization)


fig, axes = plt.subplots(figsize=(10, 5), ncols=2)
# Plot the prices that were just downloaded (temp1), not whatever an earlier cell left in `temp`.
temp1.set_index('date')['price'].plot(ax=axes[0], title='Price')

# NOTE(review): the function name is spelled `get_shares_outanding` in this call; it is
# kept as is because factset_api is not visible here — confirm against the module.
response, temp2 = factset_api.get_shares_outanding(id_list=[this_fsym], 
                     start_date='2006-03-31', 
                     end_date='2024-12-31', 
                     frequency='M',
                     verbose=False,
                     authorization=credentials.factset_api_authorization)

print(response)
temp2.set_index('date')['totalOutstanding'].plot(ax=axes[1], title='Total Outstanding')

In [None]:
# Combine prices (temp1) with shares outstanding (temp2) and derive market cap.
temp = temp1.merge(temp2, on='date', how='outer')
# Carry the last reported share count forward onto the following price rows.
# (.ffill() replaces the deprecated fillna(method='ffill').)
temp['totalOutstanding'] = temp['totalOutstanding'].ffill()
fig, ax = plt.subplots(figsize=(10, 5), ncols=2)
temp.set_index('date')[['totalOutstanding', 'price']].plot(secondary_y='totalOutstanding', ax=ax[0])

temp['market_cap'] = temp['totalOutstanding'] * temp['price']
# Plot against date like the left panel, not against the integer row index.
temp.set_index('date')['market_cap'].plot(ax=ax[1])
ax[0].set_title('Total Outstanding and Price')
ax[1].set_title('Market Cap')
plt.show()

# Keep the last row of each calendar year and rescale market cap by 1,000 for display.
temp['year'] = pd.to_datetime(temp['date']).dt.year
# .copy() so the in-place division below acts on an independent frame.
temp = temp.drop_duplicates(subset=['year'], keep='last').copy()
temp['market_cap'] /= 1_000
temp[['date', 'market_cap', 'price']]


# Price Download using SPLIT


In [None]:
# Annual FF_COM_SHS_OUT values for one security, shown as a bar chart by report date.
fundamentals_request = dict(
    id_list=['MH33D6-R'],
    # alternative field set kept for reference: ['FF_IS_MULTI_SHARE', 'FF_IS_ADR']
    field_list=['FF_COM_SHS_OUT'],
    periodicity='ANN',
    start_date='1990-01-01',
    end_date='2024-12-31',
    currency='LOCAL',
    update_type='RP',
    verbose=True,
    authorization=credentials.factset_api_authorization,
)
response, temp = factset_api.download_fundamentals(**fundamentals_request)
temp.set_index('reportDate')['value'].plot(kind='bar')


In [None]:
import requests

# Direct call to the FactSet Global Prices endpoint, bypassing the factset_api wrapper,
# to inspect the raw status code and response body.
url = 'https://api.factset.com/content/factset-global-prices/v1/prices?ids=CGF31Z-R&fields=price,priceOpen,priceHigh,priceLow,volume&startDate=2024-01-01&endDate=2024-08-27&frequency=D&calendar=FIVEDAY&currency=EUR&adjust=SPLIT&batch=N'

headers = {'Accept': 'application/json','Content-Type': 'application/json'}

# requests has no default timeout; without one a stalled connection blocks the kernel.
response = requests.get(url, headers=headers, auth=credentials.factset_api_authorization, timeout=30)

print(response.status_code)
print(response.text)

In [None]:
def calculate_cagr(initial, final, years):
    """Compound annual growth rate that takes `initial` to `final` over `years` periods."""
    growth_multiple = final / initial
    annual_exponent = 1 / years
    return growth_multiple ** annual_exponent - 1

# Parameters
initial_revenue = 1.00
initial_expenses = 0.30
initial_net_income = initial_revenue - initial_expenses

years = 5

# Revenue and expense growth rates to iterate over: 1% to 9% in one-point steps.
# The grid is built from integer percentages because a float-stepped np.arange can gain
# or lose its last element to rounding.
growth_percentages = np.arange(1, 10)
revenue_growth_rates = growth_percentages / 100
expense_growth_rates = growth_percentages / 100

# Create table: expense growth in rows, revenue growth in columns
cagr_table = pd.DataFrame(index=[f"{e*100:.0f}%" for e in expense_growth_rates],
                          columns=[f"{r*100:.0f}%" for r in revenue_growth_rates])

for e_growth in expense_growth_rates:
    for r_growth in revenue_growth_rates:
        # Compute revenue and expense projections
        final_revenue = initial_revenue * (1 + r_growth) ** years
        final_expenses = initial_expenses * (1 + e_growth) ** years
        final_net_income = final_revenue - final_expenses
        
        # Compute CAGR of net income
        cagr_net_income = calculate_cagr(initial_net_income, final_net_income, years)
        cagr_table.loc[f"{e_growth*100:.0f}%", f"{r_growth*100:.0f}%"] = f"{cagr_net_income*100:.2f}%"

# Save the result
cagr_table.to_csv('/Users/joeybortfeld/Downloads/cagr_table.csv')
