# Overview:

- This notebook builds one training dataset per index constituent. Each dataset combines the index return prediction (over the next month) produced by the index forecasting notebook, the constituent's price and fundamental data, and macroeconomic data.

In [142]:
import pandas as pd
# Raw inputs, all read from the local data/ folder:
#   constituent_prices     - a 'Date' column plus one price column per constituent
#   index_predictions      - index return predictions from the index forecasting notebook
#   unprocessed_index_data - index / macro data
constituent_prices, index_predictions, unprocessed_index_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/constituent_prices.csv',
        'data/final_index_predictions.csv',
        'data/index_data.csv',
    )
)

# The processed index data (from the index forecasting notebook) can be loaded too if needed:
# processed_index_data = pd.read_csv('data/final_test_data.csv')

In [143]:
# Every frame must expose its date under the same name ('Date') so that the
# merges further down can join on it.
unprocessed_index_data = unprocessed_index_data.rename(columns={'Unnamed: 0': 'Date'})
index_predictions = index_predictions.rename(columns={'date': 'Date'})

In [144]:
# Build one training/testing dataframe per constituent, keyed by ticker.
data_dict = {}

# Workbook holding one sheet of fundamental data per constituent.
FUNDAMENTALS_PATH = 'data/CON DATA VALUES.xlsx'

# Sheets are named '<ticker> US Equity'; tickers that are labeled differently
# in the fundamental data are listed here explicitly.
SHEET_NAME_OVERRIDES = {'BF-B': 'BF B US Equity'}

# Index prediction data only exists from 2015 onwards, so earlier rows are discarded.
START_DATE = '2015-01-01'

# Number of trailing rows removed from every frame (must stay > 0: a slice of
# [:-0] would return an empty frame).
# NOTE(review): presumably the length of the 1-month prediction horizon in
# trading days, i.e. rows without a realised outcome yet - confirm.
TRAILING_ROWS_TO_DROP = 20

# Open the workbook once and parse one sheet per constituent, instead of
# re-opening and re-reading the whole file on every loop iteration.
with pd.ExcelFile(FUNDAMENTALS_PATH) as fundamentals_book:
    for co in constituent_prices.columns:

        # 'Date' is the merge key, not a constituent
        if co == 'Date':
            continue

        # price series of this constituent only
        co_prices = constituent_prices[['Date', co]]

        # index / macro data is the left side of both joins, so its rows drive
        # the result: first add the constituent price, then the index return predictions
        co_data = (
            unprocessed_index_data
            .merge(co_prices, on='Date', how='left')
            .merge(index_predictions, on='Date', how='left')
        )

        # 'Date' is compared as a string here; the lexical comparison matches
        # chronological order only for ISO-formatted (YYYY-MM-DD) dates
        co_data = co_data[co_data['Date'] >= START_DATE].iloc[:-TRAILING_ROWS_TO_DROP]

        # load this constituent's fundamental data (the header is on the second row of the sheet)
        sheet_name = SHEET_NAME_OVERRIDES.get(co, f'{co} US Equity')
        co_fundamental = (
            fundamentals_book
            .parse(sheet_name=sheet_name, header=1)
            # align the key name and dtype with the string 'Date' column of co_data
            .rename(columns={'Dates': 'Date'})
            .assign(Date=lambda frame: frame['Date'].astype(str))
        )

        co_data = (
            co_data
            .merge(co_fundamental, on='Date', how='left')
            # keep only the columns that are complete for every remaining row
            .dropna(axis=1)
            # make explicit which price is the index and which is the constituent
            .rename(columns={'Price': 'index_price', 'Last Price': 'equity_price'})
        )

        # add data to dictionary
        data_dict[co] = co_data

In [147]:
# Sanity check: list every ticker that received a dataframe, then preview one of them (KO).
assembled_tickers = data_dict.keys()
print(assembled_tickers)
data_dict['KO']

dict_keys(['ADM', 'BF-B', 'CAG', 'CL', 'CLX', 'COST', 'CPB', 'EL', 'GIS', 'HRL', 'HSY', 'K', 'KMB', 'KO', 'KR', 'MDLZ', 'MKC', 'MNST', 'MO', 'PEP', 'PG', 'SJM', 'STZ', 'SYY', 'TAP', 'TSN', 'WBA', 'WMT'])


Unnamed: 0,Date,index_price,GDP,RATE_10,URATE,CPI,PPI,HOUSE,SAVE,Index Enterprise Value,...,KO,prediction,equity_price,Profit Margin_y,Total Debt to Total Equity_y,Price Earnings Ratio (P/E),Revenue,Net Income/Net Profit (Losses),Net Debt,Overridable Adjusted Beta
0,2015-01-02,8.873692,4411435.0,0.709943,5.7,99.298784,192.000,1085.0,6.3,4297.5892,...,31.598585,0.0,42.14,7.0824,136.5957,20.8825,10872.0,770.0,20070.0,0.7609
1,2015-01-05,8.816514,4411435.0,0.709943,5.7,99.298784,192.000,1085.0,6.3,4264.8392,...,31.598585,0.0,42.14,7.0824,136.5957,20.8825,10872.0,770.0,20070.0,0.7543
2,2015-01-06,8.813953,4411435.0,0.709943,5.7,99.298784,192.000,1085.0,6.3,4252.9792,...,31.838535,0.0,42.46,7.0824,136.5957,21.0411,10872.0,770.0,20070.0,0.7509
3,2015-01-07,8.933431,4411435.0,0.709943,5.7,99.298784,192.000,1085.0,6.3,4322.7592,...,32.235947,0.0,42.99,7.0824,136.5957,21.3038,10872.0,770.0,20070.0,0.7525
4,2015-01-08,9.081926,4411435.0,0.709943,5.7,99.298784,192.000,1085.0,6.3,4383.0192,...,32.625870,0.0,43.51,7.0824,136.5957,21.5614,10872.0,770.0,20070.0,0.7524
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2198,2023-09-27,29.930000,6928858.0,1.699708,3.8,129.118446,258.889,1346.0,3.7,6799.8725,...,55.950001,1.0,55.95,21.2746,151.0780,21.3631,11972.0,2547.0,25931.0,0.6430
2199,2023-09-28,30.000000,6928858.0,1.699708,3.8,129.118446,258.889,1346.0,3.7,6811.0825,...,55.810001,1.0,55.81,21.2746,151.0780,21.3096,11972.0,2547.0,25931.0,0.6412
2200,2023-09-29,30.040001,6928858.0,1.699708,3.8,129.118446,258.889,1346.0,3.7,6816.9425,...,55.980000,1.0,55.98,25.8262,144.3287,21.0584,11953.0,3087.0,24736.0,0.6410
2201,2023-10-02,29.660000,6928858.0,2.082461,3.9,129.118446,255.463,1372.0,3.8,6750.3425,...,55.480000,1.0,55.48,25.8262,144.3287,20.8703,11953.0,3087.0,24736.0,0.6407


In [1]:
'''
Introduction


Product
Eulith offers trade execution and risk management software, and third-party Separately Managed Account (SMA) services for digital assets. The company has three core products:

DeFi Armor: a non-custodial access management system for mitigating on-chain trading and operational risks. Enables the separation of on-chain trading and operations by providing a “co-signer” that performs transaction simulations and access control based on user-defined policies.

Poems: an order execution and management system for DeFi fund managers and capital allocators. Provides active DeFi traders and fund managers with a CeFi-like trading experience for DeFi by offering advanced tools and algorithms, data feeds and price quotes, triggers, and pre-trade risk analyses. For investors and capital allocators, Poems offers customized and real-time analytics, portfolio management tools, and security solutions.

Eulith Capital: Separately Managed Account (SMA) and liquidity services for digital assets. Provides third-party risk controls and trading tools via its software products, and legal infrastructure solutions to streamline and de-risk digital asset SMAs. Facilitates fundraising and unlocks liquidity via Eulith’s network of fund managers and capital allocators.

Market Opportunity & Traction

A lack of robust infrastructure severely bottlenecks institutional demand for DeFi. Without in-built security, trade execution, and risk management capabilities, DeFi protocols are unsuited to support large-scale, organizational use. Eulith aims to capitalize on this opportunity by equipping DeFi with the advanced functionality expected by institutional investors and traders without restricting its functionality.

Statistics from PwC's 2023 Crypto Hedge Fund report support the thesis that demand for professional-grade DeFi infrastructure will grow, implying a large market opportunity for Eulith's products. The report estimates that the total assets under management (AUM) of crypto hedge funds will grow from $2 billion in 2020 to $13 billion in 2023, representing a 550% increase.
'''

"\nIntroduction\n\n\nProduct\nEulith offers trade execution and risk management software, and third-party Separately Managed Account (SMA) services for digital assets. The company has three core products:\n\nDeFi Armor: a non-custodial access management system for mitigating on-chain trading and operational risks. Enables the separation of on-chain trading and operations by providing a “co-signer” that performs transaction simulations and access control based on user-defined policies.\n\nPoems: an order execution and management system for DeFi fund managers and capital allocators. Provides active DeFi traders and fund managers with a CeFi-like trading experience for DeFi by offering advanced tools and algorithms, data feeds and price quotes, triggers, and pre-trade risk analyses. For investors and capital allocators, Poems offers customized and real-time analytics, portfolio management tools, and security solutions.\n\nEulith Capital: Separately Managed Account (SMA) and liquidity serv