In [36]:
import pandas as pd
import json
pd.set_option('display.max_columns', 500)

df = pd.read_csv("/Users/yegortrussov/Documents/ml/lending_protocols/dataset_collection/data/markets_raw/eth_cbbtc_usdt.csv")
with open("/Users/yegortrussov/Documents/ml/lending_protocols/dataset_collection/data/common/markets_meta.json", 'r') as f:
    markets_meta = json.load(f)
with open("/Users/yegortrussov/Documents/ml/lending_protocols/dataset_collection/data/common/assets_meta.json", 'r') as f:
    assets_meta = json.load(f)

market_meta = markets_meta["0x45671fb8d5dea1c4fbca0b8548ad742f6643300eeb8dbd34ad64a658b2b05bca"]
asset_meta = assets_meta[market_meta["collateral_asset_address"]]
loan_asset_meta = assets_meta[market_meta["loan_asset_address"]]

market_meta

{'address': '0x45671fb8d5dea1c4fbca0b8548ad742f6643300eeb8dbd34ad64a658b2b05bca',
 'lltv': '860000000000000000',
 'oracle_address': '0x0E053750DFA4E809E5f7b119832C799c2aA138ac',
 'creation_datetime': 1761445331,
 'network': 'eth',
 'loan_asset_address': '0xdAC17F958D2ee523a2206206994597C13D831ec7',
 'loan_asset_symbol': 'USDT',
 'loan_asset_decimals': 6,
 'collateral_asset_address': '0xcbB7C0000aB88B473b1f5aFd9ef808440eed33Bf',
 'collateral_asset_symbol': 'cbBTC',
 'collateral_asset_decimals': 8,
 'irm_curve': [[0, 0.006689815213676517, 0],
  [0.01, 0.0069158059579373, 6.9158059579373e-05],
  [0.02, 0.007141796702198083, 0.00014283593404396168],
  [0.03, 0.007367787446458867, 0.000221033623393766],
  [0.04, 0.007593778190719651, 0.00030375112762878607],
  [0.05, 0.007819768934980435, 0.0003909884467490218],
  [0.06, 0.008045759679241218, 0.0004827455807544731],
  [0.07, 0.008271750423502002, 0.0005790225296451402],
  [0.08, 0.008497741167762786, 0.0006798192934210228],
  [0.09, 0.00872

In [18]:
df.head(2)
len(asset_meta["historical_price"])


17488

In [4]:
import matplotlib.pyplot as plt
def plot_daily_metrics(df_orig, col="assets_usd", date_col="datetime", cutoff=None, agg_func='last'):
    df = df_orig.copy().fillna(0)
    df[date_col] = pd.to_datetime(df[date_col])
    
    df['date_only'] = df[date_col].dt.date
    
    if agg_func == 'last':
        df = df.sort_values(date_col)
        df = df.drop_duplicates(subset='date_only', keep='last')
    elif agg_func == 'mean':
        df = df.groupby('date_only')[col].mean().reset_index()
        df[date_col] = pd.to_datetime(df['date_only'])
    else:
        raise ValueError("agg_func must be 'last' or 'mean'")
    
    if cutoff is not None:
        df = df[df[date_col] >= cutoff]
    
    daily_df = df.groupby(date_col)[col].sum().reset_index()
    
    plt.figure(figsize=(14, 7))
    plt.plot(daily_df[date_col], daily_df[col], linewidth=2)
    
    plt.title(f'Daily {col} ({agg_func})', fontsize=16)
    plt.xlabel('Date', fontsize=12)
    plt.ylabel(col, fontsize=12)
    plt.grid(True, alpha=0.3)
    
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()


import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
def plot_daily_metrics_express(df_orig, cols=["assets_usd"], date_col="datetime", cutoff=None, agg_func='last'):
    df = df_orig.copy().fillna(0)
    df[date_col] = pd.to_datetime(df[date_col])
    df['date_only'] = df[date_col].dt.date
    
    if agg_func == 'last':
        df = df.sort_values(date_col)
        df = df.drop_duplicates(subset='date_only', keep='last')
    elif agg_func == 'mean':
        grouped = df.groupby('date_only')[cols].mean().reset_index()
        df = grouped.copy()
        df[date_col] = pd.to_datetime(df['date_only'])
    
    if cutoff is not None:
        df = df[df[date_col] >= cutoff]
    
    daily_df = df.groupby(date_col)[cols].sum().reset_index()
    
    # Melt the dataframe for Plotly Express
    melted_df = daily_df.melt(id_vars=[date_col], value_vars=cols, 
                            var_name='metric', value_name='value')
    
    fig = px.line(
        melted_df, 
        x=date_col, 
        y='value',
        color='metric',
        title=f'Daily Metrics ({agg_func})',
        labels={'value': 'Value', date_col: 'Date', 'metric': 'Metric'},
        hover_data={date_col: '|%Y-%m-%d', 'value': ':,.5f'}
    )
    
    fig.update_traces(
        mode='lines+markers',
        line=dict(width=2),
        marker=dict(size=4),
        hovertemplate='<b>Date:</b> %{x|%Y-%m-%d}<br>' +
                     '<b>%{fullData.name}:</b> %{y:,.5f}<br>' +
                     '<extra></extra>'
    )
    
    fig.update_layout(
        width=1000,
        height=500,
        xaxis=dict(showgrid=True, gridwidth=1, gridcolor='LightGray'),
        yaxis=dict(showgrid=True, gridwidth=1, gridcolor='LightGray'),
        plot_bgcolor='white'
    )
    
    fig.show()

import plotly.express as px
import pandas as pd

def plot_metrics_timeseries(df_orig, col="assets_usd", date_col="datetime", cutoff=None):
    """
    Plot metrics as a timeseries with each row represented, sorted by timestamp.
    
    Parameters:
    - df_orig: DataFrame containing the data
    - col: Column name to plot
    - date_col: Name of the datetime column
    - cutoff: Minimum date to include (optional)
    """
    df = df_orig.copy().fillna(0)
    
    # Convert to datetime and sort by timestamp
    df[date_col] = pd.to_datetime(df[date_col])
    df = df.sort_values(date_col)
    
    # Apply cutoff if specified
    if cutoff is not None:
        cutoff = pd.to_datetime(cutoff)
        df = df[df[date_col] >= cutoff]
    
    # Create interactive plot with Plotly Express
    fig = px.line(
        df, 
        x=date_col, 
        y=col,
        title=f'Timeseries of {col}',
        labels={col: col, date_col: 'Timestamp'},
        hover_data={col: ':,.0f', date_col: '|%Y-%m-%d %H:%M:%S'}
    )
    
    # Customize the hover template to show full timestamp
    fig.update_traces(
        mode='lines+markers',
        line=dict(width=2),
        marker=dict(size=4),
        hovertemplate='<b>Timestamp:</b> %{x|%Y-%m-%d %H:%M:%S}<br>' +
                     f'<b>{col}:</b> %{{y:,.5f}}<br>' +
                     '<extra></extra>'
    )
    
    fig.update_layout(
        width=1200,
        height=600,
        xaxis=dict(
            showgrid=True, 
            gridwidth=1, 
            gridcolor='LightGray',
            title='Timestamp'
        ),
        yaxis=dict(
            showgrid=True, 
            gridwidth=1, 
            gridcolor='LightGray',
            title=col
        ),
        plot_bgcolor='white'
    )
    
    fig.show()
    
    return fig
    

In [39]:
import numpy as np
def add_interest_rates(df, irm_data):
    u, b, s = np.array(irm_data).T
    
    df = df.copy()
    
    df['borrow_rate_before'] = np.interp(df["utilization_before"].clip(0, 1), u, b)
    df['supply_rate_before'] = np.interp(df["utilization_before"].clip(0, 1), u, s)
    
    df['borrow_rate_after'] = np.interp(df["utilization_after"].clip(0, 1), u, b)
    df['supply_rate_after'] = np.interp(df["utilization_after"].clip(0, 1), u, s)
    
    if 'new_utilization' in df.columns:
        df['borrow_rate_after'] = np.interp(df['new_utilization'].clip(0, 1), u, b)
        df['supply_rate_after'] = np.interp(df['new_utilization'].clip(0, 1), u, s)
    
    return df

def add_collateral_prices(df, price_data, col="collateral_price"):
    timestamps, prices = np.array(price_data).T
    
    df = df.copy()
    
    def find_closest_price(tx_timestamp):
        idx = np.searchsorted(timestamps, tx_timestamp)
        
        if idx == 0:
            return prices[0]
        elif idx == len(timestamps):
            return prices[-1]
        else:
            left_diff = tx_timestamp - timestamps[idx-1]
            right_diff = timestamps[idx] - tx_timestamp
            
            if left_diff <= right_diff:
                return prices[idx-1]
            else:
                return prices[idx]
    
    df[col] = df['timestamp'].apply(find_closest_price)
    
    if 'assets' in df.columns and col == "collateral_price":
        df['collateral_value'] = df['assets'] * df['collateral_price']
    
    return df

def calculate_metrics(df, irm_data, asset_data, use_collateral=True):
    df = df.fillna(0).sort_values(['timestamp', 'hash'])
    df = df.reset_index(drop=True)
    
    metrics_rows = []
    total_supply = 0.0
    total_borrow = 0.0
    
    i = 0
    n = len(df)
    
    while i < n:
        current_hash = df.iloc[i]['hash']
        tx_start_idx = i
        
        tx_actions = []
        before_supply = total_supply
        before_borrow = total_borrow
        
        while i < n and df.iloc[i]['hash'] == current_hash:
            row = df.iloc[i]
            amount = abs(0 if row['assets'] is None else row['assets'] / 10**6)
            
            if use_collateral:

                if row['type'] in ['MarketSupply', 'MarketSupplyCollateral']:
                    total_supply += amount
                elif row['type'] in ['MarketWithdraw', 'MarketWithdrawCollateral']:
                    total_supply -= amount
            else:
                if row['type'] in ['MarketSupply']:
                    total_supply += amount
                elif row['type'] in ['MarketWithdraw']:
                    total_supply -= amount
            if row['type'] == 'MarketBorrow':
                total_borrow += amount
            elif row['type'] == 'MarketRepay':
                total_borrow -= amount
            
            i += 1
        
        after_supply = total_supply
        after_borrow = total_borrow
        
        before_util = before_borrow / before_supply if before_supply > 0 else 0
        after_util = after_borrow / after_supply if after_supply > 0 else 0
        
        metrics_rows.append({
            'hash': current_hash,
            'timestamp': df.iloc[tx_start_idx]['timestamp'],
            'datetime': df.iloc[tx_start_idx]['datetime'],
            'total_supply_before': before_supply,
            'total_borrow_before': before_borrow,
            'total_supply_after': after_supply,
            'total_borrow_after': after_borrow,
            'utilization_before': before_util,
            'utilization_after': after_util,
            'tx_actions': i - tx_start_idx
        })
    

    res = add_interest_rates(pd.DataFrame(metrics_rows), irm_data)
    res = add_collateral_prices(res, asset_data["historical_price"])
    res = add_collateral_prices(res, loan_asset_meta["historical_price"], col="loan_asset_price")
    
    return res


# plot_daily_metrics_express(calculate_metrics(df, use_collateral=False), "utilization_after", agg_func="meann")
metrics = calculate_metrics(df, use_collateral=False, irm_data=market_meta["irm_curve"], asset_data=asset_meta)
metrics
# _ = plot_metrics_timeseries(metrics, "utilization_after")
# _ = plot_daily_metrics_express(metrics, ["supply_rate_after", "borrow_rate_after"], agg_func="last")
# _ = plot_metrics_timeseries(metrics, "borrow_rate_after")
# _ = plot_daily_metrics_express(metrics, ["collateral_price"], agg_func="last")


Unnamed: 0,hash,timestamp,datetime,total_supply_before,total_borrow_before,total_supply_after,total_borrow_after,utilization_before,utilization_after,tx_actions,borrow_rate_before,supply_rate_before,borrow_rate_after,supply_rate_after,collateral_price,loan_asset_price
0,0xa5d64f0a0881150ac9e209470f4378f897d812965951...,1761446903,2025-10-26 02:48:23,0.000000e+00,0.000000e+00,1.000000e+00,0.000000e+00,0.000000,0.000000,1,0.006690,0.000000,0.006690,0.000000e+00,111550.118145,1.000379
1,0x769086d9812ed5f61210015e00744e7597370e9d4812...,1761446927,2025-10-26 02:48:47,1.000000e+00,0.000000e+00,1.000000e+00,0.000000e+00,0.000000,0.000000,1,0.006690,0.000000,0.006690,0.000000e+00,111550.118145,1.000379
2,0x0bd7a21adc448f0be70f9aeff1810e4aadf81594cea1...,1761446939,2025-10-26 02:48:59,1.000000e+00,0.000000e+00,1.000000e+00,1.000000e+00,0.000000,1.000000,1,0.006690,0.000000,0.112579,1.125788e-01,111550.118145,1.000379
3,0x5773092976efbd11bac571c4a18dec90c787d40014ad...,1761763931,2025-10-29 18:52:11,1.000000e+00,1.000000e+00,1.200000e+00,1.000000e+00,1.000000,0.833333,1,0.112579,0.112579,0.025522,2.126915e-02,110825.231405,1.000234
4,0x8eac7ccf48aecf50b6ac24ea1fb2bc8993f5d4f92618...,1761769415,2025-10-29 20:23:35,1.200000e+00,1.000000e+00,1.001283e+04,1.000000e+00,0.833333,0.000100,1,0.025522,0.021269,0.006692,6.906944e-07,110591.083494,1.000370
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
997,0xd8f51ee95562eba27c7ad9be93dbc409ecdf63e9039c...,1767682931,2026-01-06 07:02:11,3.445363e+06,3.104140e+06,3.449868e+06,3.104140e+06,0.900962,0.899785,1,0.027852,0.025101,0.027024,2.431597e-02,87435.493935,0.998655
998,0xec944a4c9ffd4d229fd547cf65025ebf19c116c7de7b...,1767695531,2026-01-06 10:32:11,3.449868e+06,3.104140e+06,3.529700e+06,3.104140e+06,0.899785,0.879435,1,0.027024,0.024316,0.026564,2.336162e-02,87435.493935,0.998655
999,0xbfde4a6f74683ed2ecd0a8075eafa4a85222ea014370...,1767708419,2026-01-06 14:06:59,3.529700e+06,3.104140e+06,3.529700e+06,3.104140e+06,0.879435,0.879435,1,0.026564,0.023362,0.026564,2.336162e-02,87435.493935,0.998655
1000,0x06f5d7b58441fe2f20e6c55e36bcfe4d785bc3cc66e3...,1767709355,2026-01-06 14:22:35,3.529700e+06,3.104140e+06,3.529700e+06,3.095366e+06,0.879435,0.876949,2,0.026564,0.023362,0.026508,2.324668e-02,87435.493935,0.998655


In [None]:
from tqdm import tqdm
def add_user_ltv(df):
    df = df.sort_values(['user_address', 'timestamp', 'hash'])
    
    result_rows = []
    
    for address in df['user_address'].unique():
        user_df = df[df['user_address'] == address].copy()
        
        collateral = 0.0
        debt = 0.0
        
        current_hash = None
        hash_data = []
        
        for idx, row in user_df.iterrows():
            if row['hash'] != current_hash:
                if current_hash is not None and hash_data:
                    first_row = hash_data[0]
                    last_row = hash_data[-1]
                    
                    price = first_row['collateral_price'] if 'collateral_price' in df.columns else 1.0
                    loan_asset_price = first_row['loan_asset_price']
                    
                    before_ltv = start_debt / (start_collateral * price) if start_collateral * price > 0 else 0
                    after_ltv = debt / (collateral * price) if collateral * price > 0 else 0
                    
                    for h_row in hash_data:
                        h_row['collateral_before'] = start_collateral
                        h_row['collateral_value_before'] = start_collateral * price
                        h_row['debt_before'] = start_debt  * loan_asset_price
                        h_row['ltv_before'] = before_ltv
                        h_row['collateral_after'] = collateral
                        h_row['collateral_value_after'] = collateral * price
                        h_row['debt_after'] = debt * loan_asset_price
                        h_row['ltv_after'] = after_ltv
                        result_rows.append(h_row)
                
                current_hash = row['hash']
                hash_data = []
                start_collateral = collateral
                start_debt = debt
            
            row_dict = row.to_dict()
            
            amount = abs(row['assets'])
            
            if row['type'] in ['MarketSupplyCollateral']:
                collateral += amount / (10**asset_meta["decimals"])
            elif row['type'] in ['MarketWithdrawCollateral']:
                collateral -= amount / (10**asset_meta["decimals"])
            elif row['type'] == 'MarketBorrow':
                debt += amount / (10**loan_asset_meta["decimals"])
            elif row['type'] == 'MarketRepay':
                debt -= amount / (10**loan_asset_meta["decimals"])

            if abs(debt) < 1e-6:
                debt = 0 
            if abs(collateral) < 1e-11:
                collateral = 0 
            
            
            hash_data.append(row_dict)
        
        if current_hash is not None and hash_data:
            first_row = hash_data[0]
            last_row = hash_data[-1]
            
            price = first_row['collateral_price']
            loan_asset_price = first_row['loan_asset_price']
            
            before_ltv = (start_debt * loan_asset_price) / (start_collateral * price) if start_collateral * price > 0 else 0
            after_ltv = (debt * loan_asset_price) / (collateral * price) if collateral * price > 0 else 0
            
            for h_row in hash_data:
                h_row['collateral_before'] = start_collateral
                h_row['collateral_value_before'] = start_collateral * price
                h_row['debt_before'] = start_debt * loan_asset_price
                h_row['ltv_before'] = before_ltv
                h_row['collateral_after'] = collateral
                h_row['collateral_value_after'] = collateral * price
                h_row['debt_after'] = debt * loan_asset_price
                h_row['ltv_after'] = after_ltv
                result_rows.append(h_row)
    
    result_df = pd.DataFrame(result_rows)
    
    return result_df.sort_values(["timestamp", "hash"])


add_user_ltv(df.merge(metrics.drop(columns=["timestamp", "datetime"]), how="left", on="hash"))
# df.shape

Unnamed: 0,hash,type,timestamp,user_address,assets,assets_usd,market,datetime,market_address,total_supply_before,total_borrow_before,total_supply_after,total_borrow_after,utilization_before,utilization_after,tx_actions,borrow_rate_before,supply_rate_before,borrow_rate_after,supply_rate_after,collateral_price,loan_asset_price,collateral_before,debt_before,ltv_before,collateral_after,debt_after,ltv_after
1000,0xa5d64f0a0881150ac9e209470f4378f897d812965951...,MarketSupply,1761446903,0xB47f11484e19f1914D32fd393b17671221C10F1F,1000000,1.000362,eth_cbbtc_usdt,2025-10-26 02:48:23,0x45671fb8d5dea1c4fbca0b8548ad742f6643300eeb8d...,0.000000e+00,0.000000e+00,1.000000e+00,0.000000e+00,0.000000,0.000000,1,0.006690,0.000000,0.006690,0.000000e+00,111550.118145,1.000379,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1001,0x769086d9812ed5f61210015e00744e7597370e9d4812...,MarketSupplyCollateral,1761446927,0xB47f11484e19f1914D32fd393b17671221C10F1F,3000,3.354816,eth_cbbtc_usdt,2025-10-26 02:48:47,0x45671fb8d5dea1c4fbca0b8548ad742f6643300eeb8d...,1.000000e+00,0.000000e+00,1.000000e+00,0.000000e+00,0.000000,0.000000,1,0.006690,0.000000,0.006690,0.000000e+00,111550.118145,1.000379,0.000000,0.000000,0.000000,0.000030,0.000000,0.000000
1002,0x0bd7a21adc448f0be70f9aeff1810e4aadf81594cea1...,MarketBorrow,1761446939,0xB47f11484e19f1914D32fd393b17671221C10F1F,1000000,1.000362,eth_cbbtc_usdt,2025-10-26 02:48:59,0x45671fb8d5dea1c4fbca0b8548ad742f6643300eeb8d...,1.000000e+00,0.000000e+00,1.000000e+00,1.000000e+00,0.000000,1.000000,1,0.006690,0.000000,0.112579,1.125788e-01,111550.118145,1.000379,0.000030,0.000000,0.000000,0.000030,1.000379,0.298819
1003,0x5773092976efbd11bac571c4a18dec90c787d40014ad...,MarketSupply,1761763931,0xB47f11484e19f1914D32fd393b17671221C10F1F,200000,0.200042,eth_cbbtc_usdt,2025-10-29 18:52:11,0x45671fb8d5dea1c4fbca0b8548ad742f6643300eeb8d...,1.000000e+00,1.000000e+00,1.200000e+00,1.000000e+00,1.000000,0.833333,1,0.112579,0.112579,0.025522,2.126915e-02,110825.231405,1.000234,0.000030,1.000234,0.300844,0.000030,1.000234,0.300844
334,0x8eac7ccf48aecf50b6ac24ea1fb2bc8993f5d4f92618...,MarketSupply,1761769415,0x79FD640000F8563A866322483524a4b48f1Ed702,10011631050,10015.332011,eth_cbbtc_usdt,2025-10-29 20:23:35,0x45671fb8d5dea1c4fbca0b8548ad742f6643300eeb8d...,1.200000e+00,1.000000e+00,1.001283e+04,1.000000e+00,0.833333,0.000100,1,0.025522,0.021269,0.006692,6.906944e-07,110591.083494,1.000370,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
941,0xec944a4c9ffd4d229fd547cf65025ebf19c116c7de7b...,MarketSupply,1767695531,0x8CB3649114051cA5119141a34C200D65dc0Faa73,79832402742,79803.968192,eth_cbbtc_usdt,2026-01-06 10:32:11,0x45671fb8d5dea1c4fbca0b8548ad742f6643300eeb8d...,3.449868e+06,3.104140e+06,3.529700e+06,3.104140e+06,0.899785,0.879435,1,0.027024,0.024316,0.026564,2.336162e-02,87435.493935,0.998655,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
618,0xbfde4a6f74683ed2ecd0a8075eafa4a85222ea014370...,MarketWithdrawCollateral,1767708419,0x7EA3A42531919F3AfEE4Dd80166c0289F5cdA169,20000000,18742.120300,eth_cbbtc_usdt,2026-01-06 14:06:59,0x45671fb8d5dea1c4fbca0b8548ad742f6643300eeb8d...,3.529700e+06,3.104140e+06,3.529700e+06,3.104140e+06,0.879435,0.879435,1,0.026564,0.023362,0.026564,2.336162e-02,87435.493935,0.998655,1.055996,51849.255454,0.562312,0.855996,51849.255454,0.693694
619,0x06f5d7b58441fe2f20e6c55e36bcfe4d785bc3cc66e3...,MarketRepay,1767709355,0x7EA3A42531919F3AfEE4Dd80166c0289F5cdA169,8774062107,8771.022130,eth_cbbtc_usdt,2026-01-06 14:22:35,0x45671fb8d5dea1c4fbca0b8548ad742f6643300eeb8d...,3.529700e+06,3.104140e+06,3.529700e+06,3.095366e+06,0.879435,0.876949,2,0.026564,0.023362,0.026508,2.324668e-02,87435.493935,0.998655,0.855996,51849.255454,0.693694,0.705996,43086.998272,0.698942
620,0x06f5d7b58441fe2f20e6c55e36bcfe4d785bc3cc66e3...,MarketWithdrawCollateral,1767709355,0x7EA3A42531919F3AfEE4Dd80166c0289F5cdA169,15000000,14056.590225,eth_cbbtc_usdt,2026-01-06 14:22:35,0x45671fb8d5dea1c4fbca0b8548ad742f6643300eeb8d...,3.529700e+06,3.104140e+06,3.529700e+06,3.095366e+06,0.879435,0.876949,2,0.026564,0.023362,0.026508,2.324668e-02,87435.493935,0.998655,0.855996,51849.255454,0.693694,0.705996,43086.998272,0.698942


In [20]:
# metrics = calculate_metrics(df, use_collateral=False)

# # metrics[metrics["utilization_after"]>1].ind

# metrics["utilization_after"].describe()

metrics.columns
# df["type"].value_counts()

Index(['hash', 'timestamp', 'datetime', 'total_supply_before',
       'total_borrow_before', 'total_supply_after', 'total_borrow_after',
       'utilization_before', 'utilization_after', 'tx_actions',
       'borrow_rate_before', 'supply_rate_before', 'borrow_rate_after',
       'supply_rate_after', 'collateral_price'],
      dtype='object')

In [42]:
def build_enriched_df(name, raw_df, market_meta):
    metrics = calculate_metrics(raw_df, use_collateral=False, irm_data=market_meta["irm_curve"], asset_data=asset_meta)

    enriched = raw_df.merge(metrics.drop(columns=["timestamp", "datetime"]))
    enriched = add_user_ltv(enriched)

    enriched.to_csv(f"/Users/yegortrussov/Documents/ml/lending_protocols/dataset_collection/data/markets_enriched/{name}.csv", index=False)

build_enriched_df(
    "eth_cbbtc_usdt",
    df,
    market_meta
)

In [11]:
df["type"].unique()

array(['MarketSupply', 'MarketSupplyCollateral', 'MarketBorrow',
       'MarketWithdraw', 'MarketWithdrawCollateral', 'MarketRepay'],
      dtype=object)

In [12]:
market_meta["collateral_asset_address"]

'0xcbB7C0000aB88B473b1f5aFd9ef808440eed33Bf'