In [1]:
import os
import json
import pandas as pd
import numpy as np
import cvxpy as cp
import ast

### read in data

In [2]:
market_cap_categories = {
    'XL Market Cap': 5e10,
    'Large Market Cap': 1e10,
    'Medium Market Cap': 5e9,
    'Small Market Cap': 1e9
}

def define_marketcap_category(market_cap: float):
    for key, value in market_cap_categories.items():
        if market_cap > value:
            return key
    return 'XS Market Cap'

In [25]:
def process_price_data(path: str ='data/prices.csv', window_size: int = 7, max_null_price: int = 50):

    # read and pivot
    df_prices = pd.read_csv(path, parse_dates=['date'])
    pivot_price = df_prices.pivot(index='date', columns='coin', values='prices')

    # drop any with too many null values
    n_null_price = pivot_price.isna().sum()
    min_null_price = max_null_price
    to_drop = n_null_price[n_null_price > min_null_price].index.to_list()
    pivot_price = pivot_price[[c for c in pivot_price.columns if c not in to_drop]]
    
    # compute expected return and covariance
    pct_change = pivot_price.diff(window_size) / pivot_price
    mu_expected_return = pct_change.mean()
    sigma_covariance = pct_change.cov()
    return mu_expected_return, sigma_covariance, df_prices, to_drop

def process_coin_metadata(
        to_drop: list,
        metadata_path = 'data/coin_metadata.csv',
        category_groupings_path = 'data/category_groupings.json',
):
    
    with open(category_groupings_path, 'r', encoding='utf-8') as f:
        dct_category_groupings = json.load(f)
    lst_categories = sorted(set(np.concatenate([list(v) for v in dct_category_groupings.values()])))

    df_meta = pd.read_csv(metadata_path)
    df_meta = df_meta[~df_meta['id'].isin(to_drop)]
    df_meta['categories'] = df_meta['categories'].apply(ast.literal_eval)
    df_meta['market_cap_category'] = df_meta['market_caps'].map(define_marketcap_category)

    df_categories = df_meta[['id','categories']].explode('categories')

    dct_coin_category = pd.concat([
        df_categories[df_categories['categories'].isin(lst_categories)].groupby('categories')['id'].apply(list),
        df_meta.groupby('market_cap_category')['id'].apply(list)
    ])

    lst_assets = sorted(df_meta['id'])
    lst_categories = sorted(dct_coin_category.keys())

    return df_meta.set_index('id'), dct_category_groupings, dct_coin_category, lst_assets, lst_categories

In [26]:
mu_expected_return, sigma_covariance, df_prices, to_drop = process_price_data()

df_meta, dct_category_groupings, dct_coin_category, lst_assets, lst_categories = process_coin_metadata(to_drop=to_drop)

n_assets = len(lst_assets)

default_max_assets = 10

default_min_weights_assets = {asset: 0.0 for asset in lst_assets}
default_max_weights_assets = {asset: 0.5 for asset in lst_assets}

default_min_weights_categories = {category: 0.0 for category in lst_categories}
default_max_weights_categories = {category: 0.5 for category in lst_categories}

In [30]:
weights = cp.Variable(n_assets)
binary_selection = cp.Variable(n_assets, boolean=True)

mu = mu_expected_return.values
sigma = sigma_covariance.values
sigma_wrapped = cp.psd_wrap(sigma)

max_risk = 0.001  # Maximum acceptable variance (risk level)
risk = cp.quad_form(weights, sigma_wrapped)

expected_return = mu.T @ weights
objective = cp.Maximize(expected_return)

constraints = [
    cp.sum(weights) == 1,  # Sum of weights is 1
    weights >= 0,          # No short selling
    #cp.sum(binary_selection) <= default_max_assets,  # No more than 5 assets
    #weights <= binary_selection,     # Link weights to selection
    weights >= [default_min_weights_assets[asset] for asset in lst_assets], #
    weights <= [default_max_weights_assets[asset] for asset in lst_assets], #
    risk <= max_risk
]

prob = cp.Problem(objective, constraints)
prob.solve()

optimized_weights = weights.value

pd.Series(dict(zip(lst_assets, optimized_weights))).sort_values(ascending=False).head(10)

    Your problem is being solved with the ECOS solver by default. Starting in 
    CVXPY 1.5.0, Clarabel will be used as the default solver instead. To continue 
    using ECOS, specify the ECOS solver explicitly using the ``solver=cp.ECOS`` 
    argument to the ``problem.solve`` method.
    


nosana          5.000000e-01
bonk            5.000000e-01
aioz-network    2.163253e-12
paal-ai         1.388748e-12
mantra-dao      5.118258e-13
beam-2          4.814386e-13
superfarm       4.066362e-13
fetch-ai        3.503545e-13
corgiai         3.490064e-13
bittensor       3.394236e-13
dtype: float64