In [None]:
# EXPLORE RETURNS OF EQUAL- AND MCAP- WEIGHTED PORTFOLIOS

# Restrict the panel to tokens that appear in at least one of the per-month
# universes: flatten the nested lists, de-duplicate, then filter on slug.
all_listed_assets = []
for monthly_assets in token_universe_per_month:
    all_listed_assets.extend(monthly_assets)
asset_universe_unique = list(np.unique(all_listed_assets))
in_universe = df['slug_cmc'].isin(asset_universe_unique)
df = df[in_universe]

# Drop rows that do not have previous day information.
# The day gap is measured within each token. After sorting by
# (slug_cmc, date), a whole-frame difference would also compare the first row
# of one token with the last row of the previous token; a chance one-day gap
# there would be mistaken for a valid consecutive observation.
df = df.sort_values(by=['slug_cmc', 'date'], ignore_index=True)
df['day_diff'] = df.groupby('slug_cmc')['date'].diff().dt.days
# day_diff2 is the gap recorded on the next row of the same token. A row with
# day_diff2 == 1 is kept because it is the previous-day observation that its
# successor's return is computed from.
df['day_diff2'] = df.groupby('slug_cmc')['day_diff'].shift(-1)
num_rows = df[df.day_diff == 1].shape[0]
df = df[(df.day_diff == 1) | (df.day_diff2 == 1)]
# Sanity check: every row with a valid one-day gap must survive the filter.
assert(num_rows <= df.shape[0])
df = df.drop(['day_diff2'], axis=1)

# Calculate day over day return.
# SeriesGroupBy.pct_change keeps the original row index, so the result aligns
# with df on assignment. groupby(...).apply(pd.Series.pct_change) prepends the
# group key to the index on pandas >= 2.0 and can no longer be assigned back.
df['r_t'] = df.groupby('slug_cmc')['usd_per_token_cmc'].pct_change()
# Keep only rows whose day_diff marks the preceding observation as exactly one
# day earlier; the other rows were retained solely as return baselines.
df = df[df.day_diff == 1]
# Any token that still has a null return on a consecutive-day row is removed
# entirely, not just the offending row.
tokens_to_drop = np.unique(df[df.r_t.isnull()].slug_cmc.values)
df = df[~df.slug_cmc.isin(tokens_to_drop)]
df = df.drop('day_diff', axis=1)

# Keep observations dated 2016 through 2022 (both years inclusive).
in_sample_years = df['date'].dt.year.between(2016, 2022)
df = df.loc[in_sample_years]

# The panel must be fully populated from here on.
assert df.isnull().sum().sum() == 0

# Reorder chronologically (ties broken by token) with a fresh 0..n-1 index.
df = df.sort_values(by=['date', 'slug_cmc'], ignore_index=True)

# Calculate equal and mcap weighted daily returns, one window per entry of
# `dates`. Each window starts at the entry and spans one calendar month.
# NOTE(review): this comment used to say "by quarter", but the window below is
# relativedelta(months=1) -- confirm the spacing of `dates` matches.
equal_frames = []
mcap_frames = []
# dates[0] is skipped, exactly as the original range(1, len(dates)) loop did.
# NOTE(review): confirm that skipping the first date is intentional.
for date in dates[1:]:
    # Set up dates and asset universe
    date_plus_1mo = datetime.datetime.strptime(date, '%Y-%m-%d') + relativedelta(months=1)
    asset_universe = asset_universe_dict[date]

    # Subset to relevant data. The explicit copy makes temp_df an independent
    # frame, so the helper columns added below never write into a view of df.
    in_window = (df.date >= date) & (df.date < date_plus_1mo)
    temp_df = df[in_window & df.slug_cmc.isin(asset_universe)].copy()

    # Form equal weighted returns: plain cross-sectional mean per day
    equal_frames.append(temp_df.groupby('date')[['r_t']].mean())

    # Form mcap weighted returns: weights are each token's share of the
    # same-day total market cap within the universe.
    # NOTE(review): if usd_mcap_cmc is an end-of-day value, weighting r_t by it
    # uses information from the return day itself -- consider lagged weights.
    temp_df['mcap_sum'] = temp_df.groupby('date')['usd_mcap_cmc'].transform('sum')
    temp_df['mcap_weight'] = temp_df.usd_mcap_cmc / temp_df.mcap_sum
    temp_df['mcap_r_t'] = temp_df.r_t * temp_df.mcap_weight
    mcap_frames.append(temp_df.groupby('date')[['mcap_r_t']].sum())

# Concatenate once instead of growing the frames inside the loop. Fall back to
# an empty frame (the original starting value) when there were no windows.
equal_df = pd.concat(equal_frames) if equal_frames else pd.DataFrame()
mcap_df  = pd.concat(mcap_frames) if mcap_frames else pd.DataFrame()

# Neither portfolio return series may contain a null.
assert equal_df.isnull().sum().values == 0
assert mcap_df.isnull().sum().values == 0

# Print, for each portfolio, the geometric average return followed by the
# ratio of mean to standard deviation of its daily returns.
report_specs = (
    ('equal weighted return:', equal_df, 'r_t'),
    ('mcap weighted return:', mcap_df, 'mcap_r_t'),
)
for report_label, portfolio_df, return_col in report_specs:
    print(report_label)
    print(portfolio_df.apply(geometricAverageSimpleReturns, axis=0).values[0])
    print('sharpe:')
    daily_returns = portfolio_df[return_col].values
    print(np.mean(daily_returns)/np.std(daily_returns))

# Form the returns by year.
# Group on the calendar year of the date index directly instead of adding and
# later dropping a temporary 'year' column. The frames are never mutated, and
# geometricAverageSimpleReturns is applied to the return column only, not to
# a synthetic 'year' column as well.
equal_years = equal_df.index.year.rename('year')
mcap_years = mcap_df.index.year.rename('year')
print('equal weighted return:')
print(equal_df.groupby(equal_years).apply(geometricAverageSimpleReturns))
print('mcap weighted return:')
print(mcap_df.groupby(mcap_years).apply(geometricAverageSimpleReturns))


In [None]:
# MOVE THESE NOTES TO CLEANING

# manually look through it to confirm they are legit tokens
# or maybe give this task to jacob
# or maybe schedule a time to do this with jacob so we 2x the speed

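# Let's check whether the 0.01% mcap rule is reasonable over the entire time period.
# Format of the rows below: date - market cap figure (presumably the total
# market; confirm) - 0.01% of that figure, i.e. the implied cutoff.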

# Jan 1 2015 - $5B - $500k
# Jan 1 2016 - $7B - $700k
# Jan 1 2017 - $18B - $1.8M
# Jan 1 2018 - $600B - $60M
# Apr 1 2018 - $300B - $30M
# Jul 1 2018 - $250B - $25M
# Jan 1 2019 - $125B - $12M
# Apr 1 2019 - $145B - $14M
# Jul 1 2019 - $330B - $33M
# Oct 1 2019 - $220B - $22M
# Jan 1 2020 - $200B - $20M
# Apr 1 2020 - $175B - $17M
# Jul 1 2020 - $260B - $26M
# Oct 1 2020 - $340B - $34M
# Jan 1 2021 - $770B - $77M
# Apr 1 2021 - $1.9T - $190M
# Jul 1 2021 - $1.4T - $140M
# Oct 1 2021 - $2T - $200M