## Top 100 market cap

In [None]:
import requests
import pandas as pd
import json

# Snapshot of CoinGecko's /coins/markets listing, priced in USD.
# NOTE(review): presumably relies on the endpoint's default ordering and page
# size to yield the top 100 by market cap; confirm against the API docs.
url = "https://api.coingecko.com/api/v3/coins/markets?vs_currency=usd"
response = requests.get(url, timeout=30)  # never hang the notebook on a stalled connection
response.raise_for_status()  # surface rate limiting / HTTP errors instead of framing an error payload

data = response.json()
top100 = pd.DataFrame(data)

In [None]:
# Cache the listing on disk. index=False keeps the positional index out of the
# file, so reading it back does not produce a spurious "Unnamed: 0" column.
top100.to_csv('top_100_cmc.csv', index=False)

In [None]:
# Reload the cached listing from disk so later cells can run from the CSV file.
top100 = pd.read_csv('top_100_cmc.csv')

## ccxt binance

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ccxt

In [None]:
# Binance client created without API credentials; the rest of the notebook
# uses it for OHLCV downloads and timestamp parsing.
ex = ccxt.binance()
# NOTE(review): presumably the pause, in milliseconds, between ccxt's
# automatic retries after a failed request; confirm against the ccxt docs.
ex.options['maxRetriesOnFailureDelay'] = 1000

In [None]:
# from_ts = ex.parse8601('2010-01-01 00:00:00')
# ohlcv = ex.fetch_ohlcv('BTC/USDT', '1d', since=from_ts, limit=1000)

In [None]:
import ccxt
import pandas as pd
from datetime import datetime, timedelta

# Upper bound of the download window: "now" as a UTC epoch in milliseconds.
# Taken from the exchange helpers rather than a naive local datetime.now(),
# which would be interpreted as UTC and skew the bound by the local offset.
end_ts = ex.milliseconds()
end_date = ex.iso8601(end_ts)

def fetch_ohlcv_in_batches(exchange, symbol, timeframe, since, limit, until_ts=None):
    """
    Download OHLCV candles page by page, starting at `since`.

    Parameters
    ----------
    exchange : object
        ccxt-style exchange exposing
        ``fetch_ohlcv(symbol, timeframe, since=..., limit=...)`` and
        ``milliseconds()``.
    symbol : str
        Market symbol passed through to ``fetch_ohlcv`` (e.g. 'BTC/USDT').
    timeframe : str
        Candle timeframe passed through to ``fetch_ohlcv`` (e.g. '1d').
    since : int
        Timestamp (ms) of the first candle to request.
    limit : int
        Maximum number of candles requested per call.
    until_ts : int, optional
        Stop requesting new pages once `since` reaches this timestamp (ms).
        Defaults to the exchange's current time, so the function no longer
        depends on a module-level ``end_ts`` variable.

    Returns
    -------
    all_ohlcv : list
        Every candle row returned by the exchange, in request order.
    """
    if until_ts is None:
        until_ts = exchange.milliseconds()

    all_ohlcv = []
    while since < until_ts:
        batch = exchange.fetch_ohlcv(symbol, timeframe, since=since, limit=limit)
        if not batch:
            break

        last_ts = batch[-1][0]
        if last_ts < since:
            # The exchange ignored `since` and replayed old candles; stop
            # instead of looping forever on the same page.
            break

        all_ohlcv.extend(batch)
        since = last_ts + 1  # resume just after the last candle received

    return all_ohlcv

def ohlcv_to_df(ohlcv):
    """
    Turn raw OHLCV rows into a DataFrame indexed by candle time.

    Parameters
    ----------
    ohlcv : list
        Rows of six values: timestamp (epoch milliseconds), open, high, low,
        close, volume.

    Returns
    -------
    df : DataFrame
        Columns open/high/low/close/volume, indexed by the 'timestamp'
        column converted to datetimes.
    """
    frame = pd.DataFrame(
        ohlcv,
        columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'],
    )
    frame['timestamp'] = pd.to_datetime(frame['timestamp'], unit='ms')
    return frame.set_index('timestamp')

# Fetch data
# ohlcv = ohlcv_to_df(fetch_ohlcv_in_batches(ex, 'BTC/USDT', '1d', start_ts, 1000))


In [None]:
# Start of the download window, parsed to epoch milliseconds by ccxt.
# NOTE(review): presumably the earliest date Binance has candles for; confirm.
start_date = '2017-08-17 00:00:00'
start_ts = ex.parse8601(start_date)
ohlcvs = {}  # lower-case symbol -> daily OHLCV DataFrame

for sym in top100['symbol']:
    print(f'fetching {sym}')
    try:
        ohlcv_df = ohlcv_to_df(fetch_ohlcv_in_batches(ex, f'{sym.upper()}/USDT', '1d', start_ts, 1000))
    except Exception as e:
        # Best effort: report the failure and move on. Skipping the
        # assignment matters, otherwise the previous symbol's frame would be
        # stored under this symbol (or a NameError raised on the first one).
        print(f"An unexpected error occurred: {sym} - {e}")
        continue

    ohlcvs[sym] = ohlcv_df
         

In [None]:
# Spot check: first rows of the frame stored under the 'aave' key.
ohlcvs['aave'].head()

## Save & Import data

In [None]:
import pickle

def save_dict_to_pickle(data_dict, filename):
    """
    Serialize a dictionary to disk with pickle.

    Parameters
    ----------
    data_dict : dict
        Object to store; in this notebook, ticker keys mapped to DataFrames.
    filename : str
        Destination path. The file is opened in binary write mode, so any
        existing content is replaced.
    """
    with open(filename, mode='wb') as handle:
        pickle.dump(data_dict, handle)

# Persist the downloaded candles to 'input.pkl' so they can be reloaded from disk.
save_dict_to_pickle(ohlcvs, 'input.pkl')


In [None]:
import pickle

def load_dict_from_pickle(filename):
    """
    Read back an object previously written with pickle.

    Parameters
    ----------
    filename : str
        Path of the pickle file to read.

    Returns
    -------
    data_dict : dict
        The unpickled object; in this notebook, ticker keys mapped to
        DataFrames.

    Notes
    -----
    Unpickling can execute arbitrary code, so only load files you created
    yourself.
    """
    with open(filename, mode='rb') as handle:
        return pickle.load(handle)

# Restore the candle dictionary from 'input.pkl' (replaces any in-memory `ohlcvs`).
ohlcvs = load_dict_from_pickle('input.pkl')
# print(ohlcvs)

### Remove irrelevant tokens

In [None]:
# Tickers dropped from the universe by name.
# NOTE(review): presumably ETH staking derivatives that merely track ETH; confirm.
excluded_tokens = {'steth', 'weeth', 'ezeth', 'reth', 'meth', 'eeth', 'rseth'}

# Iterate over a copy of the keys because entries are deleted in the loop.
# A single combined condition guarantees each key is deleted at most once
# (two separate `if`s would raise KeyError for a key matching both rules).
for k in list(ohlcvs):
    # 'usd' in the ticker also removes every symbol containing that substring.
    if k in excluded_tokens or 'usd' in k:
        del ohlcvs[k]


## Visualize 

### Using MPF

In [None]:
import mplfinance as mpf
# Line chart of the 'eth' frame with a volume panel, using the 'yahoo' style.
mpf.plot(ohlcvs['eth'], type='line', volume=True, style='yahoo')
plt.show()

### Using Plotly

In [None]:
import plotly.graph_objects as go

# Interactive candlestick chart of the 'eth' frame; the range slider under
# the x axis is switched off.
fig = go.Figure(
    data=[
        go.Candlestick(
            x=ohlcvs['eth'].index,
            open=ohlcvs['eth']['open'],
            high=ohlcvs['eth']['high'],
            low=ohlcvs['eth']['low'],
            close=ohlcvs['eth']['close'],
        )
    ]
)

fig.update_layout(
    title='OHLCV Data',
    xaxis_title='Date',
    yaxis_title='Price',
    xaxis_rangeslider_visible=False,
)

fig.show()

## Resample Adjusted Prices

In [None]:
# One column of close prices per ticker, aligned on the union of all dates.
# Building the frame in one shot (rather than inserting columns into an empty
# frame) avoids pinning the index to the first ticker's dates, which would
# silently drop any dates that ticker lacks, and avoids frame fragmentation.
close = pd.DataFrame({ticker: df['close'] for ticker, df in ohlcvs.items()})

In [None]:
def resample_prices(close_prices: pd.DataFrame, freq='M'):
    """
    Down-sample close prices, keeping the last observation of each period.

    Parameters
    ----------
    close_prices : DataFrame
        Close prices with one column per ticker; must be indexed so that
        ``DataFrame.resample`` accepts it.
    freq : str
        Pandas offset alias naming the target frequency.
        NOTE(review): recent pandas releases deprecate the 'M' alias in
        favour of 'ME'; confirm against the installed version.

    Returns
    -------
    prices_resampled : DataFrame
        One row per period holding the last price seen in that period.
    """
    resampler = close_prices.resample(freq)
    return resampler.last()


In [None]:
# Resample the close matrix at the function's default frequency ('M'), then
# show the last rows as a sanity check.
monthly_close = resample_prices(close)
monthly_close.tail()

## Compute Log Returns

In [None]:
def compute_log_returns(prices):
    """
    Compute period-over-period log returns.

    Parameters
    ----------
    prices : DataFrame
        Prices with one column per ticker and one row per date.

    Returns
    -------
    log_returns : DataFrame
        ``log(price_t / price_{t-1})`` for every ticker and date. The first
        row is NaN because it has no predecessor.
    """
    price_ratio = prices / prices.shift(1)
    return np.log(price_ratio)

In [None]:
# Log return of each ticker from one resampled close to the next.
monthly_close_returns = compute_log_returns(monthly_close)

In [None]:
# Display the returns table for inspection.
monthly_close_returns

## Shift Returns

In [None]:
def shift_returns(returns, shift_n):
    """
    Move every return `shift_n` rows along the date axis.

    Parameters
    ----------
    returns : DataFrame
        Returns with one column per ticker and one row per date.
    shift_n : int
        Number of rows to shift by. Positive values move data to later rows,
        negative values to earlier rows.

    Returns
    -------
    shifted_returns : DataFrame
        Same index and columns as `returns`; vacated rows are NaN.
    """
    shifted_returns = returns.shift(shift_n)
    return shifted_returns

In [None]:
# prev_returns: each row holds the return of the preceding period (the ranking signal).
prev_returns = shift_returns(monthly_close_returns, 1)
# lookahead_returns: each row holds the return of the following period (the outcome).
lookahead_returns = shift_returns(monthly_close_returns, -1)

In [None]:
def get_top_n(prev_returns, top_n):
    """
    Flag the `top_n` best-performing crypto on every date.

    Parameters
    ----------
    prev_returns : DataFrame
        Previous shifted returns for each ticker and date
    top_n : int
        The number of top performing crypto to get

    Returns
    -------
    top_crypto : DataFrame
        Same index and columns as `prev_returns`, dtype int64: 1 for the
        tickers with the `top_n` largest returns on that date, 0 otherwise.
        Missing returns are never selected, so a date with fewer than
        `top_n` valid returns flags fewer tickers.
    """
    # Rank within each row, largest return first. method='first' breaks ties
    # by column order, so exactly `top_n` tickers are flagged even when
    # returns tie (matching on values would flag every tied ticker).
    ranks = prev_returns.rank(axis=1, method='first', ascending=False)

    # NaN ranks compare False, which leaves missing returns at 0. Casting to
    # int64 keeps downstream arithmetic numeric rather than object-typed.
    return (ranks <= top_n).astype(np.int64)

In [None]:
# Number of tickers selected on each side (long and short) per date.
top_bottom_n = 20
# Long side: the highest previous-period returns.
df_long = get_top_n(prev_returns, top_bottom_n)
# Short side: negating the returns makes the lowest ones rank on top.
df_short = get_top_n(-1*prev_returns, top_bottom_n)

In [None]:
def print_top(df, name, top_n=10):
    """
    Print the columns of `df` with the largest column sums.

    Parameters
    ----------
    df : DataFrame
        Frame whose columns are summed; here a 0/1 selection mask per ticker.
    name : str
        Label inserted into the printed header line.
    top_n : int
        How many column names to list.
    """
    selection_counts = df.sum().sort_values(ascending=False)
    leaders = selection_counts.index[:top_n].values.tolist()
    print(f'{top_n} Most {name}:')
    print(', '.join(leaders))

# List the tickers selected most often on each side over the whole period.
print_top(df_long, 'Longed crypto')
print_top(df_short, 'Shorted crypto')

In [None]:
def portfolio_returns(df_long, df_short, lookahead_returns, n_crypto) -> pd.DataFrame:
    """
    Compute each ticker's contribution to an equally weighted long/short portfolio.

    Parameters
    ----------
    df_long : DataFrame
        Long selections for each ticker and date, marked with a 1
    df_short : DataFrame
        Short selections for each ticker and date, marked with a 1
    lookahead_returns : DataFrame
        Lookahead returns for each ticker and date
    n_crypto : int
        Divisor applied to every position, i.e. the number of positions the
        capital is split across on each date

    Returns
    -------
    portfolio_returns : DataFrame
        ``(long - short) * lookahead_return / n_crypto`` for each ticker and
        date
    """
    # +1 where held long, -1 where held short, 0 where not held.
    positions = df_long - df_short
    return positions * lookahead_returns / n_crypto

In [None]:
# Divisor is 2 * top_bottom_n: the long and short sides together hold that many positions.
expected_portfolio_returns = portfolio_returns(df_long, df_short, lookahead_returns, 2*top_bottom_n)

In [None]:
# Collapse the per-ticker contributions into one portfolio return per date.
# min_count=1 leaves dates without a single valid contribution as missing, so
# dropna() really removes them. A plain sum() reports such dates as 0.0, which
# makes dropna() a no-op and pulls the mean towards zero.
# astype(float) guards against an object-typed result when the 0/1 selection
# masks are object-typed.
expected_portfolio_returns_by_date = (
    expected_portfolio_returns.sum(axis=1, min_count=1).dropna().astype(float)
)
portfolio_ret_mean = expected_portfolio_returns_by_date.mean()
portfolio_ret_ste = expected_portfolio_returns_by_date.sem()
# Mean log return per period, compounded over 12 periods, as a percentage.
portfolio_ret_annual_rate = (np.exp(portfolio_ret_mean * 12) - 1) * 100

print("""
Mean:                       {:.6f}
Standard Error:             {:.6f}
Annualized Rate of Return:  {:.2f}%
""".format(portfolio_ret_mean, portfolio_ret_ste, portfolio_ret_annual_rate))

In [None]:
from scipy import stats

def analyze_alpha(expected_portfolio_returns_by_date):
    """
    Run a one-sample t-test of the portfolio returns against a mean of zero.

    Parameters
    ----------
    expected_portfolio_returns_by_date : Pandas Series
        Expected portfolio returns for each date

    Returns
    -------
    t_value
        T-statistic from the t-test
    p_value
        The p-value reported by ``scipy.stats.ttest_1samp``, halved.
        NOTE(review): halving gives the one-sided p-value only when the
        t-statistic is positive; confirm this is the intended reading.
    """
    test_result = stats.ttest_1samp(expected_portfolio_returns_by_date, 0)
    t_statistic, two_sided_p = test_result
    return t_statistic, two_sided_p / 2

In [None]:
# Pass the returns to the t-test as a numeric array; errors='coerce' turns
# anything non-numeric into NaN.
# NOTE(review): presumably needed because the series is object-typed; confirm.
t_value, p_value = analyze_alpha(pd.to_numeric(expected_portfolio_returns_by_date.values, errors='coerce'))
print("""
Alpha analysis:
 t-value:        {:.3f}
 p-value:        {:.6f}
""".format(t_value, p_value))

Conclusion: the strategy loses money :v