In [None]:
import tensorflow as tf
import keras
import pandas as pd
import numpy as np
import pdblp as bbg
from sklearn import preprocessing as pp
import sklearn as skl
import matplotlib.pyplot as plt
import seaborn as sns
import os
import yellowbrick
import requests
import json
print(tf.__version__)
from yellowbrick.features import RadViz, Rank2D, Rank1D, ParallelCoordinates
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.linear_model import LinearRegression, Ridge, Lasso
%matplotlib inline

In [None]:
def scatter_plot(data, feature, target):
    """Scatter ``data[target]`` against ``data[feature]`` on a fresh figure.

    Args:
        data: Table-like object indexable by column name.
        feature: Column plotted on the x axis.
        target: Column plotted on the y axis.

    A new 16x8 figure is opened on every call; nothing is returned.
    """
    plt.figure(figsize=(16, 8))
    x_label = f'Feature = {feature}'
    y_label = f'Target = {target}'
    plt.scatter(x=data[feature], y=data[target], c='black')
    plt.xlabel(x_label)
    plt.ylabel(y_label)

In [None]:
def get_coin_metrics_assets():
    """Request ``/v2/assets`` from the Coin Metrics API and return the decoded JSON body.

    Returns:
        The JSON payload parsed with ``json.loads``.

    Raises:
        requests.HTTPError: the API answered with a 4xx/5xx status.
        requests.RequestException: the request failed or timed out.
        json.JSONDecodeError: the response body is not valid JSON.
    """
    coin_metrics_url = 'https://api.coinmetrics.io/v2/assets'
    # The key is taken from the environment when available.
    # NOTE: the literal fallback is a credential committed to source; rotate it and
    # drop the fallback once COIN_METRICS_API_KEY is provisioned everywhere.
    api_key = os.environ.get('COIN_METRICS_API_KEY', 'DlgTDcpDmRS8H5gNnila')
    # TLS verification stays at the requests default (enabled). Behind an intercepting
    # proxy, point REQUESTS_CA_BUNDLE at the proxy CA instead of passing verify=False.
    with requests.Session() as coin_metrics_session:
        response = coin_metrics_session.get(
            coin_metrics_url,
            params={'api_key': api_key},  # appended to every request
            timeout=30,  # requests waits indefinitely when no timeout is given
        )
        # Fail loudly on an error status instead of returning an error payload as data.
        response.raise_for_status()
        return json.loads(response.text)

In [None]:
def get_coin_metrics_asset_info(asset_id):
    """Request ``/v2/assets/{asset_id}`` from the Coin Metrics API and return the decoded JSON body.

    Args:
        asset_id: Asset identifier placed in the URL path (the notebook passes ``'btc'``).

    Returns:
        The JSON payload parsed with ``json.loads``.

    Raises:
        requests.HTTPError: the API answered with a 4xx/5xx status.
        requests.RequestException: the request failed or timed out.
        json.JSONDecodeError: the response body is not valid JSON.
    """
    coin_metrics_url = f'https://api.coinmetrics.io/v2/assets/{asset_id}'
    # The key is taken from the environment when available.
    # NOTE: the literal fallback is a credential committed to source; rotate it and
    # drop the fallback once COIN_METRICS_API_KEY is provisioned everywhere.
    api_key = os.environ.get('COIN_METRICS_API_KEY', 'DlgTDcpDmRS8H5gNnila')
    # TLS verification stays at the requests default (enabled). Behind an intercepting
    # proxy, point REQUESTS_CA_BUNDLE at the proxy CA instead of passing verify=False.
    with requests.Session() as coin_metrics_session:
        response = coin_metrics_session.get(
            coin_metrics_url,
            params={'api_key': api_key},  # appended to every request
            timeout=30,  # requests waits indefinitely when no timeout is given
        )
        # Fail loudly on an error status instead of returning an error payload as data.
        response.raise_for_status()
        return json.loads(response.text)

In [None]:
def get_coin_metrics_metrics():
    """Request ``/v2/metric`` from the Coin Metrics API and return the decoded JSON body.

    Returns:
        The JSON payload parsed with ``json.loads``.

    Raises:
        requests.HTTPError: the API answered with a 4xx/5xx status.
        requests.RequestException: the request failed or timed out.
        json.JSONDecodeError: the response body is not valid JSON.
    """
    # NOTE(review): the path is singular here while the sibling collection endpoint is
    # plural (/assets) - confirm '/metric' is the intended route.
    coin_metrics_url = 'https://api.coinmetrics.io/v2/metric'
    # The key is taken from the environment when available.
    # NOTE: the literal fallback is a credential committed to source; rotate it and
    # drop the fallback once COIN_METRICS_API_KEY is provisioned everywhere.
    api_key = os.environ.get('COIN_METRICS_API_KEY', 'DlgTDcpDmRS8H5gNnila')
    # TLS verification stays at the requests default (enabled). Behind an intercepting
    # proxy, point REQUESTS_CA_BUNDLE at the proxy CA instead of passing verify=False.
    with requests.Session() as coin_metrics_session:
        response = coin_metrics_session.get(
            coin_metrics_url,
            params={'api_key': api_key},  # appended to every request
            timeout=30,  # requests waits indefinitely when no timeout is given
        )
        # Fail loudly on an error status instead of returning an error payload as data.
        response.raise_for_status()
        return json.loads(response.text)

In [None]:
def get_coin_metrics_data(asset_id, metrics):
    """Request ``/v2/assets/{asset_id}/metricdata`` and return the decoded JSON body.

    Args:
        asset_id: Asset identifier placed in the URL path.
        metrics: Value sent as the ``metrics`` query parameter; the notebook passes a
            comma-joined string of metric names.

    Returns:
        The JSON payload parsed with ``json.loads``.

    Raises:
        requests.HTTPError: the API answered with a 4xx/5xx status.
        requests.RequestException: the request failed or timed out.
        json.JSONDecodeError: the response body is not valid JSON.
    """
    coin_metrics_url = f'https://api.coinmetrics.io/v2/assets/{asset_id}/metricdata'
    # The key is taken from the environment when available.
    # NOTE: the literal fallback is a credential committed to source; rotate it and
    # drop the fallback once COIN_METRICS_API_KEY is provisioned everywhere.
    api_key = os.environ.get('COIN_METRICS_API_KEY', 'DlgTDcpDmRS8H5gNnila')
    # Trace the request without the api_key so the credential never lands in saved
    # notebook output.
    print(f'{coin_metrics_url}?metrics={metrics}')
    # TLS verification stays at the requests default (enabled). Behind an intercepting
    # proxy, point REQUESTS_CA_BUNDLE at the proxy CA instead of passing verify=False.
    with requests.Session() as coin_metrics_session:
        response = coin_metrics_session.get(
            coin_metrics_url,
            params={'api_key': api_key, 'metrics': metrics},
            timeout=30,  # requests waits indefinitely when no timeout is given
        )
        # Fail loudly on an error status instead of returning an error payload as data.
        response.raise_for_status()
        return json.loads(response.text)

In [None]:
# Download the asset catalogue, build a per-observation feature table for each selected
# asset, plot every engineered feature against the next-day return, then fit a linear
# regression on the resulting table.
# NOTE: this cell calls scale() and run_linear_regression(), which are defined in later
# cells of this notebook; those cells must be executed before this one on a fresh kernel.
df = pd.DataFrame(get_coin_metrics_assets())
assets = list(df.assets.values)
for a in ['btc']: #assets[0:1]:
    asset_data = pd.DataFrame(get_coin_metrics_asset_info(a))
    # Iterating a DataFrame yields its column labels, so each pass handles one column
    # of the asset-info frame.
    for asset_column in asset_data.columns:
        asset_info = asset_data[asset_column]
        metrics = pd.DataFrame(
            get_coin_metrics_data(asset_info['id'], ','.join(list(asset_info['metrics'])))
        ).reset_index()
        print(asset_info['id'])

        # Select by position rather than by label: the row order of the response keys
        # is not something this cell controls.
        metric_names = metrics.loc[metrics['index'] == 'metrics', 'metricData'].iloc[0]
        series_points = metrics.loc[metrics['index'] == 'series', 'metricData'].iloc[0]

        # Each entry of the series is a mapping: string members are collected as the
        # observation's timestamp, anything else is taken as its row of metric values.
        times = []
        value_rows = []
        for point in series_points:
            for member in point.values():
                if isinstance(member, str):
                    times.append(member)
                else:
                    value_rows.append(member)
        # Build the frame once instead of appending row by row (quadratic with .loc).
        time_series = pd.DataFrame(value_rows, columns=metric_names)
        time_series['times'] = times

        time_series['OneDayReturn'] = time_series.PriceUSD.astype('float').pct_change(1)
        # Target for row i is the return observed on row i+1 (including row 0); the
        # final row has no successor and is set to 0.
        time_series['NextDayReturn'] = time_series['OneDayReturn'].shift(-1, fill_value=0)
        print(time_series[['times', 'OneDayReturn', 'NextDayReturn']].tail(50))

        relevant_features = ['AdrActCnt', 'CapAct1yrUSD', 'CapRealUSD', 'CapMVRVCur',
                        'HashRate', 'SplyAct1yr', 'NVTAdj', 'NVTAdj90','TxCnt', 'TxTfr','TxTfrValAdjUSD',
                        'TxTfrValUSD', 'TxTfrValDayDst', 'TxTfrValDayDstMean' ,'UTXOCnt', 'VelActAdj1yr', 'VelCurAdj1yr']
        for feature in relevant_features:
            time_series[feature] = time_series[feature].astype('float64')
            # Standardised level of the feature.
            time_series[feature + '_SCALED'] = scale(pp.scale, time_series[feature])
            scatter_plot(time_series, feature + '_SCALED', 'NextDayReturn')
            # 1- and 2-observation percentage changes of the feature.
            for lag in range(1,3):
                time_series[feature + f'_{lag}_DAY_LAG'] = time_series[feature].pct_change(lag)
                scatter_plot(time_series, feature + f'_{lag}_DAY_LAG', 'NextDayReturn')

    # Keep only the engineered columns (plus the remaining raw metrics) for modelling
    # and neutralise missing/infinite values so the regression can be fitted.
    time_series = time_series.drop(columns=relevant_features + ['times'])
    time_series = time_series.replace([np.nan, np.inf, -np.inf], 0)
    print(time_series.describe())

    [errors, linear_model, score, intercept] = run_linear_regression(time_series, 'NextDayReturn')
    print(linear_model.get_params(deep=True))
    print(errors)
    print(score)
    print(intercept)
#relevant_metrics = ['TransferValueAdjusted/Raw', 'txCounts', 'UTXO', 'ActiveAddresses', 'NVT (Ratio of Price to Transaction Volume)'
#macro cycle? Current?

Configure Data Connections

In [None]:
def encode(encoder, data, reshape=False):
    """Fit ``encoder`` on ``data`` and return the transformed result.

    Args:
        encoder: Object exposing ``fit_transform``.
        data: Values handed to the encoder.
        reshape: When true, ``data`` is first copied into a NumPy array and reshaped to
            a single column of shape ``(-1, 1)``; otherwise it is passed through as is.
            (The notebook's convention: no reshape for label encoding, reshape for
            one-hot encoding.)

    Returns:
        Whatever ``encoder.fit_transform`` returns.
    """
    prepared = np.array(data).reshape(-1, 1) if reshape else data
    return encoder.fit_transform(prepared)

In [None]:
def scale(scaler, data):
    """Call ``scaler`` on ``data`` and return its result unchanged.

    Args:
        scaler: Callable taking the data as its only argument.
        data: Values to hand to ``scaler``.
    """
    scaled = scaler(data)
    return scaled

In [None]:
def run_linear_regression(data, target):
    """Fit an ordinary least-squares model of ``target`` on every other column of ``data``.

    Args:
        data: DataFrame holding the predictors and the target column.
        target: Name of the column to predict.

    Returns:
        A 4-tuple ``(cv_scores, model, fit_score, intercept)``:
        the 5-fold ``neg_mean_squared_error`` scores from ``cross_val_score``,
        the estimator fitted on all rows, the value of ``model.score`` on those same
        rows, and the fitted ``intercept_``.
    """
    predictors = data.drop(columns=[target])
    # The target is shaped as a single column (n_rows, 1).
    response = data[target].values.reshape(-1, 1)

    model = LinearRegression()
    model.fit(predictors, response)

    cv_scores = cross_val_score(
        model,
        predictors,
        response,
        scoring='neg_mean_squared_error',
        cv=5,
    )
    fit_score = model.score(predictors, response)
    return cv_scores, model, fit_score, model.intercept_

In [None]:
def run_ridge_regression(data, target, alphas=None):
    """Grid-search the Ridge ``alpha`` for predicting ``target`` from the other columns.

    Args:
        data: DataFrame holding the predictors and the target column.
        target: Name of the column to predict.
        alphas: Parameter grid passed to ``GridSearchCV``. Defaults to
            ``{'alpha': [1e-15, 1e-10, 1e-8, 1e-4, 1e-3, 1e-2, 1, 5, 10, 20]}``.

    Returns:
        The fitted ``GridSearchCV`` object, so callers can reach the tuned estimator
        and the per-candidate results. The best parameters and best
        ``neg_mean_squared_error`` score are also printed.
    """
    if alphas is None:
        # Built per call so the returned search object never aliases a shared default.
        alphas = {'alpha': [1e-15, 1e-10, 1e-8, 1e-4, 1e-3, 1e-2, 1, 5, 10, 20]}
    Xs = data.drop([target], axis=1)
    y = data[target].values.reshape(-1, 1)
    ridge = Ridge()
    ridge_regression = GridSearchCV(ridge, alphas, scoring='neg_mean_squared_error', cv=5)
    ridge_regression.fit(Xs, y)
    print(ridge_regression.best_params_)
    print(ridge_regression.best_score_)
    return ridge_regression

In [None]:
def run_lasso_regression(data, target, alphas=None):
    """Grid-search the Lasso ``alpha`` for predicting ``target`` from the other columns.

    Args:
        data: DataFrame holding the predictors and the target column.
        target: Name of the column to predict.
        alphas: Parameter grid passed to ``GridSearchCV``. Defaults to
            ``{'alpha': [1e-15, 1e-10, 1e-8, 1e-4, 1e-3, 1e-2, 1, 5, 10, 20]}``.

    Returns:
        The fitted ``GridSearchCV`` object, so callers can reach the tuned estimator
        and the per-candidate results. The best parameters and best
        ``neg_mean_squared_error`` score are also printed.
    """
    if alphas is None:
        # Built per call so the returned search object never aliases a shared default.
        alphas = {'alpha': [1e-15, 1e-10, 1e-8, 1e-4, 1e-3, 1e-2, 1, 5, 10, 20]}
    Xs = data.drop([target], axis=1)
    y = data[target].values.reshape(-1, 1)
    lasso = Lasso()
    lasso_regression = GridSearchCV(lasso, alphas, scoring='neg_mean_squared_error', cv=5)
    lasso_regression.fit(Xs, y)
    print(lasso_regression.best_params_)
    print(lasso_regression.best_score_)
    return lasso_regression