In [None]:
import numpy as np
import pandas as pd 

import os
# Print the full path of every file found under the Kaggle input directory.
for root, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)


In [None]:
# Locations of the competition CSV files inside the Kaggle input directory.
# Main training data.
data_train = '/kaggle/input/g-research-crypto-forecasting/train.csv'
# Per-asset details (read later and looked up by Asset_ID / Weight).
data_asset_details = '/kaggle/input/g-research-crypto-forecasting/asset_details.csv'
# Supplemental training data.
data_supplemental_train = '/kaggle/input/g-research-crypto-forecasting/supplemental_train.csv'

In [None]:
# Load the training set with explicit column dtypes to keep memory use down.
# 'Count' used to be listed twice in this mapping; a dict literal silently
# keeps only the last duplicate, so the redundant entry has been removed.
df_train = pd.read_csv(
    data_train,
    dtype={
        'Asset_ID': 'int8',
        'Count': 'int32',
        'row_id': 'int32',
        'Open': 'float64',
        'High': 'float64',
        'Low': 'float64',
        'Close': 'float64',
        'Volume': 'float64',
        'VWAP': 'float64',
    },
)
df_train.head()

In [None]:
import gresearch_crypto

In [None]:
# Read the asset details table and order its rows by asset id.
df_assets = (
    pd.read_csv(data_asset_details)
    .sort_values('Asset_ID')
)
df_assets.head()

## Data preprocessing

In [None]:
def upper_shadow(df):
    """Return ``High`` minus the larger of ``Close`` and ``Open``, element-wise."""
    body_top = np.maximum(df['Close'], df['Open'])
    return df['High'] - body_top

def lower_shadow(df):
    """Return the smaller of ``Close`` and ``Open`` minus ``Low``, element-wise."""
    body_bottom = np.minimum(df['Close'], df['Open'])
    return body_bottom - df['Low']

def get_features(df):
    """Build the model feature frame from the raw per-row market columns.

    A copy of the ``Count``, ``Open``, ``High``, ``Low``, ``Close``,
    ``Volume`` and ``VWAP`` columns of ``df`` is extended with:

    * ``Upper_Shadow`` / ``Lower_Shadow`` from the helper functions,
    * ratio and difference features of close vs. open and high vs. low,
    * the row-wise ``Mean`` and ``Median`` of the four price columns.

    The input frame is not modified; the new frame is returned.
    """
    base_cols = ['Count', 'Open', 'High', 'Low', 'Close', 'Volume', 'VWAP']
    price_cols = ['Open', 'High', 'Low', 'Close']

    df_feat = df[base_cols].copy()
    open_, high, low, close = (df_feat[col] for col in price_cols)

    df_feat['Upper_Shadow'] = upper_shadow(df_feat)
    df_feat['Lower_Shadow'] = lower_shadow(df_feat)

    # Ratios can produce inf when the denominator is zero.
    df_feat["Close/Open"] = close / open_
    df_feat["Close-Open"] = close - open_
    df_feat["High-Low"] = high - low
    df_feat["High/Low"] = high / low

    prices = df_feat[price_cols]
    df_feat['Mean'] = prices.mean(axis=1)
    df_feat["Median"] = prices.median(axis=1)
    return df_feat

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
from skopt import gp_minimize
from skopt.space import Real, Integer
from sklearn.model_selection import train_test_split
from functools import partial

In [None]:
def _broadcast_weights(x, w):
    """Return ``w`` as a float array broadcast to the shape of ``x``.

    A scalar weight must be expanded to one weight per observation;
    otherwise ``np.sum(w)`` is just ``w`` and the "weighted mean" below
    degenerates into a plain sum of ``x``.
    """
    return np.broadcast_to(np.asarray(w, dtype=float), np.shape(x))


def m(x, w):
    """Weighted mean of ``x`` with weights ``w`` (scalar or array-like)."""
    w = _broadcast_weights(x, w)
    return np.sum(x * w) / np.sum(w)


def cov(x, y, w):
    """Weighted covariance of ``x`` and ``y`` with weights ``w``.

    ``w`` may be a scalar (same weight for every observation) or an array
    broadcastable to the shape of ``x``.
    """
    w = _broadcast_weights(x, w)
    return np.sum(w * (x - m(x, w)) * (y - m(y, w))) / np.sum(w)


def wcc(x, y, w=2.079441541679836):
    """Weighted correlation coefficient between ``x`` and ``y``.

    With a scalar ``w`` (including the default) every observation gets the
    same weight, so the result equals the ordinary Pearson correlation.

    NOTE(review): the default looks like a hard-coded per-asset weight
    (the notebook separately looks up the weight of Asset_ID 7) - confirm.
    """
    return cov(x, y, w) / np.sqrt(cov(x, x, w) * cov(y, y, w))

In [None]:
# Show how many training rows exist for each Asset_ID value.
df_train['Asset_ID'].value_counts()

In [None]:
# Pull the 'Weight' entry of the asset whose Asset_ID is 7 (first match).
is_asset_7 = df_assets['Asset_ID'] == 7
weight = df_assets.loc[is_asset_7, 'Weight'].values[0]

In [None]:
# Display the weight value as the cell output.
weight

In [None]:
# Convert the epoch-second timestamps to timezone-aware London time.
# The conversion goes through the vectorised .dt accessor instead of a per-row
# Python lambda, which is far cheaper on a frame of this size. The
# infer_datetime_format flag is dropped: it is deprecated in recent pandas and
# is not needed when `unit` is given.
df_train['DateAndTime'] = (
    pd.to_datetime(df_train['timestamp'], unit='s', utc=True)
    .dt.tz_convert('Europe/London')
)
# London calendar day as a naive datetime64[ns] at midnight, so it can be
# compared directly against date strings. This matches the previous
# `.dt.date` -> astype('datetime64[ns]') result without building Python
# date objects.
df_train['Date'] = df_train['DateAndTime'].dt.tz_localize(None).dt.normalize()
# Re-bind instead of inplace=True; the resulting frame is the same.
df_train = df_train.set_index('DateAndTime')
df_train.head()

In [None]:
start_date = '2020-01-01'
# NOTE(review): end_date is defined but no upper bound is applied to the
# data below - confirm whether rows after end_date should be excluded.
end_date = '2021-09-21'
# Keep only rows dated on or after start_date (the bound is taken from the
# variable instead of repeating the literal).
data_training = df_train[df_train['Date'] >= start_date].copy()
df_train = data_training

In [None]:
# Select the rows of asset 7 and give them a fresh 0..n-1 index.
asset_rows = df_train['Asset_ID'] == 7
df = df_train.loc[asset_rows].reset_index(drop=True)

In [None]:
df_proc = get_features(df)
df_proc['y'] = df['Target']
# Treat +/-inf (e.g. from a zero denominator in the ratio features) as
# missing, then drop every row with a missing value in one step.
# The previous code passed the row *positions* from np.where(...) to
# drop(labels=...), which expects index *labels*; after dropna the index has
# gaps, so that either removed the wrong rows or raised KeyError.
df_proc = df_proc.replace([np.inf, -np.inf], np.nan).dropna(how="any")

X = df_proc.drop("y", axis=1)
y = df_proc["y"]

# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so test-set statistics leak into the scaling - confirm this is acceptable.
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [None]:
# Feature column names: every column of df_proc except the target 'y'.
col_names = df_proc.columns.drop('y')

In [None]:
# Random hold-out split with a fixed seed; test_size=0.75 sends three
# quarters of the rows to the test side.
# NOTE(review): training on only 25% of the rows is unusual - confirm that
# test_size=0.75 (rather than 0.25) is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.75,
    random_state=42,
)
target_col = "y"
# Wrap the split feature arrays in DataFrames labelled with the feature names.
# NOTE(review): this rebinds df_train, which previously held the raw training
# data, so earlier cells cannot be re-run afterwards without reloading.
df_train, df_test = (
    pd.DataFrame(features, columns=col_names)
    for features in (X_train, X_test)
)

## Finding the best parameters

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Ridge

# Hyperparameter grid for the ridge regression.
# 'lbfgs' was removed from the solver list: Ridge only accepts it together
# with positive=True, so every fit using it failed.
# NOTE(review): the iterative solvers ('sag', 'saga') with tol=1e-6 and
# max_iter=15000 are likely the dominant cost of this search, and the solver
# choice should not materially change the fitted ridge solution - consider
# searching over alpha only.
grid = {
    "alpha": [0.6, 0.8, 1.0, 1.2],
    'solver': ['svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'],
}
ridge = Ridge(max_iter=15000, tol=1e-6)
ridge_cv = GridSearchCV(ridge, grid, cv=5)
ridge_cv.fit(X_train, y_train)

print("tuned hyperparameters :(best parameters) ", ridge_cv.best_params_)
# best_score_ is the mean cross-validated score of the estimator's default
# scorer (R^2 for Ridge), not the weighted correlation, so label it as such.
print("Best mean CV score (R^2) :", ridge_cv.best_score_)

# Predict on the held-out rows with the refitted best estimator.
prediction = ridge_cv.predict(X_test)

# Weighted correlation between the predictions and the true targets.
print('WCC - ', wcc(prediction, y_test))

<br>

## Result

The run timed out after 9 hours.