In [None]:
import pandas as pd 
import numpy as np 
from lightgbm import LGBMRegressor
from sklearn.linear_model import LinearRegression
import time 
import datetime
from sklearn.svm import SVR
import gresearch_crypto

In [None]:
def read_csv_strict(file_name='/kaggle/input/g-research-crypto-forecasting/train.csv'):
    """Load a CSV file and split its rows at a fixed datetime cutoff.

    A ``datetime`` column is derived from the ``timestamp`` column, which is
    interpreted as seconds since the Unix epoch. Rows strictly before
    ``2021-06-13 00:00:00`` go into the first frame; rows at or after that
    instant go into the second.

    Parameters
    ----------
    file_name : str
        Path of the CSV file to read. It must contain a ``timestamp`` column.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(train_df, test_df)``, both carrying the added ``datetime`` column.
    """
    cutoff = '2021-06-13 00:00:00'
    frame = pd.read_csv(file_name)
    frame['datetime'] = pd.to_datetime(frame['timestamp'], unit='s')
    stamps = frame['datetime']
    # Two independent masks: a row whose datetime is missing satisfies neither.
    return frame[stamps < cutoff], frame[stamps >= cutoff]

def weighted_correlation(a, b, weights):
    """Compute the weighted correlation coefficient between ``a`` and ``b``.

    All three inputs are flattened with ``np.ravel`` before use, so they only
    need to contain the same number of elements.

    Parameters
    ----------
    a, b : array-like
        The two samples to correlate.
    weights : array-like
        Per-element weights applied to both samples.

    Returns
    -------
    float
        Weighted covariance divided by the square root of the product of the
        two weighted variances.
    """
    wts = np.ravel(weights)
    xs = np.ravel(a)
    ys = np.ravel(b)
    total = np.sum(wts)

    # Weighted first moments.
    mu_x = np.sum(xs * wts) / total
    mu_y = np.sum(ys * wts) / total

    # Weighted second central moments.
    spread_x = np.sum(wts * np.square(xs - mu_x)) / total
    spread_y = np.sum(wts * np.square(ys - mu_y)) / total

    # Covariance as E[xy] - E[x]E[y] under the weights.
    covariance = np.sum(xs * ys * wts) / total - mu_x * mu_y

    return covariance / np.sqrt(spread_x * spread_y)

In [None]:
# Dataset locations, written relative to the notebook's working directory.
# NOTE(review): the visible cells call read_csv_strict() with its default path,
# so TRAIN_CSV is not referenced in this view — confirm whether it is still needed.
TRAIN_CSV = '../input/g-research-crypto-forecasting/train.csv'
ASSET_DETAILS_CSV = '../input/g-research-crypto-forecasting/asset_details.csv'

In [None]:
# Read the CSV at read_csv_strict's default path; the function returns the rows
# before its datetime cutoff first and the rows at/after the cutoff second.
df_train, df_test = read_csv_strict()

In [None]:
# Asset metadata table, ordered by asset identifier.
df_asset_details = (
    pd.read_csv(ASSET_DETAILS_CSV)
    .sort_values(by="Asset_ID")
)
df_asset_details

In [None]:
# Show one row (positional index 2) with all of its columns.
df_train.iloc[2,:]

In [None]:
# Two new features from the competition tutorial
def upper_shadow(df):
    """Return ``High`` minus the larger of ``Close`` and ``Open``."""
    body_top = np.maximum(df['Close'], df['Open'])
    return df['High'] - body_top

def lower_shadow(df):
    """Return the smaller of ``Close`` and ``Open`` minus ``Low``."""
    body_bottom = np.minimum(df['Close'], df['Open'])
    return body_bottom - df['Low']

# A utility function to build the feature/target table from the original df.
def get_features(df,row=False):
    """Select the model input columns and attach the next row's close as target.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``timestamp``, ``Count``, ``Open``, ``High``,
        ``Low``, ``Close``, ``Volume``, ``VWAP`` and ``datetime``.
    row : bool, optional
        Unused; kept so existing calls that pass it keep working.

    Returns
    -------
    pandas.DataFrame
        A copy of the selected columns plus an ``output`` column holding the
        ``Close`` value of the following row. "Following" is positional: it is
        the next row of ``df`` regardless of any gap between timestamps. The
        last row has no successor, so its ``output`` is NaN.
    """
    df_feat = df[['timestamp','Count', 'Open', 'High', 'Low', 'Close', 'Volume', 'VWAP','datetime']].copy()
    # shift(-1) moves every value up by one position in a single vectorised
    # step. This replaces a per-row Python loop that used chained assignment
    # (slow on large frames, and not guaranteed to write into df_feat) and the
    # `pd.np` alias, which no longer exists in pandas 2.x.
    df_feat['output'] = df_feat['Close'].shift(-1)

    return df_feat

In [None]:
# Keep only the training rows of a single asset (Asset_ID == 0) and build the
# feature/target table for it.
df = df_train[df_train["Asset_ID"] == 0]
df_proc = get_features(df)

In [None]:
# List the columns produced by get_features.
df_proc.columns

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt

In [None]:
# Preview the first rows of the feature/target table.
df_proc.head()

In [None]:
def totimestamp(s):
    """Convert a ``dd/mm/YYYY`` date string to a Unix timestamp in seconds.

    The date is taken as midnight UTC, so the result does not depend on the
    machine's local time zone. A 64-bit integer is returned so dates beyond
    2038 do not overflow.

    Parameters
    ----------
    s : str
        Date formatted as ``dd/mm/YYYY``.

    Returns
    -------
    numpy.int64
        Seconds since the Unix epoch at 00:00:00 UTC of that date.

    Raises
    ------
    ValueError
        If ``s`` does not match the ``dd/mm/YYYY`` format.
    """
    midnight_utc = datetime.datetime.strptime(s, "%d/%m/%Y").replace(
        tzinfo=datetime.timezone.utc
    )
    return np.int64(midnight_utc.timestamp())


# Timestamp at which the validation window starts.
valid_window = [totimestamp("12/06/2021")]

In [None]:
# Training rows: everything strictly before the validation window starts.
before_window = df_proc['timestamp'] < valid_window[0]
train = df_proc.loc[before_window]

In [None]:
# Check the last training rows, i.e. those just before the validation window.
train.tail()

In [None]:
# Validation rows: everything from the start of the validation window onwards.
in_window = df_proc['timestamp'] >= valid_window[0]
valid = df_proc.loc[in_window]

In [None]:
# Check the first validation rows.
valid.head()

In [None]:
# Treat infinities as missing, then drop every row that has any missing value
# (this includes any row whose 'output' target is NaN).
train=train.replace([np.inf, -np.inf], np.nan).dropna(how="any")
valid=valid.replace([np.inf, -np.inf], np.nan).dropna(how="any")

# Columns that are not fed to the model: the time columns, the target itself,
# and the current 'Close'.
non_feature_cols = ["datetime", 'timestamp', 'output', 'Close']
X_train=train.drop(non_feature_cols,axis=1)
y_train=train['output']
X_test=valid.drop(non_feature_cols,axis=1)
y_test=valid['output']

# TODO: Try different models here!
model = LGBMRegressor()
model.fit(X_train, y_train)
pred = model.predict(X_test)

# The square root of the mean squared error is the *root* mean squared error,
# so it is named and printed as RMSE rather than MSE.
RMSE = sqrt(mean_squared_error(y_test, pred))
print('RMSE: %.3f' % RMSE)


In [None]:
# Store the model's predictions alongside the validation rows.
valid['pred']=pred

In [None]:
# y_test was taken from valid['output'] and shares its index, so this
# assignment leaves the column's values unchanged.
valid['output']=y_test

In [None]:
# Preview the validation rows, now including the 'pred' column.
valid.head()

In [None]:
# Index the validation rows by datetime and keep the last 200 for plotting.
# NOTE(review): the name says "btc", but the data was filtered to Asset_ID == 0;
# confirm against df_asset_details that this asset is actually Bitcoin.
btc = valid.set_index("datetime")
btc_mini = btc.iloc[-200:]


In [None]:
# Display the slice (at most 200 rows) used for plotting.
btc_mini

In [None]:
# The alias must be exactly `go`: the original line bound the module to a
# mistyped name, so `go.Figure` failed with NameError on a fresh kernel.
import plotly.graph_objects as go

# Candlestick chart of the Open/High/Low/Close columns over the plotted slice.
fig = go.Figure(
    data=[
        go.Candlestick(
            x=btc_mini.index,
            open=btc_mini['Open'],
            high=btc_mini['High'],
            low=btc_mini['Low'],
            close=btc_mini['Close'],
        )
    ]
)
fig.show()

In [None]:
import plotly.graph_objects as go

# Candlestick chart over the same slice, with each candle's close taken from
# the 'output' column instead of 'Close'.
fig = go.Figure(
    data=[
        go.Candlestick(
            x=btc_mini.index,
            open=btc_mini['Open'],
            high=btc_mini['High'],
            low=btc_mini['Low'],
            close=btc_mini['output'],
        )
    ]
)
fig.show()