In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [29]:
df = pd.read_csv('crypto.csv', header=[0, 1], index_col=0, parse_dates=[0])
df.head()

Price,close,close,close,close,close,close,High,High,High,High,...,Open,Open,Open,Open,volume,volume,volume,volume,volume,volume
Ticker,ada,btc,doge,eth,sol,xrp,ada,btc,doge,eth,...,doge,eth,sol,xrp,ada,btc,doge,eth,sol,xrp
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2020-01-01,0.033458,7200.174316,0.002033,130.802002,,0.192667,0.033813,7254.330566,0.002052,132.835358,...,0.002028,129.630661,,0.192912,22948374,18565664997,51180941,7935230330,,1041134003
2020-01-02,0.032751,6985.470215,0.002009,127.410179,,0.188043,0.033507,7212.155273,0.00211,130.820038,...,0.002034,130.820038,,0.192708,20843934,20802083465,65071106,8032709256,,1085351426
2020-01-03,0.03418,7344.884277,0.002145,134.171707,,0.193521,0.034427,7413.715332,0.002177,134.554016,...,0.002008,127.411263,,0.187948,30162644,28111481032,62619988,10476845358,,1270017043
2020-01-04,0.034595,7410.656738,0.002241,135.069366,,0.194355,0.034685,7427.385742,0.00249,136.052719,...,0.002144,134.168518,,0.193521,29535781,18444271275,94227582,7430904515,,999331594
2020-01-05,0.034721,7411.317383,0.002419,136.276779,,0.195537,0.035356,7544.49707,0.002491,139.410202,...,0.00224,135.072098,,0.194367,21479178,19725074095,52631740,7526675353,,1168067557


In [3]:
df.columns = ['_'.join(col).strip().lower() for col in df.columns].copy()
# df.head()

In [4]:
df = df.dropna().copy()
# df.head()

In [5]:
df = df[[col for col in df.columns if col.startswith('close_')]].copy()
# df.head()

In [6]:
df = df.reset_index().drop('Date', axis=1).copy()
# df.head()

In [7]:
for lag in [1, 7, 60]:
    for col in df.columns:
        df[f'{col}_lag{lag}'] = df[col].shift(lag)


In [8]:
df = df.dropna().copy()
# df.head()

In [9]:
df['target_btc_7'] = df['close_btc'].shift(-7)
df['target_btc_60'] = df['close_btc'].shift(-60)
features = [col for col in df.columns if col.startswith('close') and col != 'close_btc']
X = df[features]
y_7 = df['target_btc_7'].interpolate()
y_60 = df['target_btc_60'].interpolate()

In [10]:
print("X shape:", X.shape)
print("y_7 shape:", y_7.shape)
print("y_60 shape:", y_60.shape)

X shape: (1749, 47)
y_7 shape: (1749,)
y_60 shape: (1749,)


In [11]:
from sklearn.model_selection import train_test_split

In [12]:
X_train_7, X_test_7, y_train_7, y_test_7 = train_test_split(X, y_7, test_size=0.3, random_state=101)
X_train_60, X_test_60, y_train_60, y_test_60 = train_test_split(X, y_60, test_size=0.3, random_state=101)

In [13]:
from sklearn.preprocessing import StandardScaler

In [14]:
scaler = StandardScaler()
scaler.fit(X_train_60)

X_train_scaled = scaler.transform(X_train_60)
X_test_scaled = scaler.transform(X_test_60)

In [15]:
from sklearn.preprocessing import PolynomialFeatures

In [16]:
poly_converter = PolynomialFeatures(degree=2, include_bias=False)
poly_converter.fit(X_train_scaled)

X_train_poly = poly_converter.transform(X_train_scaled)
X_test_poly = poly_converter.transform(X_test_scaled)
print(f'X train poly: {X_train_poly.shape}, X test poly: {X_test_poly.shape}')

X train poly: (1224, 1175), X test poly: (525, 1175)


In [17]:
from sklearn.linear_model import ElasticNetCV
from sklearn.model_selection import TimeSeriesSplit

In [18]:
tscv = TimeSeriesSplit(n_splits=5)
model = ElasticNetCV(l1_ratio=[.1, .5, .7, .9, .95, .99, 1], n_alphas=10, max_iter=2000000,
                    cv=tscv, tol=0.005, random_state=101)
model.fit(X_train_poly, y_train_60)

In [19]:
train_prediction = model.predict(X_train_poly)
test_prediction = model.predict(X_test_poly)

In [20]:
from sklearn.metrics import mean_squared_error

In [21]:
train_RMSE = np.sqrt(mean_squared_error(y_train_60, train_prediction))
test_RMSE = np.sqrt(mean_squared_error(y_test_60, test_prediction))

In [22]:
from sklearn.model_selection import cross_val_score

In [23]:
cv_scores = -cross_val_score(model, X_train_poly, y_train_60, cv=4, scoring='neg_mean_squared_error')
cv_RMSE = np.sqrt(np.mean(cv_scores))

In [24]:
print(f'Train RMSE: {train_RMSE: .4f}, Test RMSE: {test_RMSE: .4f}, CV RMSE: {cv_RMSE: .4f}, DIFF: {(test_RMSE - train_RMSE): .4f}')

Train RMSE:  4831.1058, Test RMSE:  5161.2188, CV RMSE:  5533.1759, DIFF:  330.1130


In [25]:
from joblib import dump

In [26]:
dump(model, 'elastic_net_60_btc.joblib')
dump(scaler, 'scaler_60_btc.joblib')
dump(poly_converter, 'poly_60_btc.joblib')

['poly_60_btc.joblib']