
# **MACHINE LEARNING: NEXT DAY BTC CRYPTO PRICE PREDICTION**

#**Fetch BTC-USD historical data**

In [None]:
import yfinance as yf
import pandas as pd

# Download BTC-USD daily history
ticker = yf.Ticker("BTC-USD")
hist = ticker.history(start="2015-01-01", end="2026-02-13", interval="1d")

# Use only closing price
data = hist[['Close']].rename(columns={'Close':'close'})

data.tail()


Unnamed: 0_level_0,close
Date,Unnamed: 1_level_1
2026-02-08 00:00:00+00:00,70264.726562
2026-02-09 00:00:00+00:00,70120.78125
2026-02-10 00:00:00+00:00,68793.960938
2026-02-11 00:00:00+00:00,66991.96875
2026-02-12 00:00:00+00:00,66221.84375


#**Feature Engineering**

In [None]:
import numpy as np

# Create lag features e.g., t-1, t-2,... t-7
for lag in range(1, 8):
    data[f'lag_{lag}'] = data['close'].shift(lag)

# Simple moving average (SMA) and other features
data['SMA_7'] = data['close'].rolling(window=7).mean()
data['SMA_14'] = data['close'].rolling(window=14).mean()

# Drop nulls from shifts/rolling
data.dropna(inplace=True)


#**Train/Predict XGBoost Model**

In [None]:
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split

X = data.drop(columns=['close'])
y = data['close']

# last day to predict = Feb 16, 2026
train_X, test_X, train_y, test_y = train_test_split(X, y, shuffle=False, test_size=30)

model = XGBRegressor(
    n_estimators=500,
    learning_rate=0.05,
    max_depth=5
)
model.fit(train_X, train_y)

# Forecast 16 Feb 2026
pred_16feb = model.predict(test_X[-1:].values)[0]
print(f"Predicted BTC price for 16 Feb 2026: {pred_16feb:.2f} USD")


Predicted BTC price for 16 Feb 2026: 67335.05 USD


#**Actual BTC Price on 16 Feb 2026**

In [None]:
import yfinance as yf
import pandas as pd

ticker = yf.Ticker("BTC-USD")

hist = ticker.history(
    start="2026-02-15",
    end="2026-02-17",
    interval="1d"
)

# Remove timezone for cleaner display
hist.index = hist.index.tz_localize(None)

# Select only the Close column
data = hist[['Close']]

# Filter to just the target date
result = data.loc["2026-02-16"]

result


Unnamed: 0,2026-02-16
Close,68843.15625


#**Compare Prediction vs Actual**

In [None]:
predicted_price = pred_16feb
actual_price = result['Close']

error = actual_price - predicted_price
percentage_error = (error / actual_price) * 100

print(f"Predicted BTC price for 2026-02-16: {predicted_price:.2f} USD")
print(f"Actual BTC price for 2026-02-16:   {actual_price:.2f} USD")
print(f"Difference: {error:.2f} USD")
print(f"Percentage Error: {percentage_error:.2f}%")

if abs(percentage_error) < 1:
    print("\nAccuracy: Excellent! (Error < 1%)")
elif abs(percentage_error) < 5:
    print("\nAccuracy: Good (Error < 5%)")
elif abs(percentage_error) < 10:
    print("\nAccuracy: Acceptable (Error < 10%)")
else:
    print("\nAccuracy: Poor (Error >= 10%)")

Predicted BTC price for 2026-02-16: 67335.05 USD
Actual BTC price for 2026-02-16:   68843.16 USD
Difference: 1508.10 USD
Percentage Error: 2.19%

Accuracy: Good (Error < 5%)


# **Tuned XGBoost for better performance**

In [None]:
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV

# ---- 1. Time series split (important for financial data) ----
tscv = TimeSeriesSplit(n_splits=3)

# ---- 2. Parameter grid to test ----
param_grid = {
    "n_estimators": [300, 500, 800],
    "max_depth": [3, 5, 7],
    "learning_rate": [0.01, 0.05, 0.1],
    "subsample": [0.8, 1.0],
    "colsample_bytree": [0.8, 1.0]
}

# ---- 3. Base model ----
xgb = XGBRegressor(objective="reg:squarederror")

# ---- 4. Grid search ----
grid = GridSearchCV(
    estimator=xgb,
    param_grid=param_grid,
    cv=tscv,
    scoring="neg_mean_absolute_error",
    verbose=1
)

# ---- 5. Fit ----
grid.fit(train_X, train_y)

# ---- 6. Best model ----
best_model = grid.best_estimator_

print("Best Parameters Found:")
print(grid.best_params_)


Fitting 3 folds for each of 108 candidates, totalling 324 fits
Best Parameters Found:
{'colsample_bytree': 0.8, 'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 800, 'subsample': 1.0}


In [None]:
predicted_price = best_model.predict(test_X[-1:].values)[0]
print(f"Predicted BTC price for 16 Feb 2026: {predicted_price:.2f} USD")


Predicted BTC price for 16 Feb 2026: 70088.73 USD


# **Bitcoin Forecast: Original vs Tuned XGBoost**

In [None]:
import pandas as pd

# Data
date = "2026-02-16"
actual_price = 68843.16

# First prediction (before tuning)
pred1 = 67335.05
diff1 = pred1 - actual_price
error1 = abs(diff1) / actual_price * 100

# Second prediction (after tuning)
pred2 = 70088.73
diff2 = pred2 - actual_price
error2 = abs(diff2) / actual_price * 100

# Create DataFrame
comparison_df = pd.DataFrame({
    "Date": [date, date],
    "Model": ["Original XGBoost", "Tuned XGBoost"],
    "Predicted Price (USD)": [pred1, pred2],
    "Actual Price (USD)": [actual_price, actual_price],
    "Difference (USD)": [diff1, diff2],
    "Percentage Error (%)": [error1, error2]
})

comparison_df


Unnamed: 0,Date,Model,Predicted Price (USD),Actual Price (USD),Difference (USD),Percentage Error (%)
0,2026-02-16,Original XGBoost,67335.05,68843.16,-1508.11,2.190646
1,2026-02-16,Tuned XGBoost,70088.73,68843.16,1245.57,1.809286


# **Bitcoin Forecast: Tuned + Retrained XGBoost**

In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit

# 1. Get BTC daily data
ticker = yf.Ticker("BTC-USD")
hist = ticker.history(start="2015-01-01", end="2026-02-16")  # all history up to target
hist.index = hist.index.tz_localize(None)

# 2. Feature Engineering
data = hist[['Close', 'Volume']].copy()
data['Return'] = data['Close'].pct_change()
data['MA7'] = data['Close'].rolling(7).mean()
data['MA14'] = data['Close'].rolling(14).mean()
data['Volatility'] = data['Close'].rolling(7).std()
data['MA_Volume7'] = data['Volume'].rolling(7).mean()

# Drop rows with NaN
data = data.dropna()

# 3. Prepare train/test
features = ['Close', 'Volume', 'Return', 'MA7', 'MA14', 'Volatility', 'MA_Volume7']
target = 'Close'

X = data[features].shift(1).dropna()  # shift 1 for next day prediction
y = data[target].loc[X.index]

# Train last day is test
train_X = X.iloc[:-1]
train_y = y.iloc[:-1]
test_X = X.iloc[-1:]

# 4. Use tuned XGBoost parameters
best_model = XGBRegressor(
    n_estimators=800,
    max_depth=3,
    learning_rate=0.01,
    subsample=1.0,
    colsample_bytree=0.8,
    objective='reg:squarederror'
)

# 5. TimeSeries split (optional, not CV for one-day prediction)
tscv = TimeSeriesSplit(n_splits=3)

# 6. Train model
best_model.fit(train_X, train_y)

# 7. Predict next day
predicted_price = best_model.predict(test_X.values)[0]

print(f"Tuned + retrained XGBoost predicted BTC price for 16 Feb 2026: {predicted_price:.2f} USD")

Tuned + retrained XGBoost predicted BTC price for 16 Feb 2026: 69883.40 USD


In [None]:
import pandas as pd

# Actual price
actual_price = 68843.16

# Date
date = "2026-02-16"

# Predictions
original_pred = 67335.05
tuned_pred = 70088.73
retrained_pred = 69883.40

# Differences
diff_original = original_pred - actual_price
diff_tuned = tuned_pred - actual_price
diff_retrained = retrained_pred - actual_price

# Percentage errors
error_original = abs(diff_original) / actual_price * 100
error_tuned = abs(diff_tuned) / actual_price * 100
error_retrained = abs(diff_retrained) / actual_price * 100

# Build comparison DataFrame
comparison_df = pd.DataFrame({
    "Date": [date]*3,
    "Model": ["Original XGBoost", "Tuned XGBoost", "Tuned + Retrained XGBoost"],
    "Predicted Price (USD)": [original_pred, tuned_pred, retrained_pred],
    "Actual Price (USD)": [actual_price]*3,
    "Difference (USD)": [diff_original, diff_tuned, diff_retrained],
    "Percentage Error (%)": [error_original, error_tuned, error_retrained]
})

comparison_df


Unnamed: 0,Date,Model,Predicted Price (USD),Actual Price (USD),Difference (USD),Percentage Error (%)
0,2026-02-16,Original XGBoost,67335.05,68843.16,-1508.11,2.190646
1,2026-02-16,Tuned XGBoost,70088.73,68843.16,1245.57,1.809286
2,2026-02-16,Tuned + Retrained XGBoost,69883.4,68843.16,1040.24,1.511029


# **Bitcoin Forecast: Enhanced Features XGBoost**

In [None]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor

# -------------------------------
# 1. Use existing hist DataFrame
# -------------------------------
# hist should already have 'Close' and 'Volume'

data = hist[['Close', 'Volume']].copy()

# -------------------------------
# 2. Feature Engineering
# -------------------------------
# Moving Averages
data['MA3'] = data['Close'].rolling(3).mean()
data['MA7'] = data['Close'].rolling(7).mean()
data['MA14'] = data['Close'].rolling(14).mean()
data['MA21'] = data['Close'].rolling(21).mean()

# Exponential Moving Averages
data['EMA7'] = data['Close'].ewm(span=7, adjust=False).mean()
data['EMA14'] = data['Close'].ewm(span=14, adjust=False).mean()

# Returns
data['Return'] = data['Close'].pct_change()

# Volatility
data['Volatility7'] = data['Close'].rolling(7).std()

# RSI (Relative Strength Index)
delta = data['Close'].diff()
gain = delta.clip(lower=0)
loss = -1 * delta.clip(upper=0)
avg_gain = gain.rolling(14).mean()
avg_loss = loss.rolling(14).mean()
rs = avg_gain / avg_loss
data['RSI'] = 100 - (100 / (1 + rs))

# MACD
EMA12 = data['Close'].ewm(span=12, adjust=False).mean()
EMA26 = data['Close'].ewm(span=26, adjust=False).mean()
data['MACD'] = EMA12 - EMA26

# Volume momentum
data['Volume_MA7'] = data['Volume'].rolling(7).mean()

# Drop rows with NaN created by rolling calculations
data = data.dropna()

# -------------------------------
# 3. Prepare features & target
# -------------------------------
features = ['Close', 'Volume', 'MA3', 'MA7', 'MA14', 'MA21',
            'EMA7', 'EMA14', 'Return', 'Volatility7', 'RSI', 'MACD', 'Volume_MA7']
target = 'Close'

# Shift features by 1 day to predict next day
X = data[features].shift(1).dropna()
y = data[target].loc[X.index]

# Train last day is test
train_X = X.iloc[:-1]
train_y = y.iloc[:-1]
test_X = X.iloc[-1:]

# -------------------------------
# 4. Retrain tuned XGBoost
# -------------------------------
best_model = XGBRegressor(
    n_estimators=800,
    max_depth=3,
    learning_rate=0.01,
    subsample=1.0,
    colsample_bytree=0.8,
    objective='reg:squarederror'
)

best_model.fit(train_X, train_y)

# -------------------------------
# 5. Predict next day (16 Feb 2026)
# -------------------------------
predicted_price = best_model.predict(test_X.values)[0]

# -------------------------------
# 6. Compare with actual price
# -------------------------------
actual_price = result['Close']  # last row in hist
difference = predicted_price - actual_price
percentage_error = abs(difference) / actual_price * 100

print(f"Tuned + Retrained XGBoost with Enhanced Features predicted BTC price for 16 Feb 2026: {predicted_price:.2f} USD")
print(f"Actual BTC price: {actual_price:.2f} USD")
print(f"Difference: {difference:.2f} USD")
print(f"Percentage Error: {percentage_error:.2f}%")




Tuned + Retrained XGBoost with Enhanced Features predicted BTC price for 16 Feb 2026: 69359.30 USD
Actual BTC price: 68843.16 USD
Difference: 516.15 USD
Percentage Error: 0.75%


In [None]:
# -------------------------------
# 7. Create clean comparison table
# -------------------------------
comparison_df = pd.DataFrame({
    "Date": [hist.index[-1].date()] * 4,
    "Model": ["Original XGBoost", "Tuned XGBoost", "Tuned + Retrained XGBoost", "Enhanced Features XGBoost"],
    "Predicted Price (USD)": [67335.05, 70088.73, 69883.40, predicted_price],
    "Actual Price (USD)": [actual_price]*4,
    "Difference (USD)": [
        67335.05 - actual_price,
        70088.73 - actual_price,
        69883.40 - actual_price,
        difference
    ],
    "Percentage Error (%)": [
        abs(67335.05 - actual_price)/actual_price*100,
        abs(70088.73 - actual_price)/actual_price*100,
        abs(69883.40 - actual_price)/actual_price*100,
        percentage_error
    ]
})

comparison_df

Unnamed: 0,Date,Model,Predicted Price (USD),Actual Price (USD),Difference (USD),Percentage Error (%)
0,2026-02-15,Original XGBoost,67335.05,68843.15625,-1508.10625,2.190641
1,2026-02-15,Tuned XGBoost,70088.73,68843.15625,1245.57375,1.809292
2,2026-02-15,Tuned + Retrained XGBoost,69883.4,68843.15625,1040.24375,1.511034
3,2026-02-15,Enhanced Features XGBoost,69359.304688,68843.15625,516.148438,0.749745


#**Save Enhanced Features XGBoost Model**

In [14]:
import joblib
from google.colab import drive

drive.mount('/content/drive')

# Save the model to Google Drive
model_path = "/content/drive/MyDrive/Btc-predictor/backend/btc_model.pkl"
joblib.dump(best_model, model_path)

print(f"Model saved successfully to {model_path}")

Mounted at /content/drive
Model saved successfully to /content/drive/MyDrive/Btc-predictor/backend/btc_model.pkl


#**Model Performance**

In [17]:
from sklearn.metrics import mean_absolute_error, r2_score
import numpy as np

# Since test_X has only 1 sample, calculate metrics on train_X instead
y_train_pred = best_model.predict(train_X.values)

# Calculate metrics on training data
train_mae = mean_absolute_error(train_y, y_train_pred)
train_r2 = r2_score(train_y, y_train_pred)

# Directional accuracy on training data
actual_dir = np.diff(train_y.values) > 0
pred_dir = np.diff(y_train_pred) > 0
dir_acc = (actual_dir == pred_dir).sum() / len(actual_dir) * 100

print(f"Train R²: {train_r2:.4f}")
print(f"Train MAE: ${train_mae:.2f}")
print(f"Directional Accuracy: {dir_acc:.1f}%")

# For the single test prediction
y_test_pred = best_model.predict(test_X.values)[0]
print(f"\nTest Prediction (Feb 16): ${y_test_pred:.2f}")

Train R²: 0.9989
Train MAE: $570.50
Directional Accuracy: 50.0%

Test Prediction (Feb 16): $69359.30
