In [31]:
# CELL 1 – Imports (no TensorFlow = no problems)
# Everything the notebook needs is imported here, once, so a fresh kernel can
# run the cells top to bottom.
import pandas as pd
import numpy as np
# `ta` provides the RSI, ATR and ADX indicators computed in CELL 3.
import ta
# NOTE(review): matplotlib and seaborn are not referenced in the cells visible
# in this part of the notebook — confirm they are used further down.
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report
# The four classifiers compared in CELL 4.
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
# StandardScaler is fitted in CELL 3; only the logistic model uses the scaled features.
from sklearn.preprocessing import StandardScaler
import warnings
# Silences every Python warning for the rest of the session, including any
# raised by the modelling libraries; comment this out while debugging.
warnings.filterwarnings("ignore")

In [7]:
# CELL 2 – Load data
# Location of the labelled price CSV. This is an absolute path on the author's
# machine, so the notebook only runs elsewhere after this line is edited.
csv_path = '/Users/josephmutui/Desktop/LuxDev/Python-class/crypto-classifier/data/processed/crypto_labeled_data.csv'

# Read the whole file into the frame every later cell builds on.
data = pd.read_csv(csv_path)

# Quick sanity check: report how many rows were read.
print("Data loaded – rows:", data.shape[0])

Data loaded – rows: 800


In [32]:
# CELL 3 – Winning weekly + trend filter setup
#
# Builds the indicator features and the 7-bar-ahead label on `data`, keeps only
# strong-trend rows in `model_data`, and produces the chronological train/test
# split plus scaled copies of the feature matrices.
#
# This cell is safe to re-run: `data` always keeps every loaded row (feature and
# label columns are simply recomputed), and the filtered rows live in the
# separate `model_data` frame. Rebinding `data` to the filtered frame would make
# each re-run compute the indicators on already-filtered rows and shrink the
# dataset a little more every time.
data['open_time'] = pd.to_datetime(data['open_time'])
data = data.sort_values('open_time').reset_index(drop=True)

# Features
data['rsi'] = ta.momentum.RSIIndicator(data['close'], 14).rsi()
data['atr_ratio'] = ta.volatility.AverageTrueRange(data['high'], data['low'], data['close'], 14).average_true_range() / data['close']
data['volume_ratio'] = data['volume'] / data['volume'].rolling(20).mean()
data['return_7d'] = data['close'].pct_change(7)
data['adx'] = ta.trend.ADXIndicator(data['high'], data['low'], data['close'], 14).adx()

# Weekly label: return from this bar's close to the close 7 bars later.
data['future_week_return'] = data['close'].pct_change(7).shift(-7)
threshold = 0.03
# Default is HOLD (1). The last 7 rows have no future return and therefore keep
# this default in `data`; they never reach the model because the dropna() below
# removes every row with a missing `future_week_return`.
data['label'] = 1
data.loc[data['future_week_return'] > threshold, 'label'] = 2   # BUY
data.loc[data['future_week_return'] < -threshold, 'label'] = 0   # SELL

# Only strong trends (ADX > 25). dropna() also removes the indicator warm-up
# rows and the unlabeled tail. The result goes into a new frame so `data` is
# left intact for the next run of this cell.
model_data = data[data['adx'] > 25].dropna().reset_index(drop=True)
if model_data.empty:
    raise ValueError(
        "No rows left after the ADX > 25 filter and dropna(); "
        "re-run the load cell to restore the full dataset."
    )

# Split: chronological 80/20, no shuffling.
# NOTE(review): labels look 7 bars ahead, so the last training rows are labelled
# with closes that fall inside the test period — consider purging them.
split = int(len(model_data) * 0.8)
train = model_data.iloc[:split]
test  = model_data.iloc[split:]

# Raw prices, timestamps and every column derived from future prices are
# excluded from the feature matrix.
drop_cols = ['open','high','low','close','open_time','close_time','future_return','label_name','future_week_return']
X_train = train.drop(columns=drop_cols + ['label'])
y_train = train['label']
X_test  = test.drop(columns=drop_cols + ['label'])
y_test  = test['label']

# The scaler is fitted on the training rows only and then applied to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled  = scaler.transform(X_test)

# The count printed here is the number of test rows (one per bar).
print(f"Final test weeks: {len(test)} | Labels: {y_test.value_counts().sort_index().to_dict()}")

Final test weeks: 12 | Labels: {1: 2, 2: 10}


In [33]:
# CELL 4 – FINAL COMPARISON (4 fast models only – 100% stable)
results = []

def evaluate(name, pred):
    """Score one model's test predictions and record them in `results`.

    Compares `pred` with the global `y_test`, appends one summary row
    (accuracy, per-class F1 and macro F1, each rounded to 3 decimals) to the
    global `results` list, and prints the full text classification report.

    Parameters
    ----------
    name : str
        Label stored in the 'Model' column and shown in the printed header.
    pred : array-like
        Predicted class labels for the test rows.
    """
    metrics = classification_report(y_test, pred, output_dict=True, zero_division=0)

    def class_f1(label_key):
        # A class missing from the report is scored as 0.
        return round(metrics.get(label_key, {'f1-score':0})['f1-score'], 3)

    row = {'Model': name, 'Accuracy': round(metrics['accuracy'], 3)}
    for column, label_key in (('F1_SELL', '0'), ('F1_HOLD', '1'), ('F1_BUY', '2')):
        row[column] = class_f1(label_key)
    row['Macro F1'] = round(metrics['macro avg']['f1-score'], 3)
    results.append(row)

    print(f"\n=== {name} ===")
    print(classification_report(y_test, pred, zero_division=0))

# Run the models
# Each candidate is fitted exactly once. The fitted estimator is kept so the
# evaluation and the final signal come from the same object, instead of
# re-training every model for each use.
# The flag says whether the model consumes the standardized features.
candidates = {
    "CatBoost": (CatBoostClassifier(auto_class_weights='Balanced', verbose=False, random_state=42), False),
    "LightGBM": (LGBMClassifier(class_weight='balanced', random_state=42, verbosity=-1), False),
    "XGBoost":  (XGBClassifier(eval_metric='mlogloss', random_state=42), False),
    "Logistic": (LogisticRegression(class_weight='balanced', max_iter=1000), True),
}

fitted = {}
for model_name, (estimator, needs_scaling) in candidates.items():
    train_features = X_train_scaled if needs_scaling else X_train
    test_features = X_test_scaled if needs_scaling else X_test
    fitted[model_name] = estimator.fit(train_features, y_train)
    # Flatten so a model that returns predictions as an (n, 1) column is
    # scored the same way as one that returns a flat array.
    evaluate(model_name, np.asarray(fitted[model_name].predict(test_features)).ravel())

# Show final ranking
df = pd.DataFrame(results).sort_values('Macro F1', ascending=False).reset_index(drop=True)
print("\n" + "="*80)
print("               FINAL WEEKLY BITCOIN STRATEGY")
print("="*80)
print(df.to_string(index=False))

winner = df.iloc[0]['Model']
print(f"\nWINNER: {winner}")

# Live signal
# Scores the most recent row of the test set with the already-fitted winner.
latest = X_test.tail(1).copy()
winner_model = fitted[winner]
winner_needs_scaling = candidates[winner][1]
latest_features = scaler.transform(latest) if winner_needs_scaling else latest
signal = int(np.asarray(winner_model.predict(latest_features)).ravel()[0])

# The window is derived from the scored row's timestamp, not hard-coded, so the
# message stays correct when the data changes.
# Assumes one bar per day, matching the 7-bar label horizon — TODO confirm.
# NOTE(review): this row is the last *labelled* test row, not the newest bar in
# the file, so its outcome is already known.
signal_start = test['open_time'].iloc[-1]
signal_end = signal_start + pd.Timedelta(days=7)
print(f"\nLIVE SIGNAL NEXT 7 DAYS ({signal_start:%b %d} – {signal_end:%b %d, %Y}):")
print("→→→", ["STRONG SELL", "HOLD", "STRONG BUY"][signal], "←←←")


=== CatBoost ===
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.00      0.00      0.00         2
           2       1.00      0.50      0.67        10

    accuracy                           0.42        12
   macro avg       0.33      0.17      0.22        12
weighted avg       0.83      0.42      0.56        12


=== LightGBM ===
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.00      0.00      0.00         2
           2       1.00      0.10      0.18        10

    accuracy                           0.08        12
   macro avg       0.33      0.03      0.06        12
weighted avg       0.83      0.08      0.15        12


=== XGBoost ===
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.00      0.00      0.00         2
           2       1.0