# Linear Regression Training Notebook

This notebook trains a linear regression model to predict the Fear & Greed Index based on Bitcoin price data. It includes automated model versioning for managing challenger and champion models.

In [None]:

# Import required libraries
import polars as pl
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from pathlib import Path
import shutil
import json
from datetime import datetime


In [13]:
# Read the processed 5-minute history and order it chronologically so the
# split below is a time-based hold-out rather than a random one.
data_path = '../data/processed/fear_and_greed_history_5min.parquet'
df = pl.read_parquet(data_path).sort('interval_end_time')

# Feature matrix: single-column 2-D array of Bitcoin close prices.
X = df.select('close_bitcoin_price_usd').to_numpy()
# Target vector: the selected column collapsed to 1-D.
y = df.select('avg_actual_value').to_numpy().flatten()

# Chronological 80/20 split: the earliest 80% of rows train the model,
# the most recent 20% are held out for evaluation.
split_idx = int(0.8 * len(X))
X_train = X[:split_idx]
X_test = X[split_idx:]
y_train = y[:split_idx]
y_test = y[split_idx:]


In [14]:

# Fit ordinary least squares on the training portion
# (fit() returns the estimator itself, so construction and fitting can be chained).
model = LinearRegression().fit(X_train, y_train)

# Score on the held-out portion using mean squared error
y_pred = model.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f'Mean Squared Error: {mse:.4f}')


Mean Squared Error: 9.0430


In [20]:

# Prepare weights DataFrame: one row per model parameter.
weights_df = pl.DataFrame({
    'feature': ['intercept', 'btc_price'],
    'weight': [model.intercept_, model.coef_[0]]
})

# Model versioning: every artifact (champion, backups, challengers, metadata)
# lives in this directory.
models_dir = Path('../models')
models_dir.mkdir(exist_ok=True)

# Metadata describing the model trained in this run.
# float() guards against a numpy scalar type that json cannot serialise.
model_metadata = {
    "version": datetime.now().strftime("%Y%m%d_%H%M%S"),
    "mse": float(mse)
}

model_metadata_path = models_dir / "model_metadata.json"
champion_path = models_dir / "champion_model.csv"

# Load the current champion's metadata, if a champion has been recorded.
# champion_metadata stays None when there is no metadata file.
champion_metadata = None
if model_metadata_path.exists():
    with open(model_metadata_path, 'r') as f:
        champion_metadata = json.load(f)

# Decide whether this run's model becomes the champion.
if champion_metadata is None:
    print("No existing champion model. Saving this model as the champion model.")
    promote = True
elif mse < champion_metadata["mse"]:
    print("New challenger model performs better. Updating champion model.")
    # Backup previous champion model. The metadata file can exist without the
    # CSV (e.g. the CSV was removed by hand); in that case there is nothing to
    # back up and the promotion must still go ahead instead of crashing.
    if champion_path.exists():
        shutil.copy(champion_path, models_dir / f"backup_{champion_metadata['version']}.csv")
    else:
        print("Previous champion weights file not found. Skipping backup.")
    promote = True
else:
    print("Challenger model does not outperform champion model. Not updating.")
    # Keep the losing challenger's weights for later inspection.
    weights_df.write_csv(models_dir / f"challenger_{model_metadata['version']}.csv")
    promote = False

# Single place where the champion weights and metadata are written, so both
# promotion paths stay in sync.
if promote:
    weights_df.write_csv(champion_path)
    with open(model_metadata_path, 'w') as f:
        json.dump(model_metadata, f)


No existing champion model. Saving this model as the champion model.


### Model Output
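The model weights are stored in `../models/`. The champion model is maintained as `champion_model.csv`, with its version and MSE recorded in `model_metadata.json`. If a challenger model performs better, the existing champion model is backed up as `backup_<version>.csv` and replaced; otherwise the challenger's weights are saved as `challenger_<version>.csv`.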