In [None]:
# Cell 1: Imports
import pandas as pd

from src.data_download import CACHE_FILE
from src.features import calculate_all_features
from src.scaling import scale_all_features
from src.train_split import prepare_train_test_split
from src.model_random_forest import *

# Cell 2: Load data
# The cache is read as a mapping with a 'bitcoin_data' entry and an
# 'fgi_data' entry; both are handed to the feature calculation below.
# NOTE: unpickling can execute code embedded in the file, so only load cache
# files that this project wrote itself.
cached_data = pd.read_pickle(CACHE_FILE)
historical_data = cached_data['bitcoin_data']
fgi_data = cached_data['fgi_data']

# Cell 3: Calculate features
features_df = calculate_all_features(historical_data, fgi_data)
print(f"Features calculated: {features_df.shape}")

# Cell 4: Create targets
targets_df = create_prediction_targets(historical_data)
print(f"Targets created: {targets_df.columns.tolist()}")

# Cell 5: Prepare ML features
ml_features = prepare_features_for_random_forest(features_df)
print(f"ML features: {ml_features.shape}")

# Cell 6: Scale features
# NOTE(review): scaling runs on the full feature table *before* the train/test
# split in Cell 7. If scale_all_features fits its statistics on every row,
# test-period information leaks into training -- confirm against src.scaling.
scaled_features = scale_all_features(ml_features)
standardised_features = scaled_features['features_standardised']

# Cell 7: Combine and split
# how='inner' means: only keep rows (dates) that exist in BOTH DataFrames.
combined_features_targets = standardised_features.join(targets_df, how='inner')

# Forecast horizons, in days ahead. Defined once so the split step and the
# training step can never disagree about which horizons exist.
PREDICTION_HORIZONS = (1, 7, 30)

# One train/test dataset per horizon, keyed by the number of days ahead.
# The target column for horizon N is expected to be named 'Target_{N}day'.
train_test_datasets = {
    days_ahead: prepare_train_test_split(
        combined_features_targets,
        f'Target_{days_ahead}day',
    )
    for days_ahead in PREDICTION_HORIZONS
}

# Cell 8: Train models
# Iterate over the prepared datasets themselves (dicts keep insertion order),
# so exactly the horizons that were split above get a model.
results = {}
for days_ahead, dataset in train_test_datasets.items():
    print(f"\nTraining {days_ahead}-day model...")

    model = train_random_forest_model(dataset['X_train'], dataset['y_train'])
    performance = evaluate_model_performance(model, dataset['X_test'], dataset['y_test'])

    print(f"  RMSE: ${performance['rmse']:,.2f}")
    print(f"  MAE:  ${performance['mae']:,.2f}")

    # Persist the fitted model and keep it in memory for the cells that follow.
    save_trained_model(model, days_ahead)
    results[days_ahead] = {'model': model, 'performance': performance}

# Cell 9+: Your visualizations