## Bitcoin Price Prediction

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
    
import warnings
# Suppress every warning category for the rest of the session.
warnings.filterwarnings('ignore')

In [None]:
from preprocessing.data_preprocessor import FinancialDataPreprocessor

# Load the three raw CSV files, merge them, save the merged frame to disk and
# print the summary returned by the preprocessor.
preprocessor = FinancialDataPreprocessor()

# Change the data paths to the correct ones.
# Forward slashes are used on purpose: they are accepted on Windows as well as
# on Linux/macOS, whereas a backslash is a path separator on Windows only.
preprocessor.load_data(
    bitcoin_path="data/Bitcoin Historical Data.csv",
    usd_path="data/dolar.csv",
    gold_path="data/XAU_USD Geçmiş Verileri.csv",
)

merged_df = preprocessor.merge_data()

# Written without the index column; the LSTM section reads this file back.
merged_df.to_csv("data/merged_data.csv", index=False)

data_info = preprocessor.get_data_info()
print("Data Shape:", data_info["shape"])
print("Date Range:", data_info["date_range"])
print("Unique Dates:", data_info["unique_dates"])
print("Missing Values:", data_info["missing_values"])

# Last expression of the cell, so the notebook renders the first rows.
merged_df.head()

In [None]:
# Display the last rows of the merged frame.
merged_df.tail()

### Correlation Matrix

In [None]:

# Correlate the numeric columns only. The merged frame also carries a Date
# column, and since pandas 2.0 `corr()` no longer drops non-numeric columns by
# default, so the restriction has to be requested explicitly.
correlation_matrix = merged_df.corr(numeric_only=True)

# Annotated heatmap of the pairwise correlations.
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap='coolwarm', cbar=True)
plt.title('Correlation Heatmap')
plt.show()


In [None]:
from visualization.data_eda import FinancialDataEDA

# Wrap the merged frame in the project's EDA helper; `eda` is reused by the
# following cells.
eda = FinancialDataEDA(merged_df)

# Build the time-series figure and display it.
time_series = eda.plot_time_series()
time_series.show()

In [None]:
# Build the correlation figure through the EDA helper and display it.
# Note: this rebinds `correlation_matrix`, which earlier held the correlation
# DataFrame used for the seaborn heatmap.
correlation_matrix = eda.plot_correlation_matrix()
correlation_matrix.show()

In [None]:
# returns = eda.plot_returns_distribution()
# returns.show()

# One constant drives both the heading and the slice so the two cannot
# disagree (the heading used to say "Top 10" while 15 rows were printed).
top_n = 15

detailed_corr = eda.get_detailed_correlations()
print(f"\nTop {top_n} Strongest Correlations:")
print(detailed_corr.head(top_n))

In [None]:
# risk metrics
# risk_metrics = eda.generate_risk_metrics()
# for asset, metrics in risk_metrics.items():
#    print(f"\n{asset} Metrics:")
#    for metric, value in metrics.items():
#        print(f"{metric}: {value:.2f}")

In [None]:
from preprocessing.model_preprocessor import FinancialDataPreprocessor

# Date boundaries of the train/test split handed to the preprocessor.
split_dates = {
    "train_start": "2023-09-01",
    "train_end": "2024-09-14",
    "test_start": "2024-09-15",
    "test_end": "2024-09-30",
}

# `preprocessor` is rebound here: from this cell on it refers to the
# model-stage preprocessor imported above, not the data-loading one.
preprocessor = FinancialDataPreprocessor(lookback_period=15)
processed_data = preprocessor.prepare_data(df=merged_df, **split_dates)

# Pull both partitions out of the returned mapping.
train_data, test_data = processed_data["train"], processed_data["test"]

preprocessor.check_data_quality(train_data, test_data)

### Feature Selection

In [None]:
# Display the columns of the training frame.
train_data.columns

In [None]:
# Display the columns of the test frame.
test_data.columns

In [15]:
# Earlier, wider selection kept for reference:
#   'Date', 'Price', 'Open', 'High', 'Low', 'Vol.', 'Change %', 'usd_buy',
#   'usd_sell', 'gold_Price', 'gold_Open', 'gold_High', 'gold_Low',
#   'gold_Change', 'RSI', 'MA_7', 'EMA_7', 'MA_14',
#   'EMA_14', 'Volume_MA', 'BTC_Gold_Ratio', 'BTC_USD_Ratio'
# Also noted by the author: 'usd_sell', 'MA_14'

# Columns kept for modelling; 'Date' and the 'Price' column are part of the list.
selected_features = [
    'Date',
    'Price',
    'High',
    'usd_buy',
    'gold_Price',
    'RSI',
    'MA_7',
    'BTC_Gold_Ratio',
    'BTC_USD_Ratio',
]

# Narrow both partitions to the selected columns, in the listed order.
train_data = train_data.loc[:, selected_features]
test_data = test_data.loc[:, selected_features]

In [None]:
# Correlate the numeric columns only. The selected features include 'Date',
# and since pandas 2.0 `corr()` no longer drops non-numeric columns by
# default, so the restriction has to be requested explicitly.
correlation_matrix = train_data.corr(numeric_only=True)

# Annotated heatmap of the pairwise correlations in the training frame.
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap='coolwarm', cbar=True)
plt.title('Correlation Heatmap')
plt.show()


### Models

In [None]:
from model.models import FinancialModelPipeline

# Build the model pipeline from the train/test frames and run its combined
# train-and-evaluate step; `pipeline` is reused by the visualisation cells.
pipeline = FinancialModelPipeline(train_data, test_data)
pipeline.train_and_evaluate()

In [None]:
from visualization.model_visualizations import ModelVisualizer

visualizer = ModelVisualizer(pipeline)

# Build and display each figure in turn: model metrics, the best model's
# predictions, then the predictions of all models.
for make_figure in (
    visualizer.plot_model_metrics,
    visualizer.plot_best_model_predictions,
    visualizer.plot_all_predictions,
):
    make_figure().show()

### Feature Importance

In [None]:
# Model names listed by the author as options:
# Decision_Tree, Random_Forest, XGBoost, LightGBM, CatBoost, AdaBoost
fig_importance = visualizer.plot_feature_importance("LightGBM")
# Only display when a truthy figure came back.
# NOTE(review): presumably the helper returns None when the model exposes no
# importances — confirm against ModelVisualizer.
if fig_importance:
    fig_importance.show()

### Model Parameter Tuning

In [None]:
# tune selected models
# pipeline.tune_models(['Random_Forest', 'XGBoost'])

# tune all models
# pipeline.tune_models()

# tuning results
# pipeline.train_and_evaluate()

### LSTM

In [None]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from sklearn.metrics import mean_squared_error
from model.lstm_model import FinancialLSTM
from preprocessing.lstm_model_preprocessor import LSTMDataPreprocessor
from preprocessing.model_preprocessor import FinancialDataPreprocessor

# Reload the merged dataset saved by the data-preparation cell. A forward slash
# is used because it is accepted on Windows as well as on Linux/macOS, whereas
# a backslash is a path separator on Windows only.
df = pd.read_csv("data/merged_data.csv")
# The CSV round-trip loses the datetime dtype, so parse the column again.
df["Date"] = pd.to_datetime(df["Date"])

# Number of consecutive rows fed to the LSTM per sample; shared by the
# preprocessor, the model and the sliding-window prediction further down.
sequence_length = 10

data_preprocessor = FinancialDataPreprocessor(lookback_period=15)
lstm_preprocessor = LSTMDataPreprocessor(
    sequence_length=sequence_length,
    target_column="Price",
    feature_columns=[
        "High",
        "Low",
        "Vol.",
        "MA_7",
        "RSI",
        "MACD",
        "BB_middle",
    ],
)

# Train/test split by date; the test window is the second half of Sept 2024.
preprocessed_data = data_preprocessor.prepare_data(
    df=df,
    train_start="2023-01-01",
    train_end="2024-09-14",
    test_start="2024-09-15",
    test_end="2024-09-30",
)

lstm_data = lstm_preprocessor.prepare_lstm_data(preprocessed_data)

model = FinancialLSTM(
    sequence_length=sequence_length,
    epochs=150,
    batch_size=32,
    lstm_units=[128, 64, 32],
    dropout_rate=0.1,
)

# Train the model and print whatever metrics `train` returns.
metrics = model.train(lstm_data)
print("\nTrain metrics:", metrics)

# Sliding-window input: prepend the last `sequence_length` training rows so
# that the very first test day already has a full window of preceding rows.
combined_data = pd.concat([
    preprocessed_data["train"].iloc[-sequence_length:],
    preprocessed_data["test"]
])

all_test_dates = preprocessed_data["test"]["Date"].values
all_test_prices = preprocessed_data["test"][lstm_preprocessor.target_column].values
# NaN marks "no prediction made". A zero sentinel would be indistinguishable
# from a genuine prediction of exactly 0 and would silently drop it.
all_test_predictions = np.full(len(all_test_dates), np.nan)

# Sliding window prediction: rows [i, i + sequence_length) of the combined
# frame are the inputs used to predict test day i.
for i in range(len(all_test_dates)):
    window = combined_data.iloc[i:i + sequence_length]
    if len(window) < sequence_length:
        # Not enough rows for a full window; leave the NaN marker in place.
        continue

    features = window[lstm_preprocessor.feature_columns].values
    scaled_features = lstm_preprocessor.feature_scaler.transform(features)

    # The model is given a batch containing this single window.
    scaled_pred = model.predict(np.array([scaled_features]))
    pred = lstm_preprocessor.inverse_transform_predictions(scaled_pred)

    all_test_predictions[i] = pred[0][0]

# Boolean mask of the test days that actually received a prediction.
valid_indices = ~np.isnan(all_test_predictions)

# RMSE in the target scaler's space, computed on the predicted days only.
y_true_scaled = lstm_preprocessor.target_scaler.transform(all_test_prices[valid_indices].reshape(-1, 1))
y_pred_scaled = lstm_preprocessor.target_scaler.transform(all_test_predictions[valid_indices].reshape(-1, 1))
scaled_rmse = np.sqrt(mean_squared_error(y_true_scaled, y_pred_scaled))
print(f"\nScaled RMSE: {scaled_rmse:.4f}")

# Plot actual vs. predicted prices for the test window, with a shaded band of
# +/- two standard deviations of the absolute error around the prediction.
valid_dates = all_test_dates[valid_indices]
valid_predictions = all_test_predictions[valid_indices]

fig = go.Figure()

fig.add_trace(
    go.Scatter(
        x=all_test_dates,
        y=all_test_prices,
        name="Actual Price",
        line=dict(color="blue", width=2)
    )
)

fig.add_trace(
    go.Scatter(
        x=valid_dates,
        y=valid_predictions,
        name="Prediction",  # legend label; was misspelled "Predicton"
        line=dict(color="red", width=2, dash="dash"),
    )
)

error = np.abs(all_test_prices[valid_indices] - valid_predictions)
error_band = np.std(error) * 2

# Upper edge of the band: an invisible line that the next trace fills up to.
fig.add_trace(
    go.Scatter(
        x=valid_dates,
        y=valid_predictions + error_band,
        fill=None,
        mode="lines",
        line_color="rgba(255,0,0,0)",
        showlegend=False,
    )
)

# Lower edge of the band; "tonexty" shades the area up to the previous trace.
fig.add_trace(
    go.Scatter(
        x=valid_dates,
        y=valid_predictions - error_band,
        fill="tonexty",
        mode="lines",
        line_color="rgba(255,0,0,0)",
        fillcolor="rgba(255,0,0,0.2)",
        showlegend=False,
    )
)

fig.update_layout(
    # The day count comes from the data instead of a hard-coded "15", so the
    # title stays correct when the test window changes.
    title=f"Bitcoin Price - Actual vs Prediction (All {len(all_test_dates)} Days Test Period)",
    xaxis_title="Date",
    yaxis_title="Price (USD)",
    hovermode="x unified",
    showlegend=True,
    template="plotly_white",
    height=600,
    width=1000,
    xaxis=dict(
        tickformat="%Y-%m-%d",
        tickangle=45,
    ),
    yaxis=dict(tickformat="$,.0f"),
)

fig.show()

# Score the predictions on the days that received one.
eval_metrics = model.evaluate_predictions(
    all_test_prices[valid_indices],
    all_test_predictions[valid_indices],
)

# One row per printed line: label, key in `eval_metrics`, format spec, suffix.
for label, key, spec, suffix in (
    ("MAE", "mae", ",.2f", ""),
    ("RMSE", "rmse", ",.2f", ""),
    ("R2 Skor", "r2", ".4f", ""),
    ("MAPE", "mape", ".2f", "%"),
):
    print(f"{label}: {eval_metrics[key]:{spec}}{suffix}")

### Autogluon

In [None]:
from model.automl_autogluon import BitcoinPricePredictor
from preprocessing.model_preprocessor import FinancialDataPreprocessor

# matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8*" and
# later releases removed the old names, so prefer the new name when this
# installation offers it and fall back to the legacy one otherwise.
seaborn_style = "seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn"
plt.style.use(seaborn_style)
plt.rcParams["figure.figsize"] = (12, 6)


# NOTE: `df` is the frame loaded in the LSTM cell; that cell must run first.
data_preprocessor = FinancialDataPreprocessor(lookback_period=30)
preprocessed_data = data_preprocessor.prepare_data(
    df=df,
    train_start="2023-01-01",
    train_end="2024-09-30",
    test_start="2024-10-01",
    test_end="2024-10-30",
)

# Input columns handed to the predictor; "Date" and "Price" are appended
# separately below.
selected_features = [
    "High",
    "usd_buy",
    "gold_Price",
    "RSI",
    "MA_7",
    "BTC_Gold_Ratio",
    "BTC_USD_Ratio",
]

# Copies, so later changes do not write through to `preprocessed_data`.
train_data = preprocessed_data["train"].copy()
test_data = preprocessed_data["test"].copy()

train_features = train_data[selected_features + ["Date", "Price"]].copy()
test_features = test_data[selected_features + ["Date", "Price"]].copy()

# Diagnostic output: column lists of both frames and any duplicated column
# names in the training frame.
print("Train kolonları:", train_features.columns.tolist())
print("Test kolonları:", test_features.columns.tolist())
print(
    "Tekrarlanan kolonlar:",
    train_features.columns[train_features.columns.duplicated()].tolist(),
)

# `json` and `datetime` were used below without ever being imported; the
# resulting NameError was swallowed by the broad `except`, so neither the
# performance log nor the predictions CSV was ever written.
import json
import traceback
from datetime import datetime

# Training budget passed to the predictor and recorded in the log (10 minutes).
time_limit_seconds = 600

try:
    predictor = BitcoinPricePredictor(
        target_column="Price",
        feature_columns=selected_features,
        time_limit=time_limit_seconds,
    )

    print("\nModel eğitimi başlıyor...")
    predictor.train_model(train_features)

    print("\nModel değerlendiriliyor...")
    results = predictor.evaluate_model(test_features)

    print("\nModel Performans Metrikleri:")
    print("-" * 30)
    for metric, value in results["metrics"].items():
        print(f"{metric}: {value:.4f}")

    # --- Feature importance (horizontal bar chart, least important first) ---
    plt.figure(figsize=(12, 6))
    importance_df = results["feature_importance"]
    # Accept either a DataFrame (its first column is used) or a Series.
    if isinstance(importance_df, pd.DataFrame):
        importance_series = importance_df.iloc[:, 0]
    else:
        importance_series = importance_df

    importance_series = importance_series.sort_values(ascending=True)
    plt.barh(range(len(importance_series)), importance_series.values)
    plt.yticks(range(len(importance_series)), importance_series.index)
    plt.title("Feature Importance")
    plt.xlabel("Importance Score")
    plt.tight_layout()
    plt.show()

    # --- Actual vs. predicted price over the test dates ---
    predictions_df = results["predictions"]
    plt.figure(figsize=(15, 7))
    plt.plot(
        test_features["Date"], predictions_df["actual"], label="Gerçek Değer", alpha=0.7
    )
    plt.plot(
        test_features["Date"], predictions_df["predicted"], label="Tahmin", alpha=0.7
    )
    plt.title("Bitcoin Fiyat Tahminleri vs Gerçek Değerler")
    plt.xlabel("Tarih")
    plt.ylabel("Fiyat")
    plt.legend()
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

    # --- Distribution of the prediction errors (actual - predicted) ---
    plt.figure(figsize=(10, 6))
    errors = predictions_df["actual"] - predictions_df["predicted"]
    sns.histplot(errors, kde=True)
    plt.title("Tahmin Hatalarının Dağılımı")
    plt.xlabel("Hata")
    plt.ylabel("Frekans")
    plt.tight_layout()
    plt.show()

    # --- Persist a JSON performance log ---
    performance_log = {
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        # Cast to built-in floats: NumPy scalar types such as float32 are not
        # JSON serialisable.
        "metrics": {name: float(value) for name, value in results["metrics"].items()},
        "model_params": {
            "time_limit": time_limit_seconds,
            "target_column": "Price",
            "feature_columns": selected_features,
        },
        "train_period": f"{train_features['Date'].iloc[0]} to {train_features['Date'].iloc[-1]}",
        "test_period": f"{test_features['Date'].iloc[0]} to {test_features['Date'].iloc[-1]}",
    }

    with open("model/autogluon_performance_log.json", "w", encoding="utf-8") as f:
        json.dump(performance_log, f, indent=4)

    # --- Persist the per-date predictions as CSV ---
    # NOTE(review): the columns are aligned on their index labels; this assumes
    # `predictions_df` shares its index with `test_features` — confirm against
    # BitcoinPricePredictor.evaluate_model.
    results_df = pd.DataFrame(
        {
            "Date": test_features["Date"],
            "Actual": predictions_df["actual"],
            "Predicted": predictions_df["predicted"],
            "Error": errors,
        }
    )

    results_path = "model/autogluon_predictions.csv"
    results_df.to_csv(results_path, index=False)
    print(f"\nTahminler '{results_path}' dosyasına kaydedildi.")

except Exception as e:
    # Notebook-level boundary: report the failure with its full traceback
    # instead of aborting the cell.
    print(f"Hata oluştu: {str(e)}")
    traceback.print_exc()