## Bitcoin Price Prediction

In [5]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the raw Bitcoin, USD and gold CSV exports, merge them into a single
# frame, persist the merged result, and print a short summary of it.
from preprocessing.data_preprocessor import FinancialDataPreprocessor

preprocessor = FinancialDataPreprocessor()

# NOTE(review): these are absolute, machine-specific paths; the cell only runs
# as-is on the original author's machine.
preprocessor.load_data(
    bitcoin_path=r"C:\Users\enesm\OneDrive\Masaüstü\tubitak\data\Bitcoin Historical Data.csv",
    usd_path=r"C:\Users\enesm\OneDrive\Masaüstü\tubitak\data\dolar.csv",
    gold_path=r"C:\Users\enesm\OneDrive\Masaüstü\tubitak\data\XAU_USD Geçmiş Verileri.csv",
)

merged_df = preprocessor.merge_data()

# The same merged_data.csv path is read back by the LSTM and AutoGluon cells.
merged_df.to_csv(
    r"C:\Users\enesm\OneDrive\Masaüstü\tubitak\data\merged_data.csv", index=False
)

# Fixed: a stray trailing "s" after the call made this line a SyntaxError.
data_info = preprocessor.get_data_info()
print("Data Shape:", data_info["shape"])
print("Date Range:", data_info["date_range"])
print("Unique Dates:", data_info["unique_dates"])
print("Missing Values:", data_info["missing_values"])
merged_df.head()

In [None]:
# Show the last rows of the merged frame as the cell output.
merged_df.tail()

### Correlation Matrix

In [None]:

# Pairwise correlation of the merged columns, drawn as an annotated heatmap.
# Only numeric columns are correlated: the merged data carries a "Date"
# column, and DataFrame.corr() no longer silently drops non-numeric columns
# in pandas >= 2.0.
correlation_matrix = merged_df.select_dtypes(include="number").corr()

plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap='coolwarm', cbar=True)
plt.title('Correlation Heatmap')
plt.show()


In [None]:
# Project-local EDA helper, constructed from the merged frame.
from visualization.data_eda import FinancialDataEDA

eda = FinancialDataEDA(merged_df)

# Build the time-series figure and display it.
time_series = eda.plot_time_series()
time_series.show()

In [None]:
# NOTE: this rebinds `correlation_matrix` (earlier the result of
# merged_df.corr()) to whatever plot_correlation_matrix() returns; the object
# is only used here to call .show().
correlation_matrix = eda.plot_correlation_matrix()
correlation_matrix.show()

In [None]:
# returns = eda.plot_returns_distribution()
# returns.show()

# Print the first rows of the detailed correlation table. The header used to
# say "Top 10" while 15 rows were printed; a single `top_n` now drives both so
# the label always matches the output.
top_n = 15
detailed_corr = eda.get_detailed_correlations()
print(f"\nTop {top_n} Strongest Correlations:")
print(detailed_corr.head(top_n))

In [11]:
# risk metrics
# risk_metrics = eda.generate_risk_metrics()
# for asset, metrics in risk_metrics.items():
#    print(f"\n{asset} Metrics:")
#    for metric, value in metrics.items():
#        print(f"{metric}: {value:.2f}")

In [None]:
# NOTE: this import rebinds the name FinancialDataPreprocessor, which an
# earlier cell imported from preprocessing.data_preprocessor.
from preprocessing.model_preprocessor import FinancialDataPreprocessor

# Boundaries of the train and test windows handed to prepare_data().
split_dates = {
    "train_start": "2023-09-01",
    "train_end": "2024-09-14",
    "test_start": "2024-09-15",
    "test_end": "2024-09-30",
}

preprocessor = FinancialDataPreprocessor(lookback_period=30)
processed_data = preprocessor.prepare_data(df=merged_df, **split_dates)

# prepare_data() returns a mapping with "train" and "test" entries.
train_data, test_data = processed_data["train"], processed_data["test"]

preprocessor.check_data_quality(train_data, test_data)

### Feature Selection

In [None]:
# List the columns available in the training split.
train_data.columns

In [None]:
# List the columns available in the test split.
test_data.columns

In [17]:
# Earlier, wider candidate list kept for reference:
# selected_features = ['Date', 'Price','Open', 'High', 'Low', 'Vol.', 'Change %', 'usd_buy',
# 'usd_sell', 'gold_Price', 'gold_Open', 'gold_High', 'gold_Low',
# 'gold_Change', 'RSI', 'MA_7', 'EMA_7', 'MA_14',
# 'EMA_14','Volume_MA','BTC_Gold_Ratio','BTC_USD_Ratio']
#
# Also left out of the current selection: 'usd_sell', 'MA_14'

# Columns kept for modelling; both splits are narrowed to exactly this list.
selected_features = [
    "Date",
    "Price",
    "High",
    "usd_buy",
    "gold_Price",
    "RSI",
    "MA_7",
    "BTC_Gold_Ratio",
    "BTC_USD_Ratio",
]

train_data, test_data = train_data[selected_features], test_data[selected_features]

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Correlation heatmap of the selected training features.
# `train_data` is produced by the feature-selection cell and still contains
# the "Date" column, so only numeric columns are correlated; DataFrame.corr()
# no longer silently drops non-numeric columns in pandas >= 2.0.
correlation_matrix = train_data.select_dtypes(include="number").corr()

# Draw the annotated heatmap.
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap='coolwarm', cbar=True)
plt.title('Correlation Heatmap')
plt.show()


### Correlation After Feature Engineering  

In [None]:
# Print every feature pair in the training data whose absolute correlation is
# at or above `threshold`, strongest first.

# Lift pandas' display limits so the printed table is never truncated.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
pd.set_option("display.width", None)
pd.set_option("display.colheader_justify", "center")

# Only numeric columns are correlated: "Date" is among the selected features
# and DataFrame.corr() no longer drops non-numeric columns in pandas >= 2.0.
correlation_matrix = train_data.select_dtypes(include="number").corr()

# Keep the strict upper triangle (k=1): drops the diagonal and the mirrored
# lower half, so each unordered pair appears once.
mask = np.triu(np.ones(correlation_matrix.shape), k=1).astype(bool)
high_corr_matrix = correlation_matrix.where(mask)

# Long format: one row per (feature, feature, correlation); stack() drops the
# masked-out NaN cells.
high_corr = (
    high_corr_matrix.stack()
    .reset_index()
    .rename(columns={"level_0": "Feature 1", "level_1": "Feature 2", 0: "Correlation"})
)


threshold = 0.8
# .copy() gives an independent frame, so the column assignment below does not
# write into a slice of `high_corr` (SettingWithCopyWarning territory).
high_corr_filtered = high_corr[high_corr["Correlation"].abs() >= threshold].copy()

# Order each pair's names alphabetically so duplicates can be detected
# regardless of which feature came first.
pair_columns = ["Feature 1", "Feature 2"]
high_corr_filtered[pair_columns] = np.sort(
    high_corr_filtered[pair_columns].to_numpy(), axis=1
)
high_corr_filtered = high_corr_filtered.drop_duplicates(subset=pair_columns)

# Strongest correlation first, with a 1-based "Index" for readability.
high_corr_filtered = high_corr_filtered.sort_values(
    by="Correlation", ascending=False
).reset_index(drop=True)
high_corr_filtered.index += 1
high_corr_filtered.index.name = "Index"

print(high_corr_filtered)

In [None]:
# Project-local model pipeline, built from the feature-selected train/test
# splits, then trained and evaluated in one call.
from model.models import FinancialModelPipeline

pipeline = FinancialModelPipeline(train_data, test_data)
pipeline.train_and_evaluate()

In [None]:
# Visualise the results held by the trained pipeline.
from visualization.model_visualizations import ModelVisualizer

visualizer = ModelVisualizer(pipeline)

# Figure comparing the models' metrics.
fig_metrics = visualizer.plot_model_metrics()
fig_metrics.show()

# Figure with the predictions of the best model.
fig_best = visualizer.plot_best_model_predictions()
fig_best.show()

# Figure with the predictions of all models.
fig_all = visualizer.plot_all_predictions()
fig_all.show()

### Feature Importance

In [None]:
# Model names noted by the author as options for this call:
# Decision_Tree, Random_Forest, XGBoost, LightGBM, CatBoost, AdaBoost
fig_importance = visualizer.plot_feature_importance("LightGBM")
# Only show the figure when the call returned a truthy object.
if fig_importance:
    fig_importance.show()

### Model Parameter Tuning

In [15]:
# tune selected models
# pipeline.tune_models(['Random_Forest', 'XGBoost'])

# tune all models
# pipeline.tune_models()

# tuning results
# pipeline.train_and_evaluate()

### LSTM

In [None]:
import json
from datetime import datetime
from model.lstm_model import FinancialLSTM
from preprocessing.lstm_model_preprocessor import LSTMDataPreprocessor
from preprocessing.model_preprocessor import FinancialDataPreprocessor

# Reload the merged data from disk and parse its "Date" column as datetimes.
merged_csv_path = r"C:\Users\enesm\OneDrive\Masaüstü\tubitak\data\merged_data.csv"
df = pd.read_csv(merged_csv_path)
df["Date"] = pd.to_datetime(df["Date"])

# Window length shared by the LSTM preprocessor, the model and the slicing
# of the test split further down.
sequence_length = 20

# Columns handed to the LSTM preprocessor as features.
lstm_feature_columns = [
    "Price",
    "High",
    "Low",
    "Vol.",
    "MA_7",
    "RSI",
    "MACD",
    "BB_middle",
    "Price_Momentum_1",
    "Price_Momentum_3",
]

data_preprocessor = FinancialDataPreprocessor(lookback_period=30)
lstm_preprocessor = LSTMDataPreprocessor(
    sequence_length=sequence_length,
    target_column="Price",
    feature_columns=lstm_feature_columns,
)

# Train/test windows used for the LSTM run.
lstm_split_dates = {
    "train_start": "2022-01-01",
    "train_end": "2024-08-15",
    "test_start": "2024-08-16",
    "test_end": "2024-09-30",
}
preprocessed_data = data_preprocessor.prepare_data(df=df, **lstm_split_dates)

lstm_data = lstm_preprocessor.prepare_lstm_data(preprocessed_data)

# Build the LSTM with this run's hyper-parameters and train it.
model = FinancialLSTM(
    sequence_length=sequence_length,
    lstm_units=[128, 64, 32],
    dropout_rate=0.1,
    epochs=150,
    batch_size=32,
)

metrics = model.train(lstm_data)
print("\nTraining metrics:", metrics)

# Predict on the test sequences, then map the outputs back through the
# preprocessor's inverse transform.
predictions = lstm_preprocessor.inverse_transform_predictions(
    model.predict(lstm_data["test"]["X"])
)

# Take the test rows from offset `sequence_length` onward, one per prediction.
# NOTE(review): presumably the first `sequence_length` rows only serve as
# input context for the first prediction; confirm against LSTMDataPreprocessor.
prediction_window = slice(sequence_length, len(predictions) + sequence_length)
test_split = preprocessed_data["test"]
test_dates = test_split["Date"].values[prediction_window]
y_true = test_split[lstm_preprocessor.target_column].values[prediction_window]

model.plot_predictions(test_dates, y_true, predictions.flatten())

eval_metrics = model.evaluate_predictions(y_true, predictions.flatten())
print("\nTest Set Performance Metrics:")
print(f"MAE: ${eval_metrics['mae']:,.2f}")
print(f"RMSE: ${eval_metrics['rmse']:,.2f}")
print(f"R2 Score: {eval_metrics['r2']:.4f}")
print(f"MAPE: {eval_metrics['mape']:.2f}%")


from pathlib import Path

# Record this run's metrics, hyper-parameters and date ranges as JSON.
performance_log = {
    "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    "metrics": eval_metrics,
    # NOTE: these values are written by hand and must mirror the arguments
    # passed to FinancialLSTM(...) above.
    "model_params": {
        "sequence_length": sequence_length,
        "lstm_units": [128, 64, 32],
        "dropout_rate": 0.1,
        "batch_size": 32,
        "epochs": 150,
    },
    "train_period": f"{preprocessed_data['train']['Date'].iloc[0]} to {preprocessed_data['train']['Date'].iloc[-1]}",
    "test_period": f"{preprocessed_data['test']['Date'].iloc[0]} to {preprocessed_data['test']['Date'].iloc[-1]}",
}

# Fixed: the old literal "model\lstm_performance_log.json" relied on the
# invalid escape "\l" and a Windows-only separator. Path builds the same
# relative location portably.
log_path = Path("model") / "lstm_performance_log.json"
with open(log_path, "w") as f:
    json.dump(performance_log, f, indent=4)

# Fixed: the message used to name a different file than the one written.
print(f"\nModel performance saved to '{log_path}'")

### AutoGluon

In [None]:
from model.automl_autogluon import BitcoinPricePredictor

# Run configuration: input file, train/test windows and the training time
# limit passed to train_model().
PARAMS = dict(
    data_path=r'C:\Users\enesm\OneDrive\Masaüstü\tubitak\data\merged_data.csv',
    train_start="2022-01-01",
    train_end="2024-09-14",
    test_start="2024-09-15",
    test_end="2024-09-30",
    time_limit=600,
)

predictor = BitcoinPricePredictor()

print("Veri hazırlanıyor...")
# prepare_data() takes the path followed by the four window boundaries,
# positionally and in this order.
window_keys = ('train_start', 'train_end', 'test_start', 'test_end')
train_data, test_data = predictor.prepare_data(
    PARAMS['data_path'], *(PARAMS[key] for key in window_keys)
)

print("\nModel eğitimi başlıyor...")
predictor.train_model(time_limit=PARAMS['time_limit'])

print("\nModel değerlendiriliyor...")
results = predictor.evaluate_model()

# One line per metric, four decimal places.
print("\nModel Performans Metrikleri:")
print("-" * 30)
for metric, value in results['metrics'].items():
    print(f"{metric}: {value:.4f}")

# Horizontal bar chart of the ten largest feature-importance values.
plt.figure(figsize=(12, 6))

# The importance result is handled as either a DataFrame (first column is
# used) or a Series-like object (used as-is), then sorted ascending so the
# largest values end up last.
importance_df = results['feature_importance']
importance_series = (
    importance_df.iloc[:, 0]
    if isinstance(importance_df, pd.DataFrame)
    else importance_df
).sort_values(ascending=True)
top_10_features = importance_series.tail(10)

y_pos = np.arange(len(top_10_features))
plt.barh(y_pos, top_10_features.values)
plt.yticks(y_pos, top_10_features.index)
plt.title('En Önemli 10 Feature')
plt.xlabel('Önem Skoru')
plt.tight_layout()
plt.show()

# Actual vs. predicted price over the test dates.
predictions_df = results['predictions']
plt.figure(figsize=(15, 7))

# Both curves share the same x values and styling; only column and legend
# label differ.
plot_dates = test_data['Date'].values
for series_key, series_label in (('actual', 'Gerçek Değer'), ('predicted', 'Tahmin')):
    plt.plot(plot_dates, predictions_df[series_key], label=series_label, alpha=0.7)

plt.title('Bitcoin Fiyat Tahminleri vs Gerçek Değerler')
plt.xlabel('Tarih')
plt.ylabel('Fiyat')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# Histogram (with KDE overlay) of the prediction errors.
plt.figure(figsize=(10, 6))
# Signed error: actual minus predicted.
errors = predictions_df['actual'] - predictions_df['predicted']
sns.histplot(errors, kde=True)
plt.title('Tahmin Hatalarının Dağılımı')
plt.xlabel('Hata')
plt.ylabel('Frekans')
plt.tight_layout()
plt.show()

# Collect dates, actual values, predictions and errors in one table and write
# it to CSV.
# Fixed: the dates are passed as a plain array (as the plots above already
# do). Passing the Series would index-align it against predictions_df and
# produce NaN rows if test_data and predictions_df do not share an index.
results_df = pd.DataFrame({
    'Date': test_data['Date'].values,
    'Actual': predictions_df['actual'],
    'Predicted': predictions_df['predicted'],
    'Error': errors
})

# NOTE(review): absolute, machine-specific output path.
results_path = r'C:\Users\enesm\OneDrive\Masaüstü\tubitak\src\model\autogluon_predictions.csv'
results_df.to_csv(results_path, index=False)
print(f"\nTahminler '{results_path}' dosyasına kaydedildi.")