## نصب کتابخانه‌های مورد نیاز
در این بخش، کتابخانه‌های لازم برای اجرای مدل نصب می‌شوند.

In [None]:
# Install the third-party packages imported by the cells below
# (TensorFlow/Keras, scikit-learn, pandas, NumPy, Matplotlib, joblib).
!pip install tensorflow
!pip install scikit-learn
!pip install pandas numpy matplotlib joblib

## اتصال به Google Drive
در این بخش، Google Drive متصل می‌شود تا بتوانیم داده‌ها را از آن بخوانیم و خروجی‌ها را در آن ذخیره کنیم.

In [None]:
from google.colab import drive

# Mount Google Drive inside the Colab VM; the data file and the output folder
# used later both live under this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

## وارد کردن کتابخانه‌ها
در این بخش، همه کتابخانه‌های مورد نیاز برای اجرای مدل وارد می‌شوند.

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import os
import joblib
from tensorflow.keras.models import save_model
import datetime
import tensorflow as tf
import time

## تنظیم مسیر ذخیره‌سازی در Google Drive
یک پوشه در Google Drive برای ذخیره مدل، گزارش و نمودارها ایجاد می‌کنیم.

In [None]:
# Build a per-run output folder on Google Drive, named after the current
# date and time so that successive runs never overwrite each other.
_run_started_at = datetime.datetime.now()
run_name = _run_started_at.strftime("%Y%m%d_%H%M%S")
save_dir = '/content/drive/MyDrive/Bitcoin_Hourly_Prediction_' + run_name
os.makedirs(save_dir, exist_ok=True)
print(f"خروجی‌ها در مسیر {save_dir} ذخیره خواهند شد.")

## تعریف توابع کمکی
در این بخش، توابع مورد نیاز برای پیش‌پردازش، آموزش و ارزیابی مدل تعریف می‌شوند.

In [None]:
def calculate_rsi(data, periods=14):
    """Compute a simple-moving-average RSI from the ``Close`` column of *data*.

    Args:
        data: DataFrame (or mapping of Series) exposing a ``Close`` series.
        periods: Length of the rolling window used to average gains and losses.

    Returns:
        A Series aligned with ``data['Close']``. The first ``periods - 1``
        entries are NaN because the rolling window is not yet full.
    """
    change = data['Close'].diff()
    # Split each change into its upward and downward part; anything that is
    # not strictly up (or down), including the leading NaN, counts as zero.
    upward = change.where(change > 0, 0)
    downward = -change.where(change < 0, 0)
    avg_gain = upward.rolling(window=periods).mean()
    avg_loss = downward.rolling(window=periods).mean()
    relative_strength = avg_gain / avg_loss
    return 100 - (100 / (1 + relative_strength))

def calculate_rsi_for_row(last_sequence_orig, periods=14):
    """Return the RSI at the end of a price history.

    The value is computed the same way as ``calculate_rsi``: gains and losses
    are averaged over the last ``periods`` price changes, with moves in the
    other direction counted as zero.

    Args:
        last_sequence_orig: Either a 1-D array of close prices, or a 2-D array
            whose first column holds the close prices (oldest row first).
        periods: Number of most recent price changes to average over.

    Returns:
        The RSI as a float in [0, 100]. When there is no price movement at all
        (or fewer than two prices) the neutral value 50.0 is returned; when
        there are gains but no losses the result is 100.0.
    """
    history = np.asarray(last_sequence_orig, dtype=float)
    # Accept both a plain price vector and a feature matrix with Close first.
    prices = history if history.ndim == 1 else history[:, 0]
    delta = np.diff(prices)[-periods:]
    if delta.size == 0:
        return 50.0

    gain = np.clip(delta, 0, None).mean()
    loss = np.clip(-delta, 0, None).mean()
    if loss == 0:
        # No losses in the window: RSI saturates at 100 unless the price was flat.
        return 100.0 if gain > 0 else 50.0
    rs = gain / loss
    return float(100 - (100 / (1 + rs)))

# Column order assumed only when the scaler carries no feature names
# (the layout the original positional code was written for: Close first).
_LEGACY_FEATURE_ORDER = (
    'Close', 'High', 'Low', 'Open', 'Volume', 'Hour', 'Day_of_Week', 'Month',
    'Is_Weekend', 'Hourly_Return', 'Daily_Return', 'SMA_24', 'SMA_50',
    'SMA_200', 'RSI', 'Volatility_12h', 'Volatility_24h', 'Price_Gap',
    'Volume_Ratio', 'Price_Range',
)

def _resolve_feature_names(scaler_X, n_features):
    """Return the feature names, in column order, of the scaler's input."""
    names = getattr(scaler_X, 'feature_names_in_', None)
    if names is not None:
        return [str(name) for name in names]
    if n_features == len(_LEGACY_FEATURE_ORDER):
        return list(_LEGACY_FEATURE_ORDER)
    raise ValueError(
        "ساختار ویژگی‌ها مشخص نیست: scaler_X باید روی DataFrame آموزش دیده باشد "
        "یا ورودی باید ۲۰ ستون داشته باشد."
    )

def _recover_close_history(window_orig, col):
    """Return the close prices up to and including the last row of the window."""
    if 'Close' in col:
        return window_orig[:, col['Close']].astype(float)
    missing = [name for name in ('Open', 'Price_Gap', 'Hourly_Return') if name not in col]
    if missing:
        raise ValueError(
            f"ستون‌های لازم برای بازسازی قیمت بسته‌شدن موجود نیستند: {missing}"
        )
    # Price_Gap = Open - previous Close, so every row reveals the close before it.
    previous_close = window_orig[:, col['Open']] - window_orig[:, col['Price_Gap']]
    # Hourly_Return = Close / previous Close - 1 yields the last row's own close.
    final_close = previous_close[-1] * (1.0 + window_orig[-1, col['Hourly_Return']])
    return np.append(previous_close, final_close).astype(float)

def _sample_std(values):
    """Sample standard deviation (ddof=1, the pandas default); 0.0 below two values."""
    return float(np.std(values, ddof=1)) if len(values) > 1 else 0.0

def predict_future_days(model, last_sequence, scaler_X, scaler_y, future_hours=24,
                        last_date=None, close_history=None):
    """Forecast close prices hour by hour, feeding each prediction back in.

    After every prediction a synthetic feature row for the predicted hour is
    built (by feature name, not by position), scaled with ``scaler_X`` and
    appended to the input window while the oldest row is dropped.

    Args:
        model: Object with a Keras-style ``predict(x, verbose=0)`` method that
            maps a ``(1, lag, n_features)`` array to a scaled close price.
        last_sequence: Scaled input window of shape ``(lag, n_features)`` or
            ``(1, lag, n_features)``.
        scaler_X: Fitted feature scaler. Its ``feature_names_in_`` attribute,
            when present, defines the column layout; otherwise a 20-column
            layout with ``Close`` first is assumed.
        scaler_y: Fitted target scaler used to undo the scaling of predictions.
        future_hours: Number of hourly steps to forecast.
        last_date: Timestamp of the last observed row. Defaults to
            ``2025-03-10 23:00:00``, the value that used to be hard-coded.
        close_history: Optional non-empty sequence of observed close prices
            (oldest first) ending at ``last_date``. A longer history makes the
            50/200-hour averages exact. When omitted, closes are taken from the
            ``Close`` column or rebuilt from ``Open``, ``Price_Gap`` and
            ``Hourly_Return``.

    Returns:
        Tuple ``(predictions, dates)`` of NumPy arrays with ``future_hours``
        entries each; ``dates`` holds ``pandas.Timestamp`` objects.

    Raises:
        ValueError: If the feature layout cannot be determined or contains
            features this function does not know how to synthesise.
    """
    print(f"\nپیش‌بینی قیمت‌ها برای {future_hours} ساعت آینده...")

    # Normalise the window to 2-D so both (lag, n) and (1, lag, n) inputs work.
    window_scaled = np.asarray(last_sequence, dtype=float)
    window_scaled = window_scaled.reshape(-1, window_scaled.shape[-1])
    lag, n_features = window_scaled.shape

    names = _resolve_feature_names(scaler_X, n_features)
    col = {name: idx for idx, name in enumerate(names)}
    unsupported = [name for name in names if name not in _LEGACY_FEATURE_ORDER]
    if unsupported or len(names) != n_features or 'Volume' not in col:
        raise ValueError(
            f"ویژگی‌های scaler_X با ویژگی‌های مورد انتظار (شامل Volume) سازگار نیستند: {unsupported}"
        )
    scaler_has_names = getattr(scaler_X, 'feature_names_in_', None) is not None

    window_orig = scaler_X.inverse_transform(window_scaled)
    if close_history is None:
        closes = list(_recover_close_history(window_orig, col))
    else:
        closes = list(np.asarray(close_history, dtype=float).ravel())
    volumes = list(window_orig[:, col['Volume']])

    if last_date is None:
        last_date = '2025-03-10 23:00:00'  # legacy default: end of the bundled data set
    last_date = pd.to_datetime(last_date)

    future_predictions = []
    future_dates = []
    current_sequence = window_scaled.copy()

    for step in range(1, future_hours + 1):
        next_pred = model.predict(current_sequence.reshape(1, lag, n_features), verbose=0)
        next_pred_orig = scaler_y.inverse_transform(next_pred)
        close = float(next_pred_orig[0][0])
        next_date = last_date + pd.Timedelta(hours=step)
        future_predictions.append(next_pred_orig[0][0])
        future_dates.append(next_date)

        previous_close = closes[-1]
        # Close 24 hours before the predicted hour; fall back to the oldest
        # known close when the history is shorter than a day.
        day_ago_close = closes[-24] if len(closes) >= 24 else closes[0]
        closes.append(close)
        volumes.append(volumes[-1])  # no volume forecast: carry the last value forward

        recent = np.asarray(closes[-200:], dtype=float)
        returns = np.diff(recent) / recent[:-1]
        mean_volume = float(np.mean(volumes[-24:]))
        high, low = close * 1.001, close * 0.999  # crude +/-0.1% candle around the close

        values = {
            'Close': close,
            'High': high,
            'Low': low,
            'Open': close,
            'Volume': volumes[-1],
            'Hour': next_date.hour,
            'Day_of_Week': next_date.dayofweek,
            'Month': next_date.month,
            'Is_Weekend': 1 if next_date.dayofweek >= 5 else 0,
            'Hourly_Return': (close - previous_close) / previous_close,
            'Daily_Return': (close - day_ago_close) / day_ago_close,
            'SMA_24': recent[-24:].mean(),
            'SMA_50': recent[-50:].mean(),
            'SMA_200': recent[-200:].mean(),
            # 15 closes give the 14 price changes a 14-period RSI needs.
            'RSI': calculate_rsi_for_row(recent[-15:].reshape(-1, 1), periods=14),
            # Training uses the rolling std of hourly returns, not of prices.
            'Volatility_12h': _sample_std(returns[-12:]),
            'Volatility_24h': _sample_std(returns[-24:]),
            'Price_Gap': close - previous_close,
            'Volume_Ratio': volumes[-1] / mean_volume if mean_volume else 1.0,
            'Price_Range': high - low,
        }

        new_row = np.array([[values[name] for name in names]], dtype=float)
        # Hand a named frame to a name-aware scaler to avoid a feature-name warning.
        row_input = pd.DataFrame(new_row, columns=names) if scaler_has_names else new_row
        new_row_scaled = np.asarray(scaler_X.transform(row_input))
        current_sequence = np.vstack([current_sequence[1:], new_row_scaled])

    return np.array(future_predictions), np.array(future_dates)

def load_data(file_path):
    """Load the hourly price CSV and derive the model's feature columns.

    The CSV is read while skipping its second and third lines, the ``Price``
    column is parsed as the datetime index (renamed to ``Date``), the OHLCV
    columns are coerced to numbers, and calendar, return, moving-average, RSI,
    volatility, gap, volume-ratio and range features are added. Rows with any
    missing or infinite value are dropped.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        The cleaned DataFrame indexed by ``Date``.

    Raises:
        ValueError: If no row survives the cleaning step.
        Exception: Any other error from reading or processing is printed and
            re-raised unchanged.
    """
    print(f"\nخواندن داده‌ها از فایل: {file_path}")
    try:
        df = pd.read_csv(file_path, skiprows=[1, 2])
        print(f"شکل اولیه داده‌ها: {df.shape}")
        print(f"ستون‌های اولیه: {df.columns.tolist()}\n")
        # The pre-computed column is replaced by our own Daily_Return below;
        # tolerate files that do not ship it.
        df = df.drop(columns='Daily Return', errors='ignore')
        df['Price'] = pd.to_datetime(df['Price'])
        df.set_index('Price', inplace=True)
        df.index.name = 'Date'
        numeric_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
        df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')
        print("نمونه‌ای از داده‌های اولیه:")
        print(df.head(), "\n")
        print("اضافه کردن ویژگی‌های جدید...")
        # Calendar features
        df['Hour'] = df.index.hour
        df['Day_of_Week'] = df.index.dayofweek
        df['Month'] = df.index.month
        df['Is_Weekend'] = df.index.dayofweek.isin([5, 6]).astype(int)
        # Returns over 1 and 24 rows
        df['Hourly_Return'] = df['Close'].pct_change()
        df['Daily_Return'] = df['Close'].pct_change(24)
        # Trend and momentum indicators
        df['SMA_24'] = df['Close'].rolling(window=24).mean()
        df['SMA_50'] = df['Close'].rolling(window=50).mean()
        df['SMA_200'] = df['Close'].rolling(window=200).mean()
        df['RSI'] = calculate_rsi(df, periods=14)
        df['Volatility_12h'] = df['Hourly_Return'].rolling(window=12).std()
        df['Volatility_24h'] = df['Hourly_Return'].rolling(window=24).std()
        # Candle-shape and volume features
        df['Price_Gap'] = df['Open'] - df['Close'].shift(1)
        df['Volume_Ratio'] = df['Volume'] / df['Volume'].rolling(window=24).mean()
        df['Price_Range'] = df['High'] - df['Low']
        # Ratios with a zero denominator yield +/-inf, which dropna() keeps and
        # the scalers later reject; treat them as missing values instead.
        df = df.replace([np.inf, -np.inf], np.nan)
        print("\nقبل از حذف NaN:")
        print(f"تعداد رکوردها: {len(df)}")
        print(f"تعداد ویژگی‌ها: {len(df.columns)}")
        print("تعداد NaN در هر ستون:")
        print(df.isna().sum())
        df_cleaned = df.dropna()
        if df_cleaned.empty:
            raise ValueError("پس از حذف NaN هیچ داده‌ای باقی نمانده است!")
        print("\nپس از حذف NaN:")
        print(f"تعداد رکوردها: {len(df_cleaned)}")
        print(f"تعداد ویژگی‌ها: {len(df_cleaned.columns)}")
        print(f"بازه زمانی: از {df_cleaned.index[0]} تا {df_cleaned.index[-1]}\n")
        return df_cleaned
    except Exception as e:
        # Report the failure to the notebook user, then let the caller see it.
        print(f"خطا در خواندن یا پردازش داده‌ها: {str(e)}")
        raise

def save_plots_and_report(save_dir, history, train_metrics, test_metrics, future_predictions, future_dates, data, train_predictions, test_predictions, y_train_original, y_test_original):
    """Write the loss plot, the price plot and the text report into *save_dir*.

    Args:
        save_dir: Existing directory that receives the three output files.
        history: Training history whose ``history`` dict has ``loss`` and
            ``val_loss`` entries.
        train_metrics: Dict with ``MSE``, ``RMSE``, ``MAE`` and ``R2`` values
            for the training set.
        test_metrics: Same keys as *train_metrics*, for the test set.
        future_predictions: Forecast prices, parallel to *future_dates*.
        future_dates: Timestamps of the forecast prices.
        data: DataFrame whose index supplies the x-axis of the test curves.
        train_predictions: Not used by this function.
        test_predictions: Predicted test prices, plotted against the tail of
            ``data.index``.
        y_train_original: Not used by this function.
        y_test_original: Actual test prices, plotted against the tail of
            ``data.index``.
    """
    # --- Loss curves ------------------------------------------------------
    loss_fig, loss_ax = plt.subplots(figsize=(12, 6))
    for key, caption in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
        loss_ax.plot(history.history[key], label=caption)
    loss_ax.set_title('Model Loss')
    loss_ax.set_xlabel('Epoch')
    loss_ax.set_ylabel('Loss')
    loss_ax.legend()
    loss_fig.savefig(os.path.join(save_dir, 'loss_plot.png'))
    plt.close(loss_fig)

    # --- Actual vs. predicted prices, followed by the forecast -------------
    price_fig, price_ax = plt.subplots(figsize=(14, 7))
    actual_index = data.index[-len(y_test_original):]
    predicted_index = data.index[-len(test_predictions):]
    price_ax.plot(actual_index, y_test_original, label='Actual Price')
    price_ax.plot(predicted_index, test_predictions, label='Predicted Price')
    price_ax.plot(future_dates, future_predictions, '--', label='Future Predictions (24 hours)')
    price_ax.set_title('Bitcoin Hourly Price Prediction')
    price_ax.set_xlabel('Date')
    price_ax.set_ylabel('Price (USD)')
    price_ax.legend()
    price_fig.savefig(os.path.join(save_dir, 'price_prediction_plot.png'))
    plt.close(price_fig)

    # --- Text report --------------------------------------------------------
    # (label, dict key, format spec) for every metric line, in output order.
    metric_layout = (
        ('MSE', 'MSE', '.2f'),
        ('RMSE', 'RMSE', '.2f'),
        ('MAE', 'MAE', '.2f'),
        ('R²', 'R2', '.4f'),
    )
    sections = (
        ("معیارهای آموزشی:\n", train_metrics),
        ("معیارهای تست:\n", test_metrics),
    )
    with open(os.path.join(save_dir, 'evaluation_report.txt'), 'w', encoding='utf-8') as report:
        report.write("گزارش ارزیابی مدل پیش‌بینی قیمت بیت‌کوین ساعتی\n")
        report.write("==================================================\n\n")
        for heading, metrics in sections:
            report.write(heading)
            for label, key, spec in metric_layout:
                report.write(f"{label}: {metrics[key]:{spec}}\n")
            report.write("\n")  # blank line closing the section
        report.write("پیش‌بینی قیمت برای ۲۴ ساعت آینده:\n")
        for date, price in zip(future_dates, future_predictions):
            report.write(f"{date}: ${price:.2f}\n")

def _make_lstm_windows(features, targets, lag):
    """Pair each run of `lag` consecutive feature rows with the target right after it."""
    positions = range(lag, len(features))
    windows = np.array([features[i - lag:i] for i in positions])
    labels = np.array([targets[i] for i in positions])
    return windows, labels

def predict_with_lstm(data, lag_days=24, epochs=50, batch_size=32, validation_split=0.1,
                     dropout=0.3, early_stopping_patience=10, restore_best_weights=True,
                     optimizer='adam', loss='mse'):
    """Train a stacked LSTM on *data*, evaluate it, forecast and save everything.

    The rows are split chronologically (first 88% for training), both scalers
    are fitted on the training part only, and sliding windows of ``lag_days``
    rows are used to predict the ``Close`` of the following row. The trained
    model and the scalers are written to the module-level ``save_dir`` before
    the forecast is attempted, so a forecasting error cannot discard them.

    Args:
        data: DataFrame with a ``Close`` column (the target) and feature columns.
        lag_days: Number of consecutive rows in each input window.
        epochs: Maximum number of training epochs.
        batch_size: Mini-batch size passed to ``model.fit``.
        validation_split: Fraction of the training windows held out by Keras.
        dropout: Recorded in the returned parameters only.
            NOTE(review): the Dropout layers below use fixed rates.
        early_stopping_patience: Patience of the EarlyStopping callback.
        restore_best_weights: Forwarded to the EarlyStopping callback.
        optimizer: Recorded in the returned parameters only.
            NOTE(review): training always uses Adam with learning rate 1e-4.
        loss: Loss passed to ``model.compile``.

    Returns:
        Tuple ``(model, scaler_X, scaler_y, model_params)`` where
        ``model_params`` is a dict of the arguments this function was called with.

    Raises:
        ValueError: If the training or test part has no more than ``lag_days``
            rows, so that no window can be built from it.
    """
    start_time = time.time()
    model_params = {
        'lag_days': lag_days,
        'epochs': epochs,
        'batch_size': batch_size,
        'validation_split': validation_split,
        'dropout': dropout,
        'early_stopping_patience': early_stopping_patience,
        'restore_best_weights': restore_best_weights,
        'optimizer': optimizer,
        'loss': loss
    }
    print("\nبررسی داده‌های ورودی:")
    print(f"شکل داده‌ها: {data.shape}")
    print(f"ستون‌های موجود: {data.columns.tolist()}")

    # Chronological split: no shuffling, the test part is the most recent 12%.
    X = data.drop(['Close'], axis=1)
    y = data['Close']
    train_size = int(len(X) * 0.88)
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]
    if min(len(X_train), len(X_test)) <= lag_days:
        raise ValueError(f"تعداد نمونه‌ها برای ساخت پنجره‌های {lag_days} تایی کافی نیست.")

    # Fit the scalers on the training part only to avoid look-ahead leakage.
    scaler_X = StandardScaler()
    scaler_y = StandardScaler()
    X_train_scaled = scaler_X.fit_transform(X_train)
    y_train_scaled = scaler_y.fit_transform(y_train.values.reshape(-1, 1))
    X_test_scaled = scaler_X.transform(X_test)
    y_test_scaled = scaler_y.transform(y_test.values.reshape(-1, 1))

    X_lstm_train, y_lstm_train = _make_lstm_windows(X_train_scaled, y_train_scaled, lag_days)
    X_lstm_test, y_lstm_test = _make_lstm_windows(X_test_scaled, y_test_scaled, lag_days)
    print(f"Length of X_lstm_train: {len(X_lstm_train)}, Length of y_lstm_train: {len(y_lstm_train)}")
    print(f"Length of X_lstm_test: {len(X_lstm_test)}, Length of y_lstm_test: {len(y_lstm_test)}")

    model = Sequential([
        Input(shape=(lag_days, X.shape[1])),
        LSTM(128, activation='tanh', return_sequences=True,
             kernel_regularizer=tf.keras.regularizers.l1_l2(l1=0.01, l2=0.01)),
        BatchNormalization(),
        Dropout(0.3),
        LSTM(64, activation='tanh', return_sequences=True,
             kernel_regularizer=tf.keras.regularizers.l1_l2(l1=0.01, l2=0.01)),
        BatchNormalization(),
        Dropout(0.3),
        LSTM(32, activation='tanh',
             kernel_regularizer=tf.keras.regularizers.l1_l2(l1=0.01, l2=0.01)),
        BatchNormalization(),
        Dropout(0.2),
        Dense(16, activation='relu',
              kernel_regularizer=tf.keras.regularizers.l1_l2(l1=0.01, l2=0.01)),
        BatchNormalization(),
        Dropout(0.1),
        Dense(1)
    ])
    # NOTE(review): this replaces whatever was passed as `optimizer`.
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(optimizer=optimizer, loss=loss)
    early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=early_stopping_patience,
        restore_best_weights=restore_best_weights
    )
    reduce_lr = ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.2,
        patience=5,
        min_lr=0.00001,
        verbose=1
    )
    history = model.fit(
        X_lstm_train, y_lstm_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=validation_split,
        callbacks=[early_stopping, reduce_lr],
        verbose=1
    )
    processing_time = time.time() - start_time
    print(f"\nزمان کل پردازش: {processing_time:.2f} ثانیه")

    # Persist the expensive artefacts right away, before any later step can fail.
    model_path = os.path.join(save_dir, 'lstm_model.h5')
    model.save(model_path)
    scaler_X_path = os.path.join(save_dir, 'scaler_X.pkl')
    scaler_y_path = os.path.join(save_dir, 'scaler_y.pkl')
    joblib.dump(scaler_X, scaler_X_path)
    joblib.dump(scaler_y, scaler_y_path)

    # Evaluate in the original price scale.
    train_predictions = model.predict(X_lstm_train, verbose=0)
    test_predictions = model.predict(X_lstm_test, verbose=0)
    train_predictions = scaler_y.inverse_transform(train_predictions)
    test_predictions = scaler_y.inverse_transform(test_predictions)
    y_train_original = scaler_y.inverse_transform(y_lstm_train)
    y_test_original = scaler_y.inverse_transform(y_lstm_test)
    train_metrics = {
        'MSE': mean_squared_error(y_train_original, train_predictions),
        'RMSE': np.sqrt(mean_squared_error(y_train_original, train_predictions)),
        'MAE': mean_absolute_error(y_train_original, train_predictions),
        'R2': r2_score(y_train_original, train_predictions)
    }
    test_metrics = {
        'MSE': mean_squared_error(y_test_original, test_predictions),
        'RMSE': np.sqrt(mean_squared_error(y_test_original, test_predictions)),
        'MAE': mean_absolute_error(y_test_original, test_predictions),
        'R2': r2_score(y_test_original, test_predictions)
    }
    print("\nنتایج ارزیابی مدل:")
    print("معیارهای آموزش:")
    for metric, value in train_metrics.items():
        print(f"{metric}: {value:.4f}")
    print("\nمعیارهای تست:")
    for metric, value in test_metrics.items():
        print(f"{metric}: {value:.4f}")

    # Forecast from the most recent `lag_days` rows as a 2-D (lag, features)
    # window. The last evaluation window stops one row earlier (its target is
    # the final known close), so it is not the right starting point.
    last_sequence = X_test_scaled[-lag_days:]
    future_predictions, future_dates = predict_future_days(model, last_sequence, scaler_X, scaler_y)
    print("\nپیش‌بینی قیمت برای ۲۴ ساعت آینده:")
    for date, price in zip(future_dates, future_predictions):
        print(f"{date.strftime('%Y-%m-%d %H:%M:%S')}: ${price:.2f}")

    save_plots_and_report(save_dir, history, train_metrics, test_metrics, future_predictions, future_dates, data, train_predictions, test_predictions, y_train_original, y_test_original)
    return model, scaler_X, scaler_y, model_params

## بارگذاری داده‌ها و اجرای مدل
داده‌ها از Google Drive بارگذاری می‌شوند و مدل آموزش داده می‌شود.

In [None]:
# Input CSV on the mounted Google Drive (the file name suggests hourly
# Bitcoin data from 2017-01-01 to 2025-03-10).
file_path = '/content/drive/MyDrive/bitcoin_20170101_20250310_1h.csv'
data = load_data(file_path)

# Training options, forwarded to predict_with_lstm as keyword arguments.
training_options = {
    'lag_days': 24,
    'epochs': 50,
    'batch_size': 32,
    'validation_split': 0.1,
    'dropout': 0.3,
    'early_stopping_patience': 10,
    'restore_best_weights': True,
    'optimizer': 'adam',
    'loss': 'mse',
}
model, scaler_X, scaler_y, model_params = predict_with_lstm(data, **training_options)

## نکته
برای سرعت بیشتر، مطمئن شوید که از GPU استفاده می‌کنید:
- به منوی Runtime > Change runtime type بروید.
- در بخش Hardware accelerator، گزینه GPU را انتخاب کنید.

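فایل‌های خروجی (مدل، اسکالرها، گزارش و نمودارها) در مسیر زیر در Google Drive ذخیره می‌شوند؛ به‌جای `{run_name}` تاریخ و زمان شروع اجرا (با قالب `YYYYMMDD_HHMMSS`) قرار می‌گیرد و مسیر دقیق هنگام اجرای سلول «تنظیم مسیر ذخیره‌سازی» چاپ می‌شود:
`/content/drive/MyDrive/Bitcoin_Hourly_Prediction_{run_name}`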