In [4]:
# Import necessary libraries
import pandas as pd
import numpy as np
import yfinance as yf
import torch
import torch.nn as nn
import torch.optim as optim
import pandas_ta as ta
from sklearn.preprocessing import MinMaxScaler
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()

# Check if CUDA is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Function to fetch price data
def fetch_price_data(symbol, interval='1h', days=6):
    end_date = pd.to_datetime('today')
    start_date = end_date - pd.Timedelta(days=days)  # Fetching data for the last 6 days
    try:
        data = yf.download(symbol, start=start_date, end=end_date, interval=interval)
        if data.empty:
            raise ValueError(f"No data found for {symbol} in the last {days} days.")
    except ValueError as e:
        logger.error(e)
        return None
    return data

# Function to preprocess the data
def preprocess_data(df):
    # Technical indicators using pandas_ta
    df['RSI'] = ta.rsi(df['Close'], length=14)
    macd = ta.macd(df['Close'])
    df['MACD'] = macd['MACD_12_26_9']
    df['MACD_signal'] = macd['MACDs_12_26_9']
    bbands = ta.bbands(df['Close'])
    df['Bollinger_upper'] = bbands.get('BBU_20_2.0', np.nan)
    df['Bollinger_middle'] = bbands.get('BBM_20_2.0', np.nan)
    df['Bollinger_lower'] = bbands.get('BBL_20_2.0', np.nan)

    # Assigning data types
    df = df.astype({
        'Open': 'float64',
        'High': 'float64',
        'Low': 'float64',
        'Close': 'float64',
        'Adj Close': 'float64',
        'Volume': 'int64',
        'RSI': 'float64',
        'MACD': 'float64',
        'MACD_signal': 'float64',
        'Bollinger_upper': 'float64',
        'Bollinger_middle': 'float64',
        'Bollinger_lower': 'float64'
    })

    # Logging the number of NaNs
    logger.info(f"NaNs before dropping: \n{df.isna().sum()}")

    # Drop rows with NaN values
    df.dropna(inplace=True)

    # Logging the number of NaNs after dropping
    logger.info(f"NaNs after dropping: \n{df.isna().sum()}")

    # If data is empty after dropping NaNs, log an error and return None
    if df.empty:
        logger.error("Preprocessed data is empty after dropping NaNs.")
        return None, None

    # Scaling data
    scaler = MinMaxScaler()
    df[['Close', 'RSI', 'MACD', 'MACD_signal', 'Bollinger_upper', 'Bollinger_middle', 'Bollinger_lower']] = scaler.fit_transform(
        df[['Close', 'RSI', 'MACD', 'MACD_signal', 'Bollinger_upper', 'Bollinger_middle', 'Bollinger_lower']]
    )
    
    return df, scaler

# Function to create sequences
def create_sequences(data, seq_length, target_shift):
    xs = []
    ys = []
    for i in range(len(data) - seq_length - target_shift):
        x = data.iloc[i:(i + seq_length)].values
        y = data.iloc[i + seq_length + target_shift]['Close']
        xs.append(x)
        ys.append(y)
    return np.array(xs), np.array(ys).reshape(-1, 1)

# Define the MLP model
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(7, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Function to train the model
def train_model(X_train, y_train, model, criterion, optimizer, num_epochs):
    model.train()
    for epoch in range(num_epochs):
        inputs = torch.tensor(X_train, dtype=torch.float32).to(device)
        targets = torch.tensor(y_train, dtype=torch.float32).to(device)
        optimizer.zero_grad()
        outputs = model(inputs).squeeze()
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        if (epoch + 1) % 10 == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

# Main function
def main():
    # Parameters
    symbol = 'BTC-USD'
    seq_length = 50
    target_shift = 10

    # Fetch and prepare data
    price_data = fetch_price_data(symbol, interval='1h', days=6)
    if price_data is None:
        logger.error(f"No data found for {symbol}. Skipping further processing.")
        return
    try:
        processed_data, scaler = preprocess_data(price_data)
        if processed_data is None:
            return
    except ValueError as e:
        logger.error(e)
        return

    # Log the number of rows in the processed data
    logger.info(f"Number of rows in processed data: {len(processed_data)}")

    # Split data into training and test sets
    train_size = int(len(processed_data) * 0.8)
    train_data, test_data = processed_data[:train_size], processed_data[train_size:]

    # Create sequences
    X_train, y_train = create_sequences(train_data, seq_length, target_shift)
    X_test, y_test = create_sequences(test_data, seq_length, target_shift)

    if len(X_train) == 0 or len(X_test) == 0:
        logger.error("Not enough data to create sequences. Skipping further processing.")
        return

    # Initialize model, criterion, and optimizer
    model = MLP().to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Train model
    train_model(X_train, y_train, model, criterion, optimizer, num_epochs=100)

    # Evaluate model
    model.eval()
    inputs = torch.tensor(X_test, dtype=torch.float32).to(device)
    targets = torch.tensor(y_test, dtype=torch.float32).to(device)
    outputs = model(inputs).squeeze()
    loss = criterion(outputs, targets)
    print(f'Test Loss: {loss.item():.4f}')

# Run the main function
if __name__ == '__main__':
    main()


[*********************100%%**********************]  1 of 1 completed
INFO:root:NaNs before dropping: 
Open                  0
High                  0
Low                   0
Close                 0
Adj Close             0
Volume                0
RSI                  14
MACD                 33
MACD_signal          33
Bollinger_upper     145
Bollinger_middle    145
Bollinger_lower     145
dtype: int64
INFO:root:NaNs after dropping: 
Open                0
High                0
Low                 0
Close               0
Adj Close           0
Volume              0
RSI                 0
MACD                0
MACD_signal         0
Bollinger_upper     0
Bollinger_middle    0
Bollinger_lower     0
dtype: int64
ERROR:root:Preprocessed data is empty after dropping NaNs.


Using device: cuda
