In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Load the raw BTC/USD candle data.
data = pd.read_csv('/content/btcusd_1-min_data.csv')

# Convert Timestamp to datetime and extract calendar features.
# pd.to_datetime() interprets bare numbers as *nanoseconds* since the epoch,
# so a column of Unix-second timestamps would collapse onto 1970-01-01 and
# make DayOfWeek/Hour constant. Numeric input is therefore parsed with
# unit='s'; string timestamps go through the default parser as before.
# NOTE(review): assumes numeric timestamps are epoch seconds - confirm
# against the CSV.
if pd.api.types.is_numeric_dtype(data['Timestamp']):
    data['Timestamp'] = pd.to_datetime(data['Timestamp'], unit='s')
else:
    data['Timestamp'] = pd.to_datetime(data['Timestamp'])
data['DayOfWeek'] = data['Timestamp'].dt.dayofweek  # 0 = Monday ... 6 = Sunday
data['Hour'] = data['Timestamp'].dt.hour  # Hour of day, 0-23

# Feature engineering.
# Rolling windows and pct_change() are counted in rows, not calendar days; the
# file name suggests 1-minute rows, in which case these are 5- and 20-minute
# averages and minute-over-minute changes. The column names are kept as they
# are because they are part of the output schema.
data['5_day_MA'] = data['Close'].rolling(window=5).mean()
data['20_day_MA'] = data['Close'].rolling(window=20).mean()
data['Price_Change'] = data['Close'].pct_change()  # Row-over-row relative change
data['Volume_Change'] = data['Volume'].pct_change()

# Drop rows with NaN values: the rolling warm-up rows plus any row where a
# value is missing.
# NOTE(review): a 0 -> 0 volume step yields NaN (0/0) in Volume_Change and is
# dropped here as well; confirm that is intended rather than zero-filling.
data.dropna(inplace=True)

# A change from a zero base (e.g. volume 0 -> x) is +/-inf; treat it as 0.
data.replace([float('inf'), float('-inf')], 0, inplace=True)

# Defensive guard only: NaNs were already dropped above, so this is normally
# a no-op.
data.fillna(0, inplace=True)

# Normalization: rescale every numeric feature to [0, 1].
# NOTE(review): the scaler is fit on the whole dataset; if the data is later
# split into train/test sets, fit on the training part only to avoid leakage.
scaled_columns = [
    'Open', 'High', 'Low', 'Close', 'Volume',
    '5_day_MA', '20_day_MA', 'Price_Change', 'Volume_Change',
]
scaler = MinMaxScaler()
data[scaled_columns] = scaler.fit_transform(data[scaled_columns])

# Create lagged features from the previous remaining row (values are already
# scaled). Rows were dropped above, so the previous row is not guaranteed to
# be the previous timestamp.
data['Close_Lag1'] = data['Close'].shift(1)
data['Volume_Lag1'] = data['Volume'].shift(1)
data.dropna(inplace=True)  # The first row has no predecessor

# Save the processed data to a new CSV file
data.to_csv('processed_crypto_data.csv', index=False)
