## Binary option AI trading

#### made by DiilanOfficial

In [37]:
# import necessary libraby

import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

### Data preparation

In [25]:
# Load the data and specify the date format

df = pd.read_csv('../data/EURJPY_historical_data.csv')

df['Date'] = pd.to_datetime(df['Date'], format='%m/%d/%Y %H:%M')

# Resample data to 30-minute intervals
df_resampled = df.resample('30min', on='Date').agg({
    'Open': 'first',
    'High': 'max',
    'Low': 'min',
    'Close': 'last'
}).dropna().reset_index()

print(df_resampled.head(20))

                  Date     Open     High      Low    Close
0  2024-09-16 03:30:00  155.638  155.645  155.470  155.590
1  2024-09-16 04:00:00  155.597  155.640  155.402  155.601
2  2024-09-16 04:30:00  155.599  155.703  155.549  155.599
3  2024-09-16 05:00:00  155.598  155.670  155.478  155.533
4  2024-09-16 05:30:00  155.536  155.588  155.446  155.580
5  2024-09-16 06:00:00  155.586  155.632  155.444  155.599
6  2024-09-16 06:30:00  155.601  155.621  155.261  155.441
7  2024-09-16 07:00:00  155.443  155.500  155.158  155.240
8  2024-09-16 07:30:00  155.238  155.516  155.162  155.471
9  2024-09-16 08:00:00  155.469  155.540  155.374  155.480
10 2024-09-16 08:30:00  155.478  155.598  155.448  155.550
11 2024-09-16 09:00:00  155.551  155.775  155.536  155.733
12 2024-09-16 09:30:00  155.725  155.974  155.692  155.910
13 2024-09-16 10:00:00  155.919  156.011  155.828  155.879
14 2024-09-16 10:30:00  155.885  155.909  155.758  155.845
15 2024-09-16 11:00:00  155.843  155.863  155.701  155.7

In [26]:
# Calculate SMA5 and RSI

# Function to calculate RSI
def calculate_rsi(data, window):
    delta = data.diff(1)
    gain = (delta.where(delta > 0, 0)).rolling(window=window).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=window).mean()
    RS = gain / loss
    return 100 - (100 / (1 + RS))

df_resampled['SMA_5'] = df_resampled['Close'].rolling(window=5).mean()
df_resampled['RSI'] = calculate_rsi(df_resampled['Close'], 14)  # Implement RSI

print(df_resampled.head(15))

                  Date     Open     High      Low    Close     SMA_5  \
0  2024-09-16 03:30:00  155.638  155.645  155.470  155.590       NaN   
1  2024-09-16 04:00:00  155.597  155.640  155.402  155.601       NaN   
2  2024-09-16 04:30:00  155.599  155.703  155.549  155.599       NaN   
3  2024-09-16 05:00:00  155.598  155.670  155.478  155.533       NaN   
4  2024-09-16 05:30:00  155.536  155.588  155.446  155.580  155.5806   
5  2024-09-16 06:00:00  155.586  155.632  155.444  155.599  155.5824   
6  2024-09-16 06:30:00  155.601  155.621  155.261  155.441  155.5504   
7  2024-09-16 07:00:00  155.443  155.500  155.158  155.240  155.4786   
8  2024-09-16 07:30:00  155.238  155.516  155.162  155.471  155.4662   
9  2024-09-16 08:00:00  155.469  155.540  155.374  155.480  155.4462   
10 2024-09-16 08:30:00  155.478  155.598  155.448  155.550  155.4364   
11 2024-09-16 09:00:00  155.551  155.775  155.536  155.733  155.4948   
12 2024-09-16 09:30:00  155.725  155.974  155.692  155.910  155.

In [41]:
# Labeling Data for Binary Option

df_resampled['Target'] = (df_resampled['Close'].shift(-6) > df_resampled['Close']).astype(int)

# Drop NaN values
df_resampled.dropna(inplace=True)
print(df_resampled['Date'].count())


<class 'numpy.int64'>


### Data processing

In [47]:
# Normalize the Data

# Features for the model
features = ['Open', 'High', 'Low', 'Close', 'SMA_5', 'RSI']

# Normalize the data
scaler = MinMaxScaler()
df_resampled[features] = scaler.fit_transform(df_resampled[features])

# Define X and y
X = df_resampled[features].values
y = df_resampled['Target'].values

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# timestep = 60
# X_train = X_train.reshape((X_train.shape[0], timestep, X_train.shape[1]))
# X_test = X_test.reshape((X_test.shape[0], timestep, X_test.shape[1]))
# y_train = y_train.reshape((y_train.shape[0], timestep, y_train.shape[1]))
# y_test = y_test.reshape((y_test.shape[0], timestep, y_test.shape[1]))


### LSTM Model for Sequential Prediction

In [52]:
# Initialize Random Forest
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
rf_model.fit(X_train, y_train)

# Predict on training set (or test set later)
rf_pred = rf_model.predict(X_train)

print(X_train.shape)

(100, 6)


In [55]:
# Reshape X_train for LSTM (samples, timesteps, features)
timesteps = 6
feature = len(features)

samples = int(X_train.shape[0] / (timesteps * feature))

# X_train_lstm = np.reshape(X_train, (X_train.shape[0], timesteps, feature))

# # Initialize LSTM
# lstm_model = Sequential()
# lstm_model.add(LSTM(units=50, return_sequences=True, input_shape=(timesteps, feature)))
# lstm_model.add(Dropout(0.2))
# lstm_model.add(LSTM(units=50, return_sequences=False))
# lstm_model.add(Dropout(0.2))
# lstm_model.add(Dense(units=1, activation='sigmoid'))

# # Compile the model
# lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# # Train LSTM
# lstm_model.fit(X_train_lstm, y_train, epochs=10, batch_size=32)