Import the necessary libraries

In [63]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import recall_score,f1_score,r2_score,accuracy_score,mean_squared_error,precision_score

Data Loading

In [64]:
# Load the multi-pair forex dataset into `df`; the later cells read from it.
# The file name is kept in one variable so the read call and the error
# message cannot drift apart.
data_file = "audusd.csv"
print(f"Loading data from {data_file}......")

try:
    df = pd.read_csv(data_file)
    print("Data loaded successfully!")
    print("First five rows of the dataset")
    # to_string() prints every column instead of pandas' truncated preview.
    print(df.head().to_string())
except FileNotFoundError:
    print(f"Error: The file '{data_file}' was not found. Please ensure it is in the same directory.")
    # Re-raise so execution stops here with the original traceback rather
    # than carrying on to cells that need `df`.
    raise

Loading data from audusd.csv......
Data loaded successfully!
First five rows of the dataset
         date     clock  open_eurusd  high_eurusd  low_eurusd  close_eurusd  tikvol_eurusd  open_audusd  high_audusd  low_audusd  close_audusd  tikvol_audusd  open_eurgbp  high_eurgbp  low_eurgbp  close_eurgbp  tikvol_eurgbp  open_gbpusd  high_gbpusd  low_gbpusd  close_gbpusd  tikvol_gbpusd  open_nzdusd  high_nzdusd  low_nzdusd  close_nzdusd  tikvol_nzdusd  open_usdchf  high_usdchf  low_usdchf  close_usdchf  tikvol_usdchf  open_usdjpy  high_usdjpy  low_usdjpy  close_usdjpy  tikvol_usdjpy
0  2023.04.24  00:00:00      1.09883      1.09904     1.09861       1.09863            552      0.66947      0.66968     0.66906       0.66926            422      0.88273      0.88321     0.88257       0.88267            373      1.24412      1.24442     1.24361       1.24405            328      0.61382      0.61404     0.61346       0.61381            289      1.09883      1.09904     1.09861       1.09863     

Data Preprocessing and Feature Engineering

In [65]:
# --- Data quality checks -------------------------------------------------
# Number of null entries in each column.
df_missing = df.isna().sum()
print("Missing values")
print(df_missing)

# Number of rows flagged by DataFrame.duplicated() as repeats.
df_duplicated = df.duplicated().sum()
print("Duplcated rows")
print(df_duplicated)

# --- Feature / target selection ------------------------------------------
# The AUD/USD open, high and low columns are the inputs; the AUD/USD close
# column is the value to predict.
feature_columns = ["open_audusd", "high_audusd", "low_audusd"]
target_column = "close_audusd"

X = df[feature_columns]
y = df[target_column]

print("Shape of features (X):", X.shape)
print("Shape of target (y):", y.shape)

Missing values
date             0
clock            0
open_eurusd      0
high_eurusd      0
low_eurusd       0
close_eurusd     0
tikvol_eurusd    0
open_audusd      0
high_audusd      0
low_audusd       0
close_audusd     0
tikvol_audusd    0
open_eurgbp      0
high_eurgbp      0
low_eurgbp       0
close_eurgbp     0
tikvol_eurgbp    0
open_gbpusd      0
high_gbpusd      0
low_gbpusd       0
close_gbpusd     0
tikvol_gbpusd    0
open_nzdusd      0
high_nzdusd      0
low_nzdusd       0
close_nzdusd     0
tikvol_nzdusd    0
open_usdchf      0
high_usdchf      0
low_usdchf       0
close_usdchf     0
tikvol_usdchf    0
open_usdjpy      0
high_usdjpy      0
low_usdjpy       0
close_usdjpy     0
tikvol_usdjpy    0
dtype: int64
Duplcated rows
0
Shape of features (X): (6235, 3)
Shape of target (y): (6235,)


Data Splitting

In [66]:
# Hold out 30% of the rows for evaluation; the remaining 70% are used to fit
# the model. random_state is fixed so the same split is produced on every run.
# NOTE(review): the rows carry date/clock columns and this split uses the
# default shuffling - consider a chronological split if the evaluation is
# meant to reflect forecasting on unseen future bars.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

print("Number of samples in training set:", X_train.shape[0])
print("Number of samples in testing set:", X_test.shape[0])

Number of samples in training set: 4364
Number of samples in testing set: 1871


Model Training

In [67]:
# Fit a random forest regressor on the training split. The ensemble is made
# of 100 decision trees (n_estimators) and random_state is fixed so repeated
# runs build the same forest.

print("Training the Random Forest Regressor model......")
model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)  # fit() returns the fitted estimator itself
print("Model training complete!")

Training the Random Forest Regressor model......
Model training complete!


Model Evaluation

In [68]:
# Score the fitted model on the held-out test split.
y_pred = model.predict(X_test)

# Mean squared error and coefficient of determination of the predictions.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

for label, value in (("Mean Squared Error:", mse), ("R-Squared:", r2)):
    print(label, value)

Mean Squared Error: 1.7623895760555582e-07
R-Squared: 0.9990050034773256


Making a New Prediction

In [69]:
# Interactive loop: read Open/High/Low prices from the user and print the
# model's predicted Close price. Typing 'exit' at the Open prompt ends the loop.
print("Enter the following values to predict the Close price")

# Column labels recorded by the estimator when it was fitted on a DataFrame
# (None if it was fitted on a plain array). Passing the same labels at
# predict time avoids scikit-learn's "X does not have valid feature names"
# warning.
feature_names = getattr(model, "feature_names_in_", None)

while True:
    try:
        open_price_input = input("Enter the Open price(or type 'exit' to quit):")
        # strip() so that " exit " or a trailing space still quits.
        if open_price_input.strip().lower() == "exit":
            break
        open_price = float(open_price_input)
        high_price = float(input("Enter the High price:"))
        low_price = float(input("Enter the Low price:"))

        # An OHLC bar must satisfy Low <= Open <= High; reject anything else
        # (this also rejects NaN, for which every comparison is False).
        if not (low_price <= open_price <= high_price):
            print("Invalid Input. The Open price must lie between the Low and High prices")
            continue

        # One-row, 2D input whose column order matches the training order
        # (open, high, low).
        new_prices = pd.DataFrame(
            [[open_price, high_price, low_price]],
            columns=feature_names,
        )

        predicted_price = model.predict(new_prices)

        print(f"For the given prices, the predicted Close price is: {predicted_price[0]:.4f}")

    except ValueError:
        # Raised by float() on non-numeric text, or by predict() on values it
        # rejects (e.g. infinity).
        print("Invalid Input. Please enter valid numbers for all three fields")