In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score,confusion_matrix

In [None]:
# Read the dataset CSV into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer='/content/sample_data/quantvision_financial_dataset_200.csv'
)


In [None]:
# Remove every row that contains at least one missing value, then renumber the index from 0.
df = df.dropna(axis=0, how="any").reset_index(drop=True)

In [None]:
# x holds the nine predictor columns; y holds the target column (future_trend).
x = df.loc[
    :,
    [
        "lookback_days",
        "asset_type",
        "market_regime",
        "high_volatility",
        "technical_score",
        "edge_density",
        "slope_strength",
        "candlestick_variance",
        "pattern_symmetry",
    ],
]
y = df.loc[:, "future_trend"]

In [None]:
# Group the input columns by kind: numeric columns vs. string-valued (categorical) columns.
numeric_features = [
    "lookback_days",
    "high_volatility",
    "technical_score",
    "edge_density",
    "slope_strength",
    "candlestick_variance",
    "pattern_symmetry",
]
categorical_features = [
    "asset_type",
    "market_regime",
]

In [None]:
# Preprocessing: numeric columns are standardised, categorical columns are
# one-hot encoded (dropping the first level of each categorical column).
categorical_transformer = OneHotEncoder(drop="first")
numeric_transformer = StandardScaler()

preprocess = ColumnTransformer(
    [
        ("num", numeric_transformer, numeric_features),
        ("cat", categorical_transformer, categorical_features),
    ]
)

In [None]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the class ratio of
# future_trend the same in the training and test splits, which matters here because
# the dataset is small and the two classes are heavily imbalanced.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=35,
    stratify=y,
)

In [None]:
# Logistic-regression model: the preprocessing step and the classifier chained in one pipeline.
clf = Pipeline(
    [
        ("preprocessor", preprocess),
        ("classifier", LogisticRegression(random_state=35, max_iter=800)),
    ]
)

In [None]:
# Train the whole pipeline (preprocessing + classifier) on the training split.
clf.fit(X=x_train, y=y_train)

In [None]:
#building the neural network model
def build_model(input_shape):
    """Build and compile a small feed-forward binary classifier.

    Architecture: Input -> Dense(128, relu) -> Dense(64, relu) -> Dense(1, sigmoid),
    compiled with the Adam optimizer, binary cross-entropy loss and an accuracy metric.

    Args:
        input_shape: Number of input features per sample (an int). It is used
            as the single dimension of the model's input shape.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    model = keras.Sequential([
        # Declare the feature dimension with an explicit Input object; passing
        # input_shape= to the first Dense layer makes Keras emit a warning.
        keras.Input(shape=(input_shape,)),
        layers.Dense(128, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(1, activation='sigmoid'),
    ])
    loss_fn = keras.losses.BinaryCrossentropy()
    model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])
    return model

In [None]:
# Fit the preprocessing on the training split only, then apply the fitted
# transform to the test split.
x_train_processed = preprocess.fit_transform(x_train)
x_test_processed = preprocess.transform(x_test)

# Convert the processed features and the labels to float32 tensors.
x_train_tf, y_train_tf, x_test_tf, y_test_tf = (
    tf.convert_to_tensor(values, dtype=tf.float32)
    for values in (x_train_processed, y_train, x_test_processed, y_test)
)

In [None]:
# Seed the Python, NumPy and backend random generators so that weight
# initialisation (and the training that follows) is repeatable on re-run.
# 35 matches the random_state used for the split and the logistic regression.
keras.utils.set_random_seed(35)

# Create the network with one input per preprocessed feature column.
model = build_model(x_train_processed.shape[1])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Train the network for 200 epochs in mini-batches of 32, holding out 20% of
# the training tensors as validation data.
model.fit(
    x=x_train_tf,
    y=y_train_tf,
    batch_size=32,
    epochs=200,
    validation_split=0.2,
)

Epoch 1/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 105ms/step - accuracy: 0.4437 - loss: 0.7158 - val_accuracy: 0.9062 - val_loss: 0.5753
Epoch 2/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - accuracy: 0.8625 - loss: 0.5455 - val_accuracy: 0.9062 - val_loss: 0.4783
Epoch 3/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step - accuracy: 0.9125 - loss: 0.4468 - val_accuracy: 0.9062 - val_loss: 0.4162
Epoch 4/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step - accuracy: 0.9396 - loss: 0.3479 - val_accuracy: 0.9062 - val_loss: 0.3785
Epoch 5/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 0.9302 - loss: 0.3081 - val_accuracy: 0.9062 - val_loss: 0.3533
Epoch 6/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - accuracy: 0.9479 - loss: 0.2499 - val_accuracy: 0.9062 - val_loss: 0.3381
Epoch 7/200
[1m4/4[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x78ccaef2f740>

In [None]:
print("NEURAL NETWORK MODEL EVALUATION:\n")
# Model outputs are sigmoid probabilities, one per test row.
y_pred_prob = model.predict(x_test_tf)
# Threshold at 0.5 and flatten to a 1-D vector of 0/1 labels.
y_pred = (y_pred_prob >= 0.5).astype(int).reshape(-1)
y_true = y_test_tf.numpy()
# Compute the four scalar metrics in the same order they are reported below.
accuracy, precision, recall, f1 = (
    metric(y_true, y_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)
cm = confusion_matrix(y_true, y_pred)
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)
print("\nConfusion Matrix:")
print(cm)

NEURAL NETWORK MODEL EVALUATION:

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step
Accuracy: 0.95
Precision: 0.9736842105263158
Recall: 0.9736842105263158
F1-score: 0.9736842105263158

Confusion Matrix:
[[ 1  1]
 [ 1 37]]


In [None]:
print("LOGISTIC REGRESSION MODEL EVALUATION:")
# The pipeline applies its own preprocessing, so it takes the raw test features.
y_pred = clf.predict(x_test)
# Compute the four scalar metrics in the same order they are reported below.
accuracy, precision, recall, f1 = (
    metric(y_test, y_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)
cm = confusion_matrix(y_test, y_pred)
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)
print("\nConfusion Matrix:")
print(cm)

LOGISTIC REGRESSION MODEL EVALUATION:
Accuracy: 0.925
Precision: 0.9487179487179487
Recall: 0.9736842105263158
F1-score: 0.961038961038961

Confusion Matrix:
[[ 0  2]
 [ 1 37]]


***ANALYSIS AND FINANCIAL INTERPRETATION***
1. Logistic Regression fits a linear decision boundary, but the relationship between market features and the future trend is not necessarily linear, so it performs well only to a certain extent.
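2. Neural Networks can model non-linear relationships, so we get slightly better results. This shows in the respective test accuracies that we have calculated (0.95 for the neural network versus 0.925 for logistic regression), although on a 40-sample test set this gap corresponds to a single prediction, so it should be read as suggestive rather than conclusive.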
3. High volatility makes correct predictions harder, because in such periods the market can turn either bullish or bearish and the direction is difficult to call.
4. Trend continuation makes it easier for our model to make the correct prediction. Hence it helps in achieving high accuracy.
5. In cases of high volatility, sideways markets, or candlestick patterns the model has not seen before, it fails to make the right decision. Financial markets are also affected by external events and company-related news, but our model relies only on statistical features and does not capture the sentiment of market participants.