In [24]:
# QuantVision Mid-Eval Code
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, classification_report)
import tensorflow as tf
from tensorflow import keras

df = pd.read_csv("quantvision_financial_dataset_200.csv")

df = df.dropna().reset_index(drop=True)

X = df.drop(columns=["future_trend"])
y = df["future_trend"].astype(int)

categorical_cols = ["asset_type", "market_regime"]
binary_cols = ["high_volatility", "trend_continuation"]
numeric_cols = [c for c in X.columns if c not in categorical_cols + binary_cols]

preprocess = ColumnTransformer([
    ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_cols),
    ("num", StandardScaler(), numeric_cols),
    ("bin", "passthrough", binary_cols)
])

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [25]:
logreg = Pipeline([
    ("preprocess", preprocess),
    ("model", LogisticRegression(max_iter=2000))
])

logreg.fit(X_train, y_train)
y_pred_lr = logreg.predict(X_test)


In [26]:

X_train_p = preprocess.fit_transform(X_train)
X_test_p = preprocess.transform(X_test)

if hasattr(X_train_p, "toarray"):
    X_train_p = X_train_p.toarray()
    X_test_p = X_test_p.toarray()

X_train_p = X_train_p.astype(np.float32)
X_test_p = X_test_p.astype(np.float32)

mlp = keras.Sequential([
    keras.layers.Input(shape=(X_train_p.shape[1],)),
    keras.layers.Dense(32, activation="relu"),
    keras.layers.Dense(16, activation="relu"),
    keras.layers.Dense(1, activation="sigmoid")
])

mlp.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

mlp.fit(
    X_train_p, y_train,
    validation_split=0.3,
    epochs=50,
    batch_size=32,
    verbose=1
)

y_pred_nn = (mlp.predict(X_test_p).ravel() >= 0.5).astype(int)


Epoch 1/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 72ms/step - accuracy: 0.3891 - loss: 0.7412 - val_accuracy: 0.6458 - val_loss: 0.6731
Epoch 2/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.5979 - loss: 0.6837 - val_accuracy: 0.7292 - val_loss: 0.6348
Epoch 3/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.7685 - loss: 0.6322 - val_accuracy: 0.8125 - val_loss: 0.6004
Epoch 4/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.8668 - loss: 0.5855 - val_accuracy: 0.8750 - val_loss: 0.5699
Epoch 5/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.8872 - loss: 0.5550 - val_accuracy: 0.8958 - val_loss: 0.5421
Epoch 6/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.9546 - loss: 0.5149 - val_accuracy: 0.9167 - val_loss: 0.5166
Epoch 7/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━

In [27]:

def evaluate_model(name, y_true, y_pred):
    print(f"\n{name}")
    print("Accuracy :", accuracy_score(y_true, y_pred))
    print("Precision:", precision_score(y_true, y_pred, zero_division=0))
    print("Recall   :", recall_score(y_true, y_pred, zero_division=0))
    print("F1-score :", f1_score(y_true, y_pred, zero_division=0))
    print("Confusion Matrix:\n", confusion_matrix(y_true, y_pred))

print("=== Logistic Regression Results ===")
evaluate_model("Logistic Regression", y_test, y_pred_lr)

print("\n=== Neural Network Results ===")
evaluate_model("Neural Network (MLP)", y_test, y_pred_nn)


=== Logistic Regression Results ===

Logistic Regression
Accuracy : 0.925
Precision: 0.925
Recall   : 1.0
F1-score : 0.961038961038961
Confusion Matrix:
 [[ 0  3]
 [ 0 37]]

=== Neural Network Results ===

Neural Network (MLP)
Accuracy : 0.925
Precision: 0.925
Recall   : 1.0
F1-score : 0.961038961038961
Confusion Matrix:
 [[ 0  3]
 [ 0 37]]


In [28]:
comparison = pd.DataFrame({
        "Model": ["Logistic Regression", "Neural Network"],
    "Accuracy": [lr_results["Accuracy"], nn_results["Accuracy"]],
    "Precision": [lr_results["Precision"], nn_results["Precision"]],
    "Recall": [lr_results["Recall"], nn_results["Recall"]],
    "F1-score": [lr_results["F1-score"], nn_results["F1-score"]]
})

comparison


Unnamed: 0,Model,Accuracy,Precision,Recall,F1-score
0,Logistic Regression,0.925,0.925,1.0,0.961039
1,Neural Network,0.925,0.925,1.0,0.961039


Analysis & Financial Interpretation

Logistic regression performs well as a baseline because features like trend_continuation and technical_score have an almost linear relationship with future price. It works best in stable markets but struggles during volatile or varying conditions due to its linear nature.

The neural network generally performs better because it captures non-linear interactions between indicators such as volatility, candlestick variance, and pattern symmetry. This allows it to handle complex market behavior more effectively, although sudden changes can still reduce accuracy.

High volatility increases errors for both models. The neural network handles volatility slightly better than Logistic Regression.

Trend continuation is a strong positive indicator for both models. Failures mainly occur during sideways markets, sudden trend reversals, and extreme volatility.

Overall, logistic regression provides interpretability, while neural network offers better predictive performance.

