In [5]:
# 0} Importing Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import (accuracy_score, precision_score, recall_score,
                             f1_score, confusion_matrix, classification_report)



# 1} Loading the dataset
df = pd.read_csv('/content/quantvision_financial_dataset_200.csv')


# 2} Encoding Categorical variables
df_encoded = pd.get_dummies(df, columns=['asset_type', 'market_regime'], drop_first=True)

# 3} Defined Features (X) and Target (y)
X = df_encoded.drop('future_trend', axis=1)
y = df_encoded['future_trend']


# 4} Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 5} Scaling numerical features

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


#6} Logistic Regression
lr_model = LogisticRegression(random_state=42)
lr_model.fit(X_train_scaled, y_train)
y_pred_lr = lr_model.predict(X_test_scaled)


#7} Neural Network
nn_model = MLPClassifier(hidden_layer_sizes=(32, 16),
                        activation='relu',
                        solver='adam',
                        max_iter=1000,
                        random_state=42)
nn_model.fit(X_train_scaled, y_train)
y_pred_nn = nn_model.predict(X_test_scaled)


def get_performance_metrics(y_true, y_pred, name):
    metrics = {
        "Model": name,
        "Accuracy": accuracy_score(y_true, y_pred),
        "Precision": precision_score(y_true, y_pred, zero_division=0),
        "Recall": recall_score(y_true, y_pred, zero_division=0),
        "F1-score": f1_score(y_true, y_pred, zero_division=0)
    }
    return metrics


lr_results = get_performance_metrics(y_test, y_pred_lr, "Logistic Regression")
nn_results = get_performance_metrics(y_test, y_pred_nn, "Neural Network (MLP)")

lr_ac = accuracy_score(y_test, y_pred_lr)
nn_ac = accuracy_score(y_test, y_pred_nn)

#8} Comparison Table
results_df = pd.DataFrame([lr_results, nn_results]).set_index("Model")
print("Comparing both the models:")
print(results_df.round(4))
print("-------------------------------------------------------------")

#9} Final Reports
print("=== Logistic Regression ===")
print(f"Confusion Matrix:\n{confusion_matrix(y_test, y_pred_lr)}")
print(classification_report(y_test, y_pred_lr, zero_division=0))
print("-------------------------------------------------------------")

print("=== Neural Network ===")
print(f"Confusion Matrix:\n{confusion_matrix(y_test, y_pred_nn)}")
print(classification_report(y_test, y_pred_nn, zero_division=0))
print("-------------------------------------------------------------")


if lr_ac>nn_ac:
  print("!!!Logistic Regression is better!!!")
elif nn_ac>lr_ac:
  print("!!!Neural Network is better!!!")
else:
  print("!!!Both are equal!!!")




Comparing both the models:
                      Accuracy  Precision  Recall  F1-score
Model                                                      
Logistic Regression      0.925     0.9487  0.9737     0.961
Neural Network (MLP)     0.975     0.9744  1.0000     0.987
-------------------------------------------------------------
=== Logistic Regression ===
Confusion Matrix:
[[ 0  2]
 [ 1 37]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       0.95      0.97      0.96        38

    accuracy                           0.93        40
   macro avg       0.47      0.49      0.48        40
weighted avg       0.90      0.93      0.91        40

-------------------------------------------------------------
=== Neural Network ===
Confusion Matrix:
[[ 1  1]
 [ 0 38]]
              precision    recall  f1-score   support

           0       1.00      0.50      0.67         2
           1       0.97      1.00      0.99     

1.**Why Logistic Regression performs reasonably good or bad?**

*Answer*: Logistic Regression works better on a linear dataset.If the relationship was linear, LR would have outperformed or matched NN.

2.**Why Neural Network performs better or worse?**

*Answer*: Neural Network performs better as the given dataset consists of volatility, trend continuation, symmetry and other factors which makes it a non-linear complex datatset which is too noisy and neural network is very well capable of handling this type of datasets as it has multiple hidden layers which helps it to deal with this dataset more easily. It also uses ReLU function making it more compatible for non-linear datasets.

3.**The effect of volatility on predictions...**

*Answer*:Volatility leads to unpredictability and sudden price fluctuations. This can increase the chances of wrong results. Neural networks can handle this type of data better.

4.**The role of trend continuation...**

*Answer*:It leads to more accuracy in both the models as it leads to contuinity in trends making it to easier predictions. Neural Network includes other factors for better results.

5.**Situations where the model fails and why?**

*Answer*:As discussed above the model can fail in high volatile data, also sudden changes can affect its results strongly.
Also, when the network tries to learn too much or too many details in the training data along with the noise from the training data which results in poor performance on unseen or test dataset. When this happens the network fails to generalize the pattern found in the training data.