In [1]:
#Importing our dependencies

from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report
from sklearn.datasets import make_classification
import pandas as pd

In [2]:
# Loading data into a pandas DataFrame 

data = pd.read_csv('../CSV_Output/AP_Data_Final.csv')
data

Unnamed: 0.1,Unnamed: 0,industry,wr1,wr2,wr3,wr4,vol1,vol2,vol3,vol4,...,profit_margin,free_cash_margin,volatility,cpi,interest_rate,unemployment_rate,sma,rsi,ema,label
0,0,Technology,0.018456,-0.022085,0.074509,0.008752,199900.0,158700.0,199800.0,158600.0,...,3.58,-5.59,0.032487,287.504,0.33,3.6,42.170000,51.771571,42.022083,1
1,1,Technology,0.077778,-0.024919,-0.043523,-0.030638,92500.0,25900.0,82400.0,16100.0,...,1.43,12.52,0.032998,256.143,2.38,3.6,19.163333,100.000000,19.139583,0
2,2,Consumer Discretionary,-0.009910,0.001704,0.018177,0.038614,536700.0,441600.0,924400.0,813700.0,...,12.61,18.30,0.018859,256.143,2.38,3.6,232.443333,41.526375,232.557500,1
3,3,Energy,0.033547,0.006861,0.011342,0.002944,1450900.0,846500.0,846500.0,1045700.0,...,41.22,39.26,0.026710,303.363,4.83,3.4,77.230000,76.412776,77.845833,1
4,4,Consumer Discretionary,-0.034738,0.004087,-0.001657,0.046206,961100.0,701700.0,786800.0,954900.0,...,4.81,11.13,0.018139,264.877,0.07,6.1,154.733333,19.569263,154.037083,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
310,413,Finance,-0.005778,0.004001,0.016268,0.015779,1758800.0,780600.0,1061200.0,1286800.0,...,10.51,-0.83,0.016265,241.428,0.40,5.0,49.856667,46.733668,49.832917,1
311,416,Telecommunications,-0.020692,-0.031341,-0.015429,0.013455,2703500.0,3270600.0,2822300.0,2214800.0,...,7.59,18.35,0.021450,252.006,1.91,3.8,26.050000,34.243176,26.117500,1
312,417,Technology,-0.072525,0.026684,0.015746,-0.013580,7335700.0,3964000.0,2172900.0,1624000.0,...,-93.82,13.29,0.028981,271.696,0.06,5.9,116.060000,24.607640,114.639167,1
313,418,Health Care,-0.016372,0.017014,0.004806,0.007593,1790500.0,2083700.0,4343200.0,2724700.0,...,4.29,5.16,0.019613,238.654,0.13,5.2,164.140000,46.861314,164.985417,0


In [3]:
#Doing One hot encoding for the 'Industry' column

data_encoded = pd.get_dummies(data, columns=['industry'])
data_encoded = data_encoded.dropna()
data_encoded

Unnamed: 0.1,Unnamed: 0,wr1,wr2,wr3,wr4,vol1,vol2,vol3,vol4,pe_ratio,...,industry_Consumer Discretionary,industry_Consumer Staples,industry_Energy,industry_Finance,industry_Health Care,industry_Industrials,industry_Real Estate,industry_Technology,industry_Telecommunications,industry_Utilities
0,0,0.018456,-0.022085,0.074509,0.008752,199900.0,158700.0,199800.0,158600.0,17.96,...,0,0,0,0,0,0,0,1,0,0
1,1,0.077778,-0.024919,-0.043523,-0.030638,92500.0,25900.0,82400.0,16100.0,13.81,...,0,0,0,0,0,0,0,1,0,0
2,2,-0.009910,0.001704,0.018177,0.038614,536700.0,441600.0,924400.0,813700.0,26.21,...,1,0,0,0,0,0,0,0,0,0
3,3,0.033547,0.006861,0.011342,0.002944,1450900.0,846500.0,846500.0,1045700.0,1.46,...,0,0,1,0,0,0,0,0,0,0
4,4,-0.034738,0.004087,-0.001657,0.046206,961100.0,701700.0,786800.0,954900.0,23.90,...,1,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
310,413,-0.005778,0.004001,0.016268,0.015779,1758800.0,780600.0,1061200.0,1286800.0,21.50,...,0,0,0,1,0,0,0,0,0,0
311,416,-0.020692,-0.031341,-0.015429,0.013455,2703500.0,3270600.0,2822300.0,2214800.0,-20.00,...,0,0,0,0,0,0,0,0,1,0
312,417,-0.072525,0.026684,0.015746,-0.013580,7335700.0,3964000.0,2172900.0,1624000.0,-19.30,...,0,0,0,0,0,0,0,1,0,0
313,418,-0.016372,0.017014,0.004806,0.007593,1790500.0,2083700.0,4343200.0,2724700.0,15.17,...,0,0,0,0,1,0,0,0,0,0


In [4]:
#Setting the X and Y variables

y = data_encoded["label"]
target_names = ["increase", "decrease"]
X = data_encoded.drop('label', axis=1)

In [5]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [6]:
# Create a Gaussian Naive Bayes classifier
naive_bayes_classifier = GaussianNB()

In [7]:
# Train the classifier on the training data
naive_bayes_classifier.fit(X_train, y_train)

In [8]:
# Make predictions on the test data
y_pred = naive_bayes_classifier.predict(X_test)

In [9]:
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.48


In [10]:
# Display classification report
print("Classification Report:\n", classification_report(y_test, y_pred))

Classification Report:
               precision    recall  f1-score   support

           0       0.47      0.97      0.64        37
           1       0.67      0.05      0.09        42

    accuracy                           0.48        79
   macro avg       0.57      0.51      0.36        79
weighted avg       0.58      0.48      0.35        79

