In [None]:
import pandas as pd

# Path of the CSV file that holds the dataset.
filename = "data.csv"
df = pd.read_csv(filename)
# DataFrame.drop returns a new frame instead of modifying `df` in place, so the
# result has to be assigned back; otherwise the column is never removed.
df = df.drop(columns=["relation_label_of_suicide"])  # Eliminate non-numerical data

In [None]:
n_feat = 12  # Choose between "All", 12, 8 or 1

# Columns that are never used as features: the gold standard and the
# rule-based output.
non_feature_cols = ["target_label", "rule_based_output"]

# Importance level 4: gender, suicide ideation, hopelessness, suicides of
# people near, and suicide desire.
level_4_cols = [
    "gender", "ideation", "hopelessness",
    "exposure_to_suicide", "suicidal_desire",
]

# Importance level 3: depression, self-harm, age and tolerance for mental pain.
level_3_cols = ["depression_score", "self_harm", "age", "pain_tolerance"]

if n_feat == "All":
    # Importance levels 1, 2, 3 and 4
    X = df.drop(columns=non_feature_cols)
elif n_feat == 12:
    # Importance levels 1, 2 and 3
    X = df.drop(columns=non_feature_cols + level_4_cols)
elif n_feat == 8:
    # Importance levels 1 and 2
    X = df.drop(columns=non_feature_cols + level_4_cols + level_3_cols)
elif n_feat == 1:
    # Importance level 1: only SCS-R (1 feature).
    # Double brackets keep X a one-column DataFrame and raise a KeyError if
    # the column is missing instead of silently producing an empty frame.
    X = df[["SCS"]]
else:
    # Without this branch an unsupported value would leave X undefined (or,
    # in a live kernel, silently reuse the X of a previous run).
    raise ValueError(
        f'Unsupported n_feat {n_feat!r}: choose between "All", 12, 8 or 1'
    )

# Gold-standard labels, kept as a one-column DataFrame.
y = df[["target_label"]]

In [3]:
# Load all models
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn import svm

In [4]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
import matplotlib.pyplot as plt

In [None]:
# Leave-one-out evaluation: every sample is predicted by a model trained on
# all the other samples.

# model definition - CHOOSE EXACTLY ONE key of MODEL_FACTORIES.
# "GaussianNB" is the default because, with every `clf = ...` assignment
# active, the last one (GaussianNB) was the model that was actually trained.
MODEL_NAME = "GaussianNB"
SEED = 0  # fixed seed so the randomized tree-based models are reproducible

MODEL_FACTORIES = {
    "DecisionTree": lambda: DecisionTreeClassifier(criterion="entropy", random_state=SEED),
    "RandomForest": lambda: RandomForestClassifier(criterion="entropy", random_state=SEED),
    "ExtraTrees": lambda: ExtraTreesClassifier(criterion="entropy", random_state=SEED),
    "XGBoost": lambda: XGBClassifier(random_state=SEED),
    # NOTE(review): LinearRegression is a regressor, so its predictions are
    # continuous values rather than class labels; the classification metrics
    # computed afterwards are expected to reject them - confirm before use.
    "LinearRegression": lambda: LinearRegression(),
    "LogisticRegression": lambda: LogisticRegression(),
    "LinearSVC": lambda: svm.SVC(kernel='linear'),
    "GaussianNB": lambda: GaussianNB(),
}

if MODEL_NAME not in MODEL_FACTORIES:
    raise ValueError(
        f"Unknown MODEL_NAME {MODEL_NAME!r}; choose one of {sorted(MODEL_FACTORIES)}"
    )
make_model = MODEL_FACTORIES[MODEL_NAME]

pred = []  # predicted label of each held-out sample
real = []  # true label of each held-out sample
# Not filled in this cell; kept so that any other cell relying on these names
# still finds them.
Xtr = []
realtr = []
predtr = []

# Work on positionally indexed copies so that holding out "row i" is correct
# whatever index the original frame carries.
features = X.reset_index(drop=True)
# y is a one-column DataFrame; estimators expect a 1-D label vector.
labels = y.iloc[:, 0].reset_index(drop=True)

for i in range(len(features)):
    # data choice
    X_train = features.drop(index=i)
    y_train = labels.drop(index=i)
    # Double brackets keep the held-out row a one-row DataFrame, so the column
    # names seen at predict time match the ones seen at fit time.
    X_test = features.iloc[[i]]
    y_test = labels.iloc[i]

    # model training - a fresh, untrained estimator for every fold
    clf = make_model()
    clf.fit(X_train, y_train)

    # test evaluation
    b = clf.predict(X_test)

    pred.append(b[0])
    real.append(y_test)

In [None]:
# Per-class precision, recall and F1 of the predictions against the true labels.
print(classification_report(real, pred))

# Use one explicit, sorted list of class labels for both the matrix and the
# plot. Without display_labels the axes are numbered 0..n-1, which does not
# necessarily match the actual class values.
class_labels = sorted(set(real) | set(pred))
c = confusion_matrix(real, pred, labels=class_labels)
disp = ConfusionMatrixDisplay(c, display_labels=class_labels)
disp.plot()
disp.ax_.set_title("Confusion matrix")
plt.show()