## Imports
This section contains all the required imports for this model.

In [1]:
from sklearn.svm import SVC
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder

## Data Pre-processing

In [2]:
df = pd.read_csv("schizophrenia_dataset.csv")
df.dropna(inplace=True)
df.rename(columns={
    "Hasta_ID": "Patient_ID",
    "Yaş": "Age",
    "Cinsiyet": "Gender",
    "Eğitim_Seviyesi": "Education_Level",
    "Medeni_Durum": "Marital_Status",
    "Meslek": "Occupation",
    "Gelir_Düzeyi": "Income_Level",
    "Yaşadığı_Yer": "Living_Area",
    "Tanı": "Diagnosis",
    "Hastalık_Süresi": "Disease_Duration",
    "Hastaneye_Yatış_Sayısı": "Hospitalizations",
    "Ailede_Şizofreni_Öyküsü": "Family_History",
    "Madde_Kullanımı": "Substance_Use",
    "İntihar_Girişimi": "Suicide_Attempt",
    "Pozitif_Semptom_Skoru": "Positive_Symptom_Score",
    "Negatif_Semptom_Skoru": "Negative_Symptom_Score",
    "GAF_Skoru": "GAF_Score",
    "Sosyal_Destek": "Social_Support",
    "Stres_Faktörleri": "Stress_Factors",
    "İlaç_Uyumu": "Medication_Adherence"
}, inplace=True)

def categorize_ages(age):
    if age < 18:
        return 1
    elif 18 <= age < 25:
        return 2
    elif 25 <= age < 35:
        return 3
    elif 35 <= age < 45:
        return 4
    elif 45 <= age < 60:
        return 5
    else:
        return 6

df['Age'] = df["Age"].apply(categorize_ages)

def categorize_positive_symptoms(symptoms):
    if symptoms <= 20:
        return 1
    elif 20 <= symptoms <= 40:
        return 2
    elif 40 <= symptoms <= 60:
        return 3
    elif 60 <= symptoms <= 80:
        return 4
    else:
        return 5

df['Positive_Symptom_Score'] = df["Positive_Symptom_Score"].apply(categorize_positive_symptoms)

def categorize_negative_symptoms(symptoms):
    if symptoms <= 20:
        return 1
    elif 20 <= symptoms <= 40:
        return 2
    elif 40 <= symptoms <= 60:
        return 3
    elif 60 <= symptoms <= 80:
        return 4
    else:
        return 5

df['Negative_Symptom_Score'] = df['Negative_Symptom_Score'].apply(categorize_negative_symptoms)

def categorize_gaf(score):
    if score <= 20:
        return 1
    elif 20 <= score <= 40:
        return 2
    elif 40 <= score <= 60:
        return 3
    elif 60 <= score <= 80:
        return 4
    else:
        return 5

df['GAF_Score'] = df['GAF_Score'].apply(categorize_gaf)

df.head()

Unnamed: 0,Patient_ID,Age,Gender,Education_Level,Marital_Status,Occupation,Income_Level,Living_Area,Diagnosis,Disease_Duration,Hospitalizations,Family_History,Substance_Use,Suicide_Attempt,Positive_Symptom_Score,Negative_Symptom_Score,GAF_Score,Social_Support,Stress_Factors,Medication_Adherence
0,1,6,1,4,2,0,2,1,0,0,0,0,0,0,2,3,4,0,2,2
1,2,5,1,5,2,2,1,0,1,35,1,1,1,1,3,4,2,2,2,0
2,3,5,1,5,3,2,1,0,1,32,0,1,0,0,4,5,3,0,1,1
3,4,6,1,3,2,0,2,0,0,0,0,0,1,0,1,2,4,1,1,2
4,5,5,0,1,2,0,2,1,0,0,0,0,0,0,1,2,5,0,1,0


## Data Fitting

In [3]:
# Define features and target
X = df[[
    "Age", "Gender", "Hospitalizations", "Occupation", "Education_Level",
    "Positive_Symptom_Score", "Negative_Symptom_Score", "GAF_Score",
    "Stress_Factors", "Medication_Adherence", "Family_History",
    "Social_Support", "Living_Area", "Suicide_Attempt"
]]
Y = df["Substance_Use"]

# Split data (use 20% test)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y)

# Preprocessing pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), ["Age", "Hospitalizations", "Positive_Symptom_Score", "Negative_Symptom_Score", "GAF_Score"]),
        ("cat", OneHotEncoder(handle_unknown="ignore"), ["Gender", "Living_Area"])]
)

# Apply preprocessing
X_train_preprocessed = preprocessor.fit_transform(X_train)
X_test_preprocessed = preprocessor.transform(X_test)

# Apply SMOTE to training data
smote = SMOTE(sampling_strategy="not majority")
X_resampled, y_resampled = smote.fit_resample(X_train_preprocessed, Y_train)

# Train SVM on resampled data
svc = SVC(kernel="linear", C=1)
svc.fit(X_resampled, y_resampled)

              precision    recall  f1-score   support

           0       0.85      0.78      0.81      1565
           1       0.39      0.51      0.44       435

    accuracy                           0.72      2000
   macro avg       0.62      0.64      0.63      2000
weighted avg       0.75      0.72      0.73      2000



## Predictions

In [4]:
y_pred = svc.predict(X_test_preprocessed)
print(classification_report(Y_test, y_pred))

              precision    recall  f1-score   support

           0       0.85      0.78      0.81      1565
           1       0.39      0.51      0.44       435

    accuracy                           0.72      2000
   macro avg       0.62      0.64      0.63      2000
weighted avg       0.75      0.72      0.73      2000

