In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
import joblib

# Load the data
df = pd.read_csv("healthcare-dataset-stroke-data.csv")

# Use every column except 'id' and the 'stroke' target as features
X = df.drop(columns=["id", "stroke"])
y = df["stroke"]

# Categorical and numerical columns
categorical_cols = ["gender", "ever_married", "work_type", "Residence_type", "smoking_status"]
numerical_cols = ["age", "hypertension", "heart_disease", "avg_glucose_level", "bmi"]

# Encode string categories with LabelEncoder (done manually because agent_svm.py uses this).
# The encoders are fitted on the full columns so every category present in the
# data receives a code, even one that would end up only in the test split.
encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col])
    encoders[col] = le

# Split BEFORE fitting any statistics so the held-out rows cannot leak into the
# imputation mean or the scaler; stratify to keep the class ratio equal in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fill missing 'bmi' values using the mean of the training split only
bmi_mean = X_train["bmi"].mean()
X_train = X_train.assign(bmi=X_train["bmi"].fillna(bmi_mean))
X_test = X_test.assign(bmi=X_test["bmi"].fillna(bmi_mean))

# Scaling: fit on the training split, then apply the same transform to the test split
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train the model (random_state makes the probability calibration reproducible)
model = SVC(probability=True, random_state=42)
model.fit(X_train_scaled, y_train)

# Coarse sanity check on the held-out split before persisting anything
print(f"Held-out accuracy: {model.score(X_test_scaled, y_test):.3f}")

# Save the model, the fitted label encoders and the scaler.
# NOTE(review): the encoders file is new; presumably the consumer (agent_svm.py)
# should load it instead of re-deriving the category codes - confirm.
joblib.dump(model, "svm_stroke_model.joblib")
joblib.dump(encoders, "svm_stroke_encoders.joblib")
joblib.dump(scaler, "svm_stroke_scaler.joblib")

['svm_stroke_scaler.joblib']