VERİ SETİ OLUŞTURMA

In [1]:
import pandas as pd
import numpy as np

# Rastgele veri üretimi için sabit seed
np.random.seed(42)

# Veri seti oluşturma
n_samples = 500
data = {
    'hava_durumu': np.random.choice(['güneşli', 'yağmurlu', 'karlı'], n_samples, p=[0.6, 0.3, 0.1]),
    'uyku_suresi': np.random.normal(7, 1.5, n_samples).clip(4, 10),
    'motivasyon': np.random.randint(1, 11, n_samples),
    'mesafe': np.random.uniform(0.5, 20, n_samples).round(1),
    'gecmis_katilim': np.random.randint(50, 100, n_samples),
}

# Katılım durumu (hedef değişken) oluşturma
probability = (
    0.7 * (data['motivasyon'] + data['uyku_suresi'] > 12) +
    0.3 * (np.array(data['mesafe']) < 10)
)
data['katilim'] = np.random.choice(['geldi', 'gelmedi'], n_samples, p=[probability.mean(), 1 - probability.mean()])

# DataFrame oluşturma
df = pd.DataFrame(data)
df.to_csv('ogrenci_katilim.csv', index=False)
print("Veri seti başarıyla oluşturuldu ve 'ogrenci_katilim.csv' dosyasına kaydedildi!")


Veri seti başarıyla oluşturuldu ve 'ogrenci_katilim.csv' dosyasına kaydedildi!


MODEL EĞİTİMİ VE KARŞILAŞTIRMA

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier, ExtraTreesClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
import numpy as np
from sklearn.ensemble import AdaBoostClassifier
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

# AdaBoostClassifier'ı SAMME algoritmasıyla tanımlayın
model = AdaBoostClassifier(algorithm='SAMME', learning_rate=1.0)


# Veri yükleme
df = pd.read_csv('ogrenci_katilim.csv')

# Kategorik değişkenleri dönüştürme
encoder = OneHotEncoder()
hava_durumu_encoded = encoder.fit_transform(df[['hava_durumu']]).toarray()
hava_durumu_df = pd.DataFrame(hava_durumu_encoded, columns=encoder.get_feature_names_out(['hava_durumu']))

# Veri setini birleştirme
X = pd.concat([df[['uyku_suresi', 'motivasyon', 'mesafe', 'gecmis_katilim']], hava_durumu_df], axis=1)
y = df['katilim']

# Eğitim ve test ayrımı
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Modellerin listesi
models = {
    "Logistic Regression": LogisticRegression(max_iter=200),
    "SVM": SVC(),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "Gradient Boosting": GradientBoostingClassifier(),
    "AdaBoost": AdaBoostClassifier(),
    "Extra Trees": ExtraTreesClassifier(),
    "KNN": KNeighborsClassifier(),
    "Naive Bayes": GaussianNB(),
    "Neural Network": MLPClassifier(max_iter=300)
}

# Performans değerlendirme
results = []

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, pos_label='geldi')
    recall = recall_score(y_test, y_pred, pos_label='geldi')
    f1 = f1_score(y_test, y_pred, pos_label='geldi')
    results.append({"Model": name, "Accuracy": accuracy, "Precision": precision, "Recall": recall, "F1-Score": f1})

# Sonuçları DataFrame olarak görüntüleme
results_df = pd.DataFrame(results)
print(results_df.sort_values(by="F1-Score", ascending=False))


                 Model  Accuracy  Precision    Recall  F1-Score
1                  SVM      0.58   0.580000  1.000000  0.734177
0  Logistic Regression      0.52   0.555556  0.862069  0.675676
8          Naive Bayes      0.53   0.567901  0.793103  0.661871
9       Neural Network      0.49   0.542169  0.775862  0.638298
4    Gradient Boosting      0.55   0.603175  0.655172  0.628099
7                  KNN      0.54   0.593750  0.655172  0.622951
3        Random Forest      0.53   0.587302  0.637931  0.611570
5             AdaBoost      0.51   0.571429  0.620690  0.595041
2        Decision Tree      0.54   0.615385  0.551724  0.581818
6          Extra Trees      0.50   0.574074  0.534483  0.553571


MODELİN KAYDEDİLMESİ

In [3]:
import joblib
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Veri yükleme
df = pd.read_csv('ogrenci_katilim.csv')

# Kategorik değişkenleri dönüştürme
from sklearn.preprocessing import OneHotEncoder
encoder = OneHotEncoder()
hava_durumu_encoded = encoder.fit_transform(df[['hava_durumu']]).toarray()
hava_durumu_df = pd.DataFrame(hava_durumu_encoded, columns=encoder.get_feature_names_out(['hava_durumu']))

# Veri setini birleştirme
X = pd.concat([df[['uyku_suresi', 'motivasyon', 'mesafe', 'gecmis_katilim']], hava_durumu_df], axis=1)
y = df['katilim']

# Eğitim ve test ayrımı
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# SVM modelini oluşturma
svm_model = SVC(kernel='linear')  # Linear kernel kullanarak SVM modelini oluşturuyoruz
svm_model.fit(X_train, y_train)

# Modelin performansını değerlendirme
y_pred = svm_model.predict(X_test)
print("SVM Model Performansı:")
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
print(f"Precision: {precision_score(y_test, y_pred, pos_label='geldi')}")
print(f"Recall: {recall_score(y_test, y_pred, pos_label='geldi')}")
print(f"F1-Score: {f1_score(y_test, y_pred, pos_label='geldi')}")

# Modeli kaydetme
joblib.dump(svm_model, 'svm_model.joblib')


SVM Model Performansı:
Accuracy: 0.58
Precision: 0.58
Recall: 1.0
F1-Score: 0.7341772151898734


['svm_model.joblib']