In [1]:
import numpy as np
import pandas as pd
import joblib
import os

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

# -------------------------------------------
# 1. Load the data from CSV
# -------------------------------------------
# Relative path, resolved against the current working directory; make sure it
# matches where the dataset file actually lives.
data_path = 'content/cleaned_data_kelulusan.csv'
df = pd.read_csv(data_path)

# Quick look at the dataset (optional).
print(df.head())
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# must not be wrapped in print() -- that would emit a stray "None" line.
df.info()

# -------------------------------------------
# 2. Build the target column 'lulus_tepat_waktu'
# -------------------------------------------
# Labelling rule:
# - 'Masa Studi' of at most 4 counts as graduating on time (1); anything longer is 0.
# NOTE(review): a missing 'Masa Studi' compares as False and is therefore
# labelled 0 -- confirm that is the intended treatment.
df['lulus_tepat_waktu'] = df['Masa Studi'].le(4).astype(int)

# -------------------------------------------
# 3. Select the features to use (IPS1 through IPS7)
# -------------------------------------------
feature_columns = [f'IPS{idx}' for idx in range(1, 8)]
y = df['lulus_tepat_waktu']
X = df.loc[:, feature_columns]

# Per-feature minimum observed among the rows labelled as on-time graduates,
# derived directly from the dataset rather than hard-coded.
df_lulus = df.loc[df['lulus_tepat_waktu'].eq(1)]
min_ips_lulus = df_lulus.loc[:, feature_columns].min().to_dict()

# -------------------------------------------
# 4. Split the data: train & test
# -------------------------------------------
# 20% of the rows are held out for testing; the split is stratified on the
# label and uses a fixed seed so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

# -------------------------------------------
# 5. Scaling (standardization)
# -------------------------------------------
# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# -------------------------------------------
# 6. Train the SVM model with balanced class weights
# -------------------------------------------
# probability=True makes SVC run an internal, shuffled cross-validation to
# calibrate its probability estimates. Without a fixed random_state those
# estimates differ from run to run, so the seed is pinned (same value as the
# train/test split) to make the saved model reproducible.
svm_model = SVC(
    kernel='rbf',
    C=5.0,
    gamma='scale',
    probability=True,
    class_weight='balanced',
    random_state=42,
)
svm_model.fit(X_train_scaled, y_train)

# -------------------------------------------
# 7. Evaluate the model
# -------------------------------------------
# Predict on the scaled test split and report the metrics against y_test.
y_pred = svm_model.predict(X_test_scaled)
acc = accuracy_score(y_test, y_pred)

# (heading, value) pairs, printed in this order.
for heading, value in (
    ("Accuracy:", acc),
    ("Classification Report:\n", classification_report(y_test, y_pred)),
    ("Confusion Matrix:\n", confusion_matrix(y_test, y_pred)),
):
    print(heading, value)

# -------------------------------------------
# 8. Save the model & scaler
# -------------------------------------------
model_dir = "model_trained"
os.makedirs(model_dir, exist_ok=True)  # no error if the directory already exists

# File name -> object to persist; dumped in this order.
artifacts = {
    'svm_model.pkl': svm_model,
    'scaler.pkl': scaler,
    'min_ips_lulus.pkl': min_ips_lulus,
}
for filename, artifact in artifacts.items():
    joblib.dump(artifact, os.path.join(model_dir, filename))

print("Model, Scaler, dan Minimum IPS telah disimpan di 'model_trained/'")

        NIM                         NAMA JK               Asal Sekolah  \
0  18090044                Ilham Akhsani  L  Smk Maarif Nu 03 Larangan   
1  18090057           Qirani Rifa Annisa  P              Smk N 3 Tegal   
2  18090090                Rina Listiana  P              Smk N 1 Slawi   
3  18090095  Fauziah Nur Zaerina Eriyadi  P             Sma N 2 Brebes   
4  18090138          Hafizh Umar Syafiqh  L            Man 1 Tangerang   

      Jurusan  Tahun Lulus  Tahun Masuk D4  Masa Studi  \
0  Multimedia         2018            2018         4.0   
1         Tkj         2018            2018         4.0   
2         Tkj         2018            2018         4.0   
3         Ips         2017            2018         4.0   
4         Ipa         2018            2018         4.0   

   Usia pada saat Lulus D4  IPS1  IPS2  IPS3  IPS4  IPS5  IPS6  IPS7  IPK8  \
0                      NaN  3.57   3.9  3.48  3.77  3.57  3.57  3.25  3.60   
1                      NaN  3.71   3.9  3.65  3.86