In [None]:
# BAGIAN A — SETUP & LOADING DATASET
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.svm import SVC

# Upload datakelulusanmahasiswa.csv to the working directory, then run this cell.
df = pd.read_csv("datakelulusanmahasiswa.csv")

print("5 Data Teratas:")
display(df.head())

print("\nInformasi Dataset:")
# DataFrame.info() writes its report to stdout and returns None, so it is
# called directly; wrapping it in print() would emit an extra "None" line.
df.info()

print("\nMissing Values:")
print(df.isnull().sum())

In [None]:
# SECTION B — EDA

print("Statistik Deskriptif:")
display(df.describe())

# Histogram of the "IPK" column with a kernel-density overlay
fig, ax = plt.subplots(figsize=(6, 4))
sns.histplot(df["IPK"], kde=True, ax=ax)
ax.set_title("Distribusi IPK")
plt.show()

# Number of rows per "Status_Lulus" value
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, x="Status_Lulus", ax=ax)
ax.set_title("Jumlah Lulus vs Tidak Lulus")
plt.show()

# "IPK" spread within each "Status_Lulus" group
fig, ax = plt.subplots(figsize=(6, 4))
sns.boxplot(data=df, x="Status_Lulus", y="IPK", ax=ax)
ax.set_title("Perbandingan IPK antara yang Lulus & Tidak Lulus")
plt.show()

In [None]:
# SECTION C — PREPROCESSING

# Drop rows with missing values. The explicit .copy() makes the result an
# independent frame, so the column assignments below do not trigger pandas'
# chained-assignment (SettingWithCopy) warning.
df = df.dropna().copy()

categorical_cols = df.select_dtypes(include=['object']).columns

# One fitted LabelEncoder per object-dtype column, kept in a dict so that the
# integer codes can be mapped back to the original labels later.
label_encoders = {}
for col in categorical_cols:
    label_encoders[col] = LabelEncoder()
    df[col] = label_encoders[col].fit_transform(df[col])

X = df.drop("Status_Lulus", axis=1)
y = df["Status_Lulus"]

# Split BEFORE scaling: the scaler must learn its mean/std from the training
# rows only, otherwise statistics of the test rows leak into training and the
# reported test accuracy is optimistic.
# NOTE(review): consider stratify=y if every class has enough rows for it.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix scaled with the training statistics; kept so that any
# code still referring to X_scaled continues to work.
X_scaled = scaler.transform(X)

len(X_train), len(X_test)

In [None]:
# SECTION D — TRAINING SVM MODELS

# Two baseline classifiers: a linear kernel and an RBF kernel, both with C=1.
models = {
    "Linear C=1": SVC(kernel='linear', C=1),
    "RBF (C=1, gamma=scale)": SVC(kernel='rbf', C=1, gamma='scale')
}

for name, model in models.items():
    print(f"\n=== Training {name} ===")
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    print("Akurasi:", accuracy_score(y_test, y_pred))
    print(classification_report(y_test, y_pred))

    cm = confusion_matrix(y_test, y_pred)
    # confusion_matrix orders rows/columns by the sorted labels that occur in
    # y_test or y_pred; np.union1d yields the same order for the tick labels.
    class_labels = np.union1d(y_test, y_pred)

    fig, ax = plt.subplots()
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_labels, yticklabels=class_labels, ax=ax)
    # Rows are the actual classes and columns the predicted ones; label the
    # axes so the figure can be read without consulting the code.
    ax.set_xlabel("Prediksi")
    ax.set_ylabel("Aktual")
    ax.set_title(f"Confusion Matrix - {name}")
    plt.show()

In [None]:
# Simple hyper-parameter tuning: grid over C and gamma for an RBF-kernel SVC

C_values = [0.1, 1, 10]
gamma_values = ['scale', 0.1, 1]

# Start below any achievable accuracy so the first candidate is always
# recorded. Starting at 0 with a strict '>' comparison left best_model as None
# whenever every candidate scored exactly 0.
best_acc = float("-inf")
best_model = None
best_params = None

for C in C_values:
    for gamma in gamma_values:
        model = SVC(kernel='rbf', C=C, gamma=gamma)
        model.fit(X_train, y_train)
        pred = model.predict(X_test)
        acc = accuracy_score(y_test, pred)

        print(f"C={C} | gamma={gamma} | Akurasi={acc}")

        # Strict '>' keeps the earliest candidate when accuracies tie.
        if acc > best_acc:
            best_acc = acc
            best_model = model
            best_params = {"C": C, "gamma": gamma}

# NOTE(review): candidates are compared on the test split, so best_acc is an
# optimistic estimate; a separate validation split or cross-validation on the
# training rows would give an unbiased figure.
print("\nModel terbaik memiliki akurasi:", best_acc)
print("Parameter terbaik:", best_params)

In [None]:
# SECTION E — INTERPRETATION

print("Kesimpulan Umum:")
print("- Jika model linear lebih baik → data cenderung linear separable.")
print("- Jika RBF lebih baik → data lebih kompleks & butuh boundary non-linear.")

print("\nFitur Paling Berpengaruh:")
# Correlation of every column with the target. The target's own entry (always
# 1.0) is removed so it does not head the ranking.
target_corr = df.corr()['Status_Lulus'].drop('Status_Lulus')
# Rank by magnitude so strongly negative correlations are listed as
# influential too; the displayed values keep their sign.
ranking = target_corr.abs().sort_values(ascending=False).index
correlation = target_corr.reindex(ranking)
display(correlation)

In [None]:
# BAGIAN F — DEPLOYMENT

def predict_status(ipk, sks, umur, lamastudi):
    """Predict the "Status_Lulus" value for a single student.

    Builds a one-row frame from the four inputs, scales it with the
    module-level ``scaler`` and classifies it with the module-level
    ``best_model``.

    Args:
        ipk: Value for the "IPK" feature.
        sks: Value for the "SKS" feature.
        umur: Value for the "Umur" feature.
        lamastudi: Value for the "LamaStudi" feature.

    Returns:
        The predicted label decoded through ``label_encoders["Status_Lulus"]``
        when such an encoder exists; otherwise the raw predicted value.

    Raises:
        RuntimeError: If ``best_model`` has not been set by the tuning cell.
        ValueError: If the scaler was fitted on a different set of feature
            names than the four this function provides.
    """
    if best_model is None:
        raise RuntimeError(
            "best_model is not set; run the tuning cell before predicting."
        )

    data = pd.DataFrame([[ipk, sks, umur, lamastudi]],
                        columns=["IPK", "SKS", "Umur", "LamaStudi"])

    # When the scaler remembers the column names it was fitted on, verify that
    # they match and reorder the row to the fitted order before transforming.
    expected = getattr(scaler, "feature_names_in_", None)
    if expected is not None:
        expected = list(expected)
        if set(expected) != set(data.columns):
            raise ValueError(
                f"Feature mismatch: scaler was fitted on {expected}, "
                f"but predict_status provides {list(data.columns)}."
            )
        data = data[expected]

    data_scaled = scaler.transform(data)
    pred = best_model.predict(data_scaled)

    # An encoder is only stored for columns that were label-encoded; if the
    # target was never encoded, the prediction is already the final value.
    encoder = label_encoders.get("Status_Lulus")
    if encoder is None:
        return pred[0]

    return encoder.inverse_transform(pred)[0]

# Example: print a header, then the prediction for one sample student.
print("\nContoh Prediksi:")
contoh_status = predict_status(3.5, 140, 22, 4)
print(contoh_status)