In [1]:
# =================================================================
# Tahapan 1: Business Understanding (Pemahaman Bisnis)
# =================================================================

import pandas as pd
import numpy as np
import re
import joblib 

# --- SOLUSI ERROR: Tambahkan 2 baris ini ---
from sklearn.experimental import enable_iterative_imputer
# ------------------------------------------

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.impute import IterativeImputer # <-- Import ini sekarang berfungsi
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [2]:
# =================================================================
# Tahapan 2: Data Understanding (Pemahaman Data)
# =================================================================
file_name = "Dataset Pelatihan.csv"
df = pd.read_csv(file_name, delimiter=';')

print("--- Data Understanding: Inspeksi Data Awal ---")
print(f"Bentuk data: {df.shape}")
print("\n5 Baris Data Pertama:")
print(df.head().to_markdown(index=False))
print("\nInformasi Tipe Data dan Missing Value Awal:")
df.info()

--- Data Understanding: Inspeksi Data Awal ---
Bentuk data: (100, 9)

5 Baris Data Pertama:
|   ID | Nama   |   Umur | Gender    | Nilai    | Matkul     | Tanggal    |   UTS |   UAS |
|-----:|:-------|-------:|:----------|:---------|:-----------|:-----------|------:|------:|
|    1 | Budi   |     24 | Laki-laki | 85       | Kimia      | 09/08/2023 |    90 |    80 |
|    2 | Ani    |     21 | Perempuan | 77.05.00 | Matematika | 15/08/2023 |    75 |    80 |
|    3 | Joko   |     20 | Laki-laki | 90       | Biologi    | 15/08/2023 |    85 |    95 |
|    4 | Siti   |     21 | Perempuan | 60       | Matematika | 09/08/2023 |    55 |    65 |
|    5 | Agus   |     23 | Laki-laki | 77.05.00 | Fisika     | 09/08/2023 |    80 |    75 |

Informasi Tipe Data dan Missing Value Awal:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 9 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   ID       100 non-null    int64 
 1   

In [3]:
# =================================================================
# Tahapan 3: Data Preparation (Persiapan Data)
# Menggunakan Pipeline untuk pemrosesan sistematis
# =================================================================

# 1. Pembersihan Khusus (Cleaning)
df.drop_duplicates(inplace=True)

def clean_nilai(value):
    if pd.isna(value) or value == '': return None
    try:
        s_value = str(value).strip()
        s_value = re.sub(r'(\.\d{1,2}\.0{1,2})', '', s_value)
        s_value = re.sub(r'(\.\d{1,2})', '', s_value)
        s_value = re.sub(r'[a-zA-Z\s]', '', s_value)
        if '.' in s_value: s_value = s_value.split('.')[0]
        if s_value.isdigit(): return float(s_value)
        return None
    except Exception: return None

df['Nilai_clean'] = df['Nilai'].apply(clean_nilai)

# 2. Pembuatan Target dan Pembuangan Kolom
df['Lulus'] = (df['Nilai_clean'] >= 75).astype(int)
df.drop(columns=['Nilai_clean', 'Nilai', 'Nama', 'ID', 'Tanggal'], inplace=True) 

X = df.drop(columns=['Lulus'])
y = df['Lulus']

# Membagi Data sebelum Transformasi
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
print(f"Data Training sebelum transformasi: {X_train.shape[0]} baris")

# 3. Definisikan Transformer
numerical_features = ['Umur', 'UTS', 'UAS']
categorical_features = ['Gender', 'Matkul']

# Pipeline Numerik: Imputasi Iteratif (menangani outlier secara implisit) dan Scaling
numerical_transformer = Pipeline(steps=[
    ('imputer', IterativeImputer(estimator=LogisticRegression(), max_iter=10, random_state=42)),
    ('scaler', StandardScaler())
])

# Pipeline Kategorikal: One Hot Encoding
categorical_transformer = OneHotEncoder(handle_unknown='ignore', drop='first')

# Menggabungkan Transformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ],
    remainder='passthrough'
)

# 4. Aplikasi Transformasi
X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed = preprocessor.transform(X_test)

print("\nData Preparation Selesai. Data siap untuk Modeling.")

Data Training sebelum transformasi: 70 baris

Data Preparation Selesai. Data siap untuk Modeling.


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [4]:
# =================================================================
# Tahapan 4: Modeling (Pemodelan)
# =================================================================

# Algoritma: Logistic Regression
model = LogisticRegression(solver='liblinear', random_state=42)
model.fit(X_train_processed, y_train)

print("Model Logistic Regression berhasil dilatih pada data training yang sudah diproses.")

Model Logistic Regression berhasil dilatih pada data training yang sudah diproses.


In [5]:
# =================================================================
# Tahapan 5: Evaluation (Evaluasi)
# =================================================================

# Prediksi pada data testing
y_pred = model.predict(X_test_processed)

# Mendapatkan metrik evaluasi
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print("--- Hasil Evaluasi Model ---")
print(f"Akurasi Model pada Data Testing: {accuracy:.4f}")
print("\nConfusion Matrix:")
print(pd.DataFrame(conf_matrix, index=['Actual Gagal (0)', 'Actual Lulus (1)'], columns=['Prediksi Gagal (0)', 'Prediksi Lulus (1)']).to_markdown())
print("\nClassification Report:")
print(class_report)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


--- Hasil Evaluasi Model ---
Akurasi Model pada Data Testing: 0.9667

Confusion Matrix:
|                  |   Prediksi Gagal (0) |   Prediksi Lulus (1) |
|:-----------------|---------------------:|---------------------:|
| Actual Gagal (0) |                    0 |                    1 |
| Actual Lulus (1) |                    0 |                   29 |

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.97      1.00      0.98        29

    accuracy                           0.97        30
   macro avg       0.48      0.50      0.49        30
weighted avg       0.93      0.97      0.95        30



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [6]:
# =================================================================
# Tahapan 6: Deployment (Penerapan)
# =================================================================

# 1. Menyimpan Model dan Preprocessor
model_filename = 'model_logreg_kelulusan.pkl'
preprocessor_filename = 'preprocessor_deployment.pkl'

# Menyimpan model (Algoritma)
joblib.dump(model, model_filename)
# Menyimpan preprocessor (Termasuk scaling dan encoding yang sudah fit)
joblib.dump(preprocessor, preprocessor_filename)

print(f"Model berhasil disimpan sebagai: {model_filename}")
print(f"Preprocessor berhasil disimpan sebagai: {preprocessor_filename}")

# 2. Simulasi Prediksi Real-Time (Antarmuka)
print("\n--- Simulasi Deployment ---")

# Muat Model dan Preprocessor yang sudah disimpan
model_loaded = joblib.load(model_filename)
preprocessor_loaded = joblib.load(preprocessor_filename)

# Contoh Input Baru dari Pengguna
data_input_new = {'Umur': 23, 'UTS': 68, 'UAS': 75, 'Gender': 'Perempuan', 'Matkul': 'Kimia'}

# Ubah Input ke DataFrame
input_df = pd.DataFrame([data_input_new])

# Aplikasikan Preprocessor (Tanpa fit, hanya transform)
# Preprocessor akan melakukan scaling dan encoding yang benar secara otomatis
input_processed = preprocessor_loaded.transform(input_df)

# Prediksi
prediction_class = model_loaded.predict(input_processed)[0]
prediction_proba = model_loaded.predict_proba(input_processed)[0]

status = 'LULUS' if prediction_class == 1 else 'GAGAL'

print(f"Input Data: Umur {data_input_new['Umur']}, UTS {data_input_new['UTS']}, UAS {data_input_new['UAS']}")
print(f"HASIL PREDIKSI: {status} (Probabilitas Lulus: {prediction_proba[1]:.2f})")

Model berhasil disimpan sebagai: model_logreg_kelulusan.pkl
Preprocessor berhasil disimpan sebagai: preprocessor_deployment.pkl

--- Simulasi Deployment ---
Input Data: Umur 23, UTS 68, UAS 75
HASIL PREDIKSI: LULUS (Probabilitas Lulus: 0.63)


In [7]:
import gradio as gr
import pandas as pd
import joblib
import os
import numpy as np

MODEL_PATH = "/mnt/data/reg_pipeline_nilai.joblib"

def load_model():
    if os.path.exists(MODEL_PATH):
        try:
            return joblib.load(MODEL_PATH)
        except Exception as e:
            print("Gagal load model:", e)
            return None
    return None

model = load_model()

def predict(umur, jk, kehadiran, uts, uas, partisipasi):
    X = pd.DataFrame({
        "Umur":[umur],
        "JenisKelamin":[jk],
        "Kehadiran":[kehadiran],
        "UTS":[uts],
        "UAS":[uas],
        "Partisipasi":[partisipasi]
    })
    if model is not None:
        try:
            pred = model.predict(X)[0]
        except Exception as e:
            return f"Error model: {e}"
    else:
        pred = 0.4*uts + 0.5*uas + 0.1*partisipasi*10   # dummy fallback

    if pred < 60:
        rec = "Remedial intensif + monitoring mingguan"
    elif pred < 75:
        rec = "Bimbingan tambahan dan tugas latihan"
    else:
        rec = "Pertahankan, beri tantangan lebih lanjut"

    return round(pred,2), rec

title = "Prediksi Nilai Akhir - Gradio"
description = "Masukkan data peserta untuk mendapatkan prediksi nilai akhir dan rekomendasi."

iface = gr.Interface(
    fn=predict,
    inputs=[
        gr.Number(label="Umur", value=20),
        gr.Radio(["Laki-laki","Perempuan"], label="Jenis Kelamin"),
        gr.Slider(0, 100, label="Kehadiran (%)", value=90),
        gr.Number(label="Nilai UTS", value=70),
        gr.Number(label="Nilai UAS", value=75),
        gr.Slider(0, 10, label="Partisipasi (0-10)", value=7)
    ],
    outputs=[
        gr.Number(label="Nilai Prediksi"),
        gr.Textbox(label="Rekomendasi")
    ],
    title=title,
    description=description
)

if __name__ == "__main__":
    iface.launch()


  from .autonotebook import tqdm as notebook_tqdm


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.
