In [1]:
import math
import statistics

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from scipy import stats
from sklearn.linear_model import LinearRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder

In [2]:
# Toy training set: one tuple per student, fields in the order of `student_columns`
student_columns = ['Jenis Kelamin', 'Status Mhs', 'Status', 'IPK', 'Status Lulus']
student_rows = [
    ('L', 'Reguler', 'Lajang', 3.17, 'Tepat'),
    ('L', 'Bekerja', 'Lajang', 3.30, 'Tepat'),
    ('P', 'Reguler', 'Lajang', 3.01, 'Terlambat'),
    ('P', 'Reguler', 'Menikah', 3.25, 'Tepat'),
    ('L', 'Bekerja', 'Menikah', 3.01, 'Terlambat'),
]

# Pivot the rows into a column-oriented dict (column name -> list of values)
data = {name: list(values) for name, values in zip(student_columns, zip(*student_rows))}
df = pd.DataFrame(data)
df

Unnamed: 0,Jenis Kelamin,Status Mhs,Status,IPK,Status Lulus
0,L,Reguler,Lajang,3.17,Tepat
1,L,Bekerja,Lajang,3.3,Tepat
2,P,Reguler,Lajang,3.01,Terlambat
3,P,Reguler,Menikah,3.25,Tepat
4,L,Bekerja,Menikah,3.01,Terlambat


In [3]:
# Encode the categorical features and the target as integer codes.
# LabelEncoder comes from the notebook's top import cell, so this cell holds
# only the transformation itself.

# Work on a copy so the raw, human-readable `df` stays intact
df_encoded = df.copy()

# Every text column is encoded; 'IPK' is numeric and is left untouched.
# Each column gets its own encoder (codes follow the sorted order of the labels),
# and the fitted encoders are kept so later cells can encode new inputs the
# same way and map predictions back to their text labels.
# NOTE(review): scikit-learn documents LabelEncoder as an encoder for the
# target (y); OrdinalEncoder is the documented choice for feature columns.
# It is kept here because later cells call label_encoders[col].transform()
# and .inverse_transform() on 1-D columns.
label_encoders = {}
for col in ['Jenis Kelamin', 'Status Mhs', 'Status', 'Status Lulus']:
    le = LabelEncoder()
    df_encoded[col] = le.fit_transform(df_encoded[col])
    label_encoders[col] = le

In [8]:
# Inspect the fitted encoders, keyed by column name. The repr only names the
# estimator type; the learned label order lives in each encoder's `classes_`.
label_encoders

{'Jenis Kelamin': LabelEncoder(),
 'Status Mhs': LabelEncoder(),
 'Status': LabelEncoder(),
 'Status Lulus': LabelEncoder()}

In [4]:
# Pin the IPK column to a 64-bit float dtype so the model receives a numeric feature
df_encoded['IPK'] = df_encoded['IPK'].astype('float64')

In [5]:
# Show the encoded frame: categorical columns are now integer codes, IPK is still a float
df_encoded

Unnamed: 0,Jenis Kelamin,Status Mhs,Status,IPK,Status Lulus
0,0,1,0,3.17,0
1,0,0,0,3.3,0
2,1,1,0,3.01,1
3,1,1,1,3.25,0
4,0,0,1,3.01,1


In [6]:
# Split the encoded frame: the graduation status is the target,
# every remaining column is a feature
y = df_encoded['Status Lulus']
X = df_encoded.drop(columns=['Status Lulus'])

# Fit a Gaussian Naive Bayes classifier on all available rows
gnb = GaussianNB()
gnb.fit(X, y)

In [7]:
# Score the very rows the model was fitted on. This is only a smoke test of the
# fitted classifier; agreement with the training labels says nothing about how
# well it generalises to unseen students.
predictions = gnb.predict(X)
print("Predictions:", predictions)

Predictions: [0 0 1 0 1]


In [9]:
# Map the integer class codes back to their text labels using the encoder
# that was fitted on the target column
status_encoder = label_encoders['Status Lulus']
predictions_text = status_encoder.inverse_transform(predictions)
print("Predictions (text):", predictions_text)

Predictions (text): ['Tepat' 'Tepat' 'Terlambat' 'Tepat' 'Terlambat']


In [11]:
# Describe one new student using the raw (text) category labels
input_dict = {
    'Jenis Kelamin': ['P'],
    'Status Mhs': ['Bekerja'],
    'Status': ['Lajang'],
    'IPK': [3.17]
}
raw_input = pd.DataFrame(input_dict)

# Run each categorical feature through the encoder fitted on the training data,
# then arrange the columns in the order the model was trained on
encoded_columns = {
    name: label_encoders[name].transform(raw_input[name])
    for name in ('Jenis Kelamin', 'Status Mhs', 'Status')
}
input_df = raw_input.assign(**encoded_columns)[X.columns]

# Classify the student and translate the predicted code back to its text label
input_pred = gnb.predict(input_df)
input_pred_text = label_encoders['Status Lulus'].inverse_transform(input_pred)
print("Prediction:", input_pred_text[0])

Prediction: Tepat
