In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import warnings
warnings.filterwarnings("ignore")

In [2]:
df = pd.read_csv('FSVA 2022 Cleaned.csv')
df

Unnamed: 0,Komposit,NCPR,Kemiskinan (%),Pengeluaran Pangan (%),Tanpa Listrik (%),Tanpa Air Bersih (%),Lama Sekolah Perempuan (tahun),Rasio Tenaga Kesehatan,Angka Harapan Hidup (tahun),Stunting (%)
0,5,0.78,13.18,52.86,0.00,46.34,8.75,2.43,64.40,27.3
1,6,0.28,13.41,38.71,0.00,41.55,9.62,2.87,68.22,34.1
2,6,0.52,14.45,32.76,0.00,33.96,8.66,2.01,68.74,38.2
3,4,2.57,15.26,28.92,0.24,42.25,10.10,2.45,68.86,34.3
4,6,0.56,18.81,38.49,0.00,21.76,10.22,1.82,67.99,27.4
...,...,...,...,...,...,...,...,...,...,...
509,1,5.00,33.86,33.04,2.11,68.12,8.50,63.35,60.20,39.4
510,1,5.00,31.39,7.59,0.22,61.90,10.50,22.57,65.25,34.5
511,5,0.98,29.30,21.87,3.25,27.52,8.60,14.88,67.60,28.5
512,1,5.00,34.70,23.48,21.62,51.51,6.41,20.40,67.26,40.1


In [3]:
# Memisahkan fitur (X) dan target (y)
X = df.drop('Komposit', axis=1)
y = df['Komposit']

In [4]:


# Memisahkan data menjadi data pelatihan dan data pengujian
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalisasi fitur menggunakan StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [5]:
model = LogisticRegression(random_state=42, max_iter=1000)

# Melatih model
model.fit(X_train, y_train)

# Memprediksi nilai target pada data pelatihan dan data pengujian
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

In [6]:
# Evaluasi performa model pada data pelatihan
accuracy_train = accuracy_score(y_train, y_train_pred)
conf_matrix_train = confusion_matrix(y_train, y_train_pred)
classification_rep_train = classification_report(y_train, y_train_pred)

# Menampilkan hasil evaluasi pada data pelatihan
print("Evaluasi pada Data Pelatihan:")
print(f'Accuracy: {accuracy_train}')
print(f'Confusion Matrix:\n{conf_matrix_train}')
print(f'Classification Report:\n{classification_rep_train}')

Evaluasi pada Data Pelatihan:
Accuracy: 0.829683698296837
Confusion Matrix:
[[ 20   0   0   0   0   0]
 [  0  10   2   0   0   0]
 [  0   1  16   4   2   1]
 [  0   0   2  21  10   7]
 [  0   0   0   2  55  30]
 [  0   0   0   0   9 219]]
Classification Report:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00        20
           2       0.91      0.83      0.87        12
           3       0.80      0.67      0.73        24
           4       0.78      0.53      0.63        40
           5       0.72      0.63      0.67        87
           6       0.85      0.96      0.90       228

    accuracy                           0.83       411
   macro avg       0.84      0.77      0.80       411
weighted avg       0.82      0.83      0.82       411



In [7]:
# Evaluasi performa model pada data pengujian
accuracy_test = accuracy_score(y_test, y_test_pred)
conf_matrix_test = confusion_matrix(y_test, y_test_pred)
classification_rep_test = classification_report(y_test, y_test_pred)

# Menampilkan hasil evaluasi pada data pengujian
print("\nEvaluasi pada Data Pengujian:")
print(f'Accuracy: {accuracy_test}')
print(f'Confusion Matrix:\n{conf_matrix_test}')
print(f'Classification Report:\n{classification_rep_test}')


Evaluasi pada Data Pengujian:
Accuracy: 0.7766990291262136
Confusion Matrix:
[[ 5  0  0  0  1  0]
 [ 1  2  1  0  0  0]
 [ 0  0  4  2  1  1]
 [ 0  0  1  4  2  1]
 [ 0  0  0  2 12  7]
 [ 0  0  0  0  3 53]]
Classification Report:
              precision    recall  f1-score   support

           1       0.83      0.83      0.83         6
           2       1.00      0.50      0.67         4
           3       0.67      0.50      0.57         8
           4       0.50      0.50      0.50         8
           5       0.63      0.57      0.60        21
           6       0.85      0.95      0.90        56

    accuracy                           0.78       103
   macro avg       0.75      0.64      0.68       103
weighted avg       0.77      0.78      0.77       103



In [8]:
def predict_user_input():
    user_input = []
    for feature in X.columns:
        value = float(input(f"Masukkan nilai untuk {feature}: "))
        user_input.append(value)
    
    # Normalisasi input pengguna menggunakan scaler yang sudah di-fit
    user_input = scaler.transform([user_input])

    # Melakukan prediksi
    prediction = model.predict(user_input)
    return prediction[0]

# Meminta input pengguna dan memberikan prediksi
user_prediction = predict_user_input()

# Menampilkan hasil prediksi
print(f"Hasil prediksi: {user_prediction}")

Hasil prediksi: 5
