In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
import joblib
import os

# Load the training data from the Excel workbook
data_file_path = "../data_latih.xlsx"
data = pd.read_excel(io=data_file_path)

# Show the first five rows as a quick sanity check of the load
print("Data Loaded:")
print(data.head(n=5), "\n")




Data Loaded:
           Nama L/P                    Penghasilan Status Ekonomi  \
0  Aam Garot Hs   P  Rp. 1,000,000 - Rp. 1,999,999         MISKIN   
1   Aan Priatna   P      Rp. 500,000 - Rp. 999,999          CUKUP   
2     Abd Kahar   P      Rp. 500,000 - Rp. 999,999         MISKIN   
3     Abd Kahar   P  Rp. 1,000,000 - Rp. 1,999,999          CUKUP   
4     Abd Manap   P      Rp. 500,000 - Rp. 999,999  SANGAT MISKIN   

   Jumlah Tanggungan Layak PIP            Alasan Layak PIP  Tahun Penerimaan  \
0                  1     Tidak                         NaN              2023   
1                  2     Tidak                         NaN              2023   
2                  5        Ya  Siswa Miskin/Rentan Miskin              2023   
3                  4     Tidak                         NaN              2023   
4                  3        Ya  Siswa Miskin/Rentan Miskin              2023   

   Jumlah Bantuan Status Bantuan Status Kesesuaian  
0               0       Diterima      

In [3]:
# Fill missing values.
# Assign the result back to the column instead of calling
# `data[col].fillna(..., inplace=True)`: the in-place form operates on an
# intermediate Series (chained assignment), which raises a FutureWarning on
# pandas 2.x and no longer updates `data` once Copy-on-Write is enabled.
data['Alasan Layak PIP'] = data['Alasan Layak PIP'].fillna('Tidak Layak')
print("Nilai yang hilang diisi:")
print(data.head(), "\n")



Nilai yang hilang diisi:
           Nama L/P                    Penghasilan Status Ekonomi  \
0  Aam Garot Hs   P  Rp. 1,000,000 - Rp. 1,999,999         MISKIN   
1   Aan Priatna   P      Rp. 500,000 - Rp. 999,999          CUKUP   
2     Abd Kahar   P      Rp. 500,000 - Rp. 999,999         MISKIN   
3     Abd Kahar   P  Rp. 1,000,000 - Rp. 1,999,999          CUKUP   
4     Abd Manap   P      Rp. 500,000 - Rp. 999,999  SANGAT MISKIN   

   Jumlah Tanggungan Layak PIP            Alasan Layak PIP  Tahun Penerimaan  \
0                  1     Tidak                 Tidak Layak              2023   
1                  2     Tidak                 Tidak Layak              2023   
2                  5        Ya  Siswa Miskin/Rentan Miskin              2023   
3                  4     Tidak                 Tidak Layak              2023   
4                  3        Ya  Siswa Miskin/Rentan Miskin              2023   

   Jumlah Bantuan Status Bantuan Status Kesesuaian  
0               0       Di

In [4]:
# Label-encode every categorical column, keeping one fitted encoder per column
# so the integer codes can be mapped back to the original labels later.
label_cols = ['L/P', 'Penghasilan', 'Status Ekonomi', 'Layak PIP', 'Alasan Layak PIP', 'Status Bantuan', 'Status Kesesuaian']
label_encoders = {col: LabelEncoder() for col in label_cols}
for col, le in label_encoders.items():
    # fit_transform both learns the classes and replaces the column with codes
    data[col] = le.fit_transform(data[col])

print("Data setelah encoding:")
print(data.head(), "\n")



Data setelah encoding:
           Nama  L/P  Penghasilan  Status Ekonomi  Jumlah Tanggungan  \
0  Aam Garot Hs    0            1               1                  1   
1   Aan Priatna    0            3               0                  2   
2     Abd Kahar    0            3               1                  5   
3     Abd Kahar    0            1               0                  4   
4     Abd Manap    0            3               2                  3   

   Layak PIP  Alasan Layak PIP  Tahun Penerimaan  Jumlah Bantuan  \
0          0                 1              2023               0   
1          0                 1              2023         1500000   
2          1                 0              2023         1500000   
3          0                 1              2023         1500000   
4          1                 0              2023               0   

   Status Bantuan  Status Kesesuaian  
0               1                  1  
1               1                  1  
2               1 

In [5]:
# Separate the target column from the feature columns.
# 'Nama' is dropped from the features together with the target itself.
y = data['Status Kesesuaian']
X = data.drop(['Status Kesesuaian', 'Nama'], axis=1)

print("Fitur (X):")
print(X.head(), "\n")
print("Label (y):")
print(y.head(), "\n")



Fitur (X):
   L/P  Penghasilan  Status Ekonomi  Jumlah Tanggungan  Layak PIP  \
0    0            1               1                  1          0   
1    0            3               0                  2          0   
2    0            3               1                  5          1   
3    0            1               0                  4          0   
4    0            3               2                  3          1   

   Alasan Layak PIP  Tahun Penerimaan  Jumlah Bantuan  Status Bantuan  
0                 1              2023               0               1  
1                 1              2023         1500000               1  
2                 0              2023         1500000               1  
3                 1              2023         1500000               1  
4                 0              2023               0               0   

Label (y):
0    1
1    1
2    0
3    1
4    0
Name: Status Kesesuaian, dtype: int32 



In [6]:
# Split the data: 80% training / 20% testing, with a fixed seed for reproducibility
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

print("Data pelatihan dan pengujian:")
for _label, _part in (
    ("X_train shape:", X_train),
    ("X_test shape:", X_test),
    ("y_train shape:", y_train),
):
    print(_label, _part.shape)
# The last line carries the trailing blank line, so it is printed separately
print("y_test shape:", y_test.shape, "\n")



Data pelatihan dan pengujian:
X_train shape: (508, 9)
X_test shape: (127, 9)
y_train shape: (508,)
y_test shape: (127,) 



In [7]:
# Standardise the features.
# The scaler is fitted on the training split only and then applied to both
# splits, so the test split does not influence the fitted statistics.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

print("Fitur setelah standarisasi:")
print("X_train:")
print(X_train[:5])
print("X_test:")
print(X_test[:5], "\n")

Fitur setelah standarisasi:
X_train:
[[ 0.         -1.7893947  -1.22513698 -0.30194393 -0.93516776  0.93516776
   0.          1.          1.00394479]
 [ 0.          1.87967698  1.26434136 -0.90583179 -0.93516776  0.93516776
   0.         -1.          1.00394479]
 [ 0.          0.04514114 -1.22513698  1.50971965 -0.93516776  0.93516776
   0.          1.         -0.99607071]
 [ 0.         -0.87212678 -1.22513698 -0.30194393 -0.93516776  0.93516776
   0.          1.          1.00394479]
 [ 0.          1.87967698 -1.22513698 -0.90583179 -0.93516776  0.93516776
   0.         -1.         -0.99607071]]
X_test:
[[ 0.         -0.87212678  1.26434136  1.50971965  1.06932685 -1.06932685
   0.          1.         -0.99607071]
 [ 0.         -0.87212678 -1.22513698 -0.90583179 -0.93516776  0.93516776
   0.          1.          1.00394479]
 [ 0.          0.96240906  0.01960219  0.90583179  1.06932685 -1.06932685
   0.          1.         -0.99607071]
 [ 0.          0.96240906  1.26434136  0.90583179 

In [8]:
# Train a linear-kernel SVM on the standardised training split
# (fit() returns the estimator itself, so construction and training chain).
svm_model = SVC(kernel='linear').fit(X_train, y_train)

print("Model SVM dilatih.\n")

# Persist the model and the scaler
def save_model_and_scaler(model, scaler, model_filename, scaler_filename):
    """Serialise a fitted model and its scaler to disk with joblib.

    Any missing parent directory of either target path is created first.

    Args:
        model: Object to dump to ``model_filename``.
        scaler: Object to dump to ``scaler_filename``.
        model_filename: Destination path for the model file.
        scaler_filename: Destination path for the scaler file.

    Returns:
        None. The two files are written and a confirmation line is printed
        for each.
    """
    for target in (model_filename, scaler_filename):
        parent_dir = os.path.dirname(target)
        # A bare filename has an empty dirname: nothing to create in that case.
        if parent_dir:
            # exist_ok=True avoids the check-then-create race of
            # `if not os.path.exists(...): os.makedirs(...)`.
            os.makedirs(parent_dir, exist_ok=True)

    joblib.dump(model, model_filename)
    joblib.dump(scaler, scaler_filename)
    print(f"Model saved to {model_filename}")
    print(f"Scaler saved to {scaler_filename}\n")

# Output locations: bare filenames, i.e. relative to the current working directory
model_filename = 'svm_model.joblib'
scaler_filename = 'scaler.joblib'
save_model_and_scaler(
    model=svm_model,
    scaler=scaler,
    model_filename=model_filename,
    scaler_filename=scaler_filename,
)


Model SVM dilatih.

Model saved to svm_model.joblib
Scaler saved to scaler.joblib



In [9]:
# Predict on the held-out test split
y_pred = svm_model.predict(X_test)

# Evaluate the model: per-class precision/recall/F1 plus overall accuracy
report = classification_report(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Classification Report:\n", report)
print("Accuracy Score:", accuracy, "\n")


Classification Report:
               precision    recall  f1-score   support

           0       0.75      0.43      0.55        21
           1       1.00      1.00      1.00        62
           2       0.77      0.93      0.85        44

    accuracy                           0.88       127
   macro avg       0.84      0.79      0.80       127
weighted avg       0.88      0.88      0.87       127

Accuracy Score: 0.8818897637795275 



In [14]:
import itertools

import plotly.express as px
import plotly.graph_objects as go
from sklearn.decomposition import PCA

# Decision-function scores of the trained SVM for every training sample
decision_function = svm_model.decision_function(X_train)

# A multiclass model returns one column per class: keep the highest score per
# sample. A binary model already returns a 1-D array, so reducing over axis=1
# would raise; in that case the scores are used as they are.
if decision_function.ndim > 1:
    decision_function = np.max(decision_function, axis=1)

# Rebuild a labelled frame from the standardised training matrix and attach
# the decision value, the target and the person's name to each row
train_data = pd.DataFrame(X_train, columns=X.columns)
print("Dimensi X_train:", X_train.shape)
print("Dimensi decision_function:", decision_function.shape)

train_data['Decision Value'] = decision_function
train_data['Status Kesesuaian'] = y_train.values
# y_train.index holds index *labels* of `data`, so the lookup must be
# label-based (.loc); .iloc is only correct with a default RangeIndex.
train_data['Nama'] = data.loc[y_train.index, 'Nama'].values

# PCA to reduce the feature space to 2-D for plotting
pca = PCA(n_components=2)
principal_components = pca.fit_transform(X_train)

train_data['PCA1'] = principal_components[:, 0]
train_data['PCA2'] = principal_components[:, 1]

print("Data untuk visualisasi:")
print(train_data[['PCA1', 'PCA2', 'Decision Value', 'Status Kesesuaian', 'Nama']].head(), "\n")

# Scatter plot with Plotly
fig = px.scatter(
    train_data, x='PCA1', y='PCA2', color='Status Kesesuaian', hover_data=['Nama', 'Decision Value'],
    title='Visualisasi Hasil Pelatihan SVM'
)

# Add the separating hyperplanes.
# The SVM weights live in the standardised feature space, not in PCA space, so
# they cannot be plotted against PCA1/PCA2 directly. A point z on the PCA plane
# corresponds to x = pca.mean_ + z @ pca.components_, hence
#     w.x + b = (pca.components_ @ w).z + (b + w.pca.mean_)
# and each line drawn below is the intersection of one hyperplane with the PCA
# plane. This is a 2-D slice of the boundary: variance outside the two
# components is not represented.
x_vals = np.linspace(train_data['PCA1'].min(), train_data['PCA1'].max(), 100)
pca2_min, pca2_max = train_data['PCA2'].min(), train_data['PCA2'].max()

# A linear SVC stores one weight row per class pair, ordered
# "0 vs 1", "0 vs 2", ..., "1 vs 2", ... which matches combinations(classes_, 2).
class_pairs = itertools.combinations(svm_model.classes_, 2)
for (class_a, class_b), weights, bias in zip(class_pairs, svm_model.coef_, svm_model.intercept_):
    plane_weights = pca.components_ @ weights
    plane_bias = bias + weights @ pca.mean_

    if not np.isclose(plane_weights[1], 0.0):
        line_x = x_vals
        line_y = -(plane_weights[0] * x_vals + plane_bias) / plane_weights[1]
    elif not np.isclose(plane_weights[0], 0.0):
        # No PCA2 dependence: the boundary is a vertical line in this plane
        line_x = np.full(2, -plane_bias / plane_weights[0])
        line_y = np.array([pca2_min, pca2_max])
    else:
        # The hyperplane does not cross the PCA plane as a line; nothing to draw
        continue

    fig.add_trace(go.Scatter(
        x=line_x, y=line_y, mode='lines',
        name=f'Hyperplane {class_a} vs {class_b}',
        line=dict(dash='dash'),
    ))

# Keep the view on the data: a steep boundary line would otherwise stretch the y-axis
y_padding = 0.05 * (pca2_max - pca2_min)
fig.update_yaxes(range=[pca2_min - y_padding, pca2_max + y_padding])

# Show the plot
fig.show()


Dimensi X_train: (508, 9)
Dimensi decision_function: (508,)
Data untuk visualisasi:
       PCA1      PCA2  Decision Value  Status Kesesuaian  \
0 -1.741353  0.829650        2.222279                  1   
1 -0.855161 -0.544401        2.222255                  1   
2 -1.078206  1.125374        2.222200                  1   
3 -1.727427  1.094940        2.222286                  1   
4 -1.908374 -0.542496        2.222339                  1   

                        Nama  
0              Muhammad Abdi  
1                      Irwan  
2  Darman Syahputra Hasibuan  
3             Dedi Kurniawan  
4             Muhammad Nadan   

