In [51]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import numpy as np

# Load data from Excel file
data_path = '../data_latih.xlsx'
data_latih = pd.read_excel(data_path)

# Encoding data kategorikal
le = LabelEncoder()

# Encode 'L/P'
data_latih['L/P'] = le.fit_transform(data_latih['L/P'])

# Encode 'Penghasilan'
penghasilan_mapping = {
    'Rp. 500,000 - Rp. 999,999': 0,
    'Rp. 1,000,000 - Rp. 1,999,999': 1,
    'Rp. 2,000,000 - Rp. 4,999,999': 2,
    'Tidak Berpenghasilan': 3,
    'Kurang dari Rp. 500,000': 4
}
data_latih['Penghasilan'] = data_latih['Penghasilan'].map(penghasilan_mapping)

# Encode 'Status Ekonomi'
status_ekonomi_mapping = {
    'SANGAT MISKIN': 0,
    'MISKIN': 1,
    'CUKUP': 2
}
data_latih['Status Ekonomi'] = data_latih['Status Ekonomi'].map(status_ekonomi_mapping)

# Encode 'Layak PIP'
layak_pip_mapping = {'Ya': 1, 'Tidak': 0}
data_latih['Layak PIP'] = data_latih['Layak PIP'].map(layak_pip_mapping)

# Drop columns that are not used in the model
data_latih = data_latih.drop(columns=['Nama', 'Alasan Layak PIP', 'Status Bantuan', 'Status Kesesuaian'])

# Pisahkan fitur dan target
X = data_latih.drop(columns=['Layak PIP'])
y = data_latih['Layak PIP']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Menampilkan shapes dari training dan testing set
print("Training set shape:")
print(X_train.shape)
print(y_train.shape)
print("Testing set shape:")
print(X_test.shape)
print(y_test.shape)



Training set shape:
(508, 6)
(508,)
Testing set shape:
(127, 6)
(127,)


In [52]:
from sklearn.preprocessing import StandardScaler

# Normalisasi kolom numerik
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert back to DataFrame for easier handling
X_train_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns)
X_test_scaled = pd.DataFrame(X_test_scaled, columns=X_test.columns)

# Display the first few rows of the scaled data
print("First few rows of X_train_scaled:")
display(X_train_scaled.head())
print("First few rows of X_test_scaled:")
display(X_test_scaled.head())



First few rows of X_train_scaled:


Unnamed: 0,L/P,Penghasilan,Status Ekonomi,Jumlah Tanggungan,Tahun Penerimaan,Jumlah Bantuan
0,0.0,2.691953,1.225137,-0.301944,0.0,1.0
1,0.0,1.785716,-1.264341,-0.905832,0.0,-1.0
2,0.0,0.879478,1.225137,1.50972,0.0,1.0
3,0.0,-0.026759,1.225137,-0.301944,0.0,1.0
4,0.0,1.785716,1.225137,-0.905832,0.0,-1.0


First few rows of X_test_scaled:


Unnamed: 0,L/P,Penghasilan,Status Ekonomi,Jumlah Tanggungan,Tahun Penerimaan,Jumlah Bantuan
0,0.0,-0.026759,-1.264341,1.50972,0.0,1.0
1,0.0,-0.026759,1.225137,-0.905832,0.0,1.0
2,0.0,-0.932996,-0.019602,0.905832,0.0,1.0
3,0.0,-0.932996,-1.264341,0.905832,0.0,-1.0
4,0.0,-0.932996,-1.264341,-0.301944,0.0,-1.0


In [33]:
from sklearn.model_selection import train_test_split

# Pisahkan fitur dan target
X = data_latih.drop(columns=['Nama', 'Layak PIP', 'Alasan Layak PIP', 'Status Bantuan', 'Status Kesesuaian'])
y = data_latih['Layak PIP'].map({'Ya': 1, 'Tidak': 0})

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Menampilkan shapes dari training dan testing set
print("Training set shape:")
print(X_train.shape)
print(y_train.shape)
print("Testing set shape:")
print(X_test.shape)
print(y_test.shape)


Training set shape:
(241, 6)
(241,)
Testing set shape:
(61, 6)
(61,)


In [53]:
# Inisialisasi parameter

# Bobot awal diinisialisasi dengan nilai nol untuk semua fitur
w = np.zeros(X_train_scaled.shape[1])

# Bias awal diinisialisasi dengan nilai nol
b = 0

# Learning rate (alpha) menentukan seberapa besar langkah yang diambil pada setiap iterasi saat memperbarui bobot dan bias
learning_rate = 0.01

# Jumlah iterasi menentukan berapa kali proses pembaruan bobot dan bias dilakukan
n_iterations = 1000

# Menampilkan parameter awal
print("Initial weights (w):", w)
print("Initial bias (b):", b)
print("Learning rate:", learning_rate)
print("Number of iterations:", n_iterations)



Initial weights (w): [0. 0. 0. 0. 0. 0.]
Initial bias (b): 0
Learning rate: 0.01
Number of iterations: 1000


In [54]:
# Fungsi untuk menghitung hinge loss
def hinge_loss(X, y, w, b):
    # Hinge loss dihitung dengan maks(0, 1 - y * (X * w + b))
    return np.maximum(0, 1 - y * (np.dot(X, w) + b))

# Menghitung hinge loss untuk parameter awal
X_train_array = X_train_scaled.to_numpy()
y_train_array = y_train.to_numpy()

initial_loss = hinge_loss(X_train_array, y_train_array, w, b)
print("Initial hinge loss:", initial_loss)


Initial hinge loss: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 

In [62]:
def gradient_descent(X, y, w, b, learning_rate, n_iterations):
    # Melakukan iterasi sebanyak n_iterations
    for iteration in range(n_iterations):
        # Iterasi melalui setiap sampel data
        for idx, x_i in enumerate(X):
            # Menghitung kondisi apakah sampel terklasifikasi dengan benar (margin >= 1)
            if y[idx] * (np.dot(x_i, w) + b) >= 1:
                # Jika terklasifikasi dengan benar, hanya terapkan regularisasi pada bobot
                w -= learning_rate * (2 * 1/n_iterations * w)
            else:
                # Jika terklasifikasi salah, perbarui bobot dan bias untuk mengurangi kesalahan
                w -= learning_rate * (2 * 1/n_iterations * w - np.dot(x_i, y[idx] * x_i))
                b -= learning_rate * y[idx]
        
        # Cetak bobot dan bias pada setiap iterasi
        print(f"Iteration {iteration + 1}/{n_iterations}:")
        print(f"Weights: {w}")
        print(f"Bias: {b}\n")
    
    # Mengembalikan bobot dan bias yang telah diperbarui
    return w, b

# Inisialisasi parameter
w = np.zeros(X_train_scaled.shape[1])  # Bobot awal diinisialisasi dengan nilai nol untuk semua fitur
b = 0  # Bias awal diinisialisasi dengan nilai nol
learning_rate = 0.01  # Learning rate menentukan seberapa besar langkah yang diambil pada setiap iterasi
n_iterations = 100  # Jumlah iterasi menentukan berapa kali proses pembaruan bobot dan bias dilakukan (gunakan 10 untuk mencetak lebih sedikit iterasi)

# Melakukan gradient descent untuk memperbarui bobot dan bias
w, b = gradient_descent(X_train_scaled.to_numpy(), y_train.to_numpy(), w, b, learning_rate, n_iterations)

# Menampilkan parameter setelah pelatihan
print("Trained weights (w):", w)
print("Trained bias (b):", b)




Iteration 1/100:
Weights: [5.1991989 5.1991989 5.1991989 5.1991989 5.1991989 5.1991989]
Bias: -1.7000000000000013

Iteration 2/100:
Weights: [9.36888001 9.36888001 9.36888001 9.36888001 9.36888001 9.36888001]
Bias: -3.1999999999999758

Iteration 3/100:
Weights: [13.04338916 13.04338916 13.04338916 13.04338916 13.04338916 13.04338916]
Bias: -4.669999999999945

Iteration 4/100:
Weights: [16.36287333 16.36287333 16.36287333 16.36287333 16.36287333 16.36287333]
Bias: -6.1399999999999135

Iteration 5/100:
Weights: [19.36163443 19.36163443 19.36163443 19.36163443 19.36163443 19.36163443]
Bias: -7.609999999999882

Iteration 6/100:
Weights: [22.0706602 22.0706602 22.0706602 22.0706602 22.0706602 22.0706602]
Bias: -9.07999999999985

Iteration 7/100:
Weights: [24.58291327 24.58291327 24.58291327 24.58291327 24.58291327 24.58291327]
Bias: -10.569999999999819

Iteration 8/100:
Weights: [26.94474088 26.94474088 26.94474088 26.94474088 26.94474088 26.94474088]
Bias: -12.089999999999787

Iteration 9/

In [63]:
# Fungsi untuk melakukan prediksi menggunakan model SVM yang telah dilatih
def predict(X, w, b):
    return np.sign(np.dot(X, w) + b)
# Melakukan prediksi pada data uji
y_pred = predict(X_test_scaled.to_numpy(), w, b)

# Menghitung akurasi model
accuracy = np.mean(y_pred == y_test.to_numpy())

print(f'Accuracy: {accuracy}')



Accuracy: 0.047244094488188976


In [64]:
# Melakukan prediksi pada data latih
y_train_pred = predict(X_train_scaled.to_numpy(), w, b)

# Menghitung akurasi model pada data latih
train_accuracy = np.mean(y_train_pred == y_train.to_numpy())

print(f'Train Accuracy: {train_accuracy}')


Train Accuracy: 0.021653543307086614


In [65]:
import joblib

# Simpan bobot (weights) dan bias model ke file
model_path = 'svm_model.pkl'
joblib.dump((w, b), model_path)

print(f"Model saved to {model_path}")


Model saved to svm_model.pkl


In [66]:
import joblib

# Muat bobot (weights) dan bias model dari file
w_loaded, b_loaded = joblib.load(model_path)

print("Model loaded:")
print(f"Weights: {w_loaded}")
print(f"Bias: {b_loaded}")


Model loaded:
Weights: [72.70196854 72.70196854 72.70196854 72.70196854 72.70196854 72.70196854]
Bias: -190.6399999999716
