# JS11 - TUGAS PRAKTIKUM

Model SVM untuk data voice.csv

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Load and preprocessing data
file_id = "1RgIXDq8PnRGYw0otnpbvc3L98_afRkjo"
url = f"https://drive.google.com/uc?id={file_id}"
data = pd.read_csv(url)
data.head()

# Encode label: male/female → 0/1
le = LabelEncoder()
data['label'] = le.fit_transform(data['label'])

# Pisahkan fitur dan label
X = data.drop('label', axis=1)
y = data['label']

# Normalisasi
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Fungsi untuk melatih dan mengukur performa
def evaluate_svm(test_size):
    results = []
    split_name = f"{int((1-test_size)*100)}:{int(test_size*100)}"

    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=test_size, random_state=42
    )

    kernels = ['linear', 'poly', 'rbf']

    for k in kernels:
        model = SVC(kernel=k)
        model.fit(X_train, y_train)
        pred = model.predict(X_test)
        acc = accuracy_score(y_test, pred)

        results.append([split_name, k, acc])

    return results

# Evaluasi untuk semua split
all_results = []
all_results += evaluate_svm(0.30)   # split 70:30
all_results += evaluate_svm(0.20)   # split 80:20

# Buat tabel hasil
df_results = pd.DataFrame(all_results, columns=['Split', 'Kernel', 'Accuracy'])
print(df_results)

   Split  Kernel  Accuracy
0  70:30  linear  0.970557
1  70:30    poly  0.956887
2  70:30     rbf  0.981073
3  80:20  linear  0.976341
4  80:20    poly  0.968454
5  80:20     rbf  0.982650


Model SVM untuk data images day and night

In [5]:
from pathlib import Path
import matplotlib.image as mpimg
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

train_dir = "/content/drive/MyDrive/dataset ml/images/training/"
test_dir = "/content/drive/MyDrive/dataset ml/images/test/"

def load_dataset(img_dir):
    p = Path(img_dir)
    dirs = p.glob('*')

    img_list = []
    for d in dirs:
        label = str(d).split('/')[-1]
        for file in d.glob("*.jpg"):
            img = mpimg.imread(file)
            if img is not None:
                img_list.append((img, label))
    return img_list

train_img = load_dataset(train_dir)
test_img  = load_dataset(test_dir)

# Preprocessing (resize + label encoding)
def standarized_input(image):
    return cv2.resize(image, (1100,600))

def label_encoder(label):
    return 1 if label == "day" else 0

def preprocess(img_list):
    std_list = []
    for img, label in img_list:
        std_img = standarized_input(img)
        enc_label = label_encoder(label)
        std_list.append((std_img, enc_label))
    return std_list

train_std = preprocess(train_img)
test_std  = preprocess(test_img)

# Feature Extraction - Histogram
def extract_histogram(image):
    # convert RGB → grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # histogram 256 bins
    hist = cv2.calcHist([gray], [0], None, [256], [0,256])

    # normalisasi
    hist = cv2.normalize(hist, hist).flatten()
    return hist

def extract_features(img_list):
    feat_list = []
    label_list = []

    for img, label in img_list:
        hist = extract_histogram(img)
        feat_list.append(hist)
        label_list.append(label)

    return np.array(feat_list), np.array(label_list)

X_train, y_train = extract_features(train_std)
X_test, y_test   = extract_features(test_std)

# Split training 80:20
X_train_split, X_val, y_train_split, y_val = train_test_split(
    X_train, y_train, test_size=0.20, stratify=y_train, random_state=42
)

# Model SVM kernal RBF dan Hyperparameter Tuning
param_grid = {
    "C": [0.1, 1, 10, 100],
    "gamma": ["scale", "auto", 0.01, 0.001, 0.0001]
}

svm_model = SVC(kernel="rbf")

grid = GridSearchCV(
    svm_model, param_grid, cv=3, scoring="accuracy", n_jobs=-1, verbose=1
)

grid.fit(X_train_split, y_train_split)

print("Best parameters:", grid.best_params_)

# Evaluasi mjodel
best_model = grid.best_estimator_

y_pred_train = best_model.predict(X_train_split)
y_pred_val   = best_model.predict(X_val)
y_pred_test  = best_model.predict(X_test)

acc_train = accuracy_score(y_train_split, y_pred_train)
acc_val   = accuracy_score(y_val, y_pred_val)
acc_test  = accuracy_score(y_test, y_pred_test)

print("\nAKURASI MODEL SVM RBF")
print("Akurasi Train (80%):", acc_train)
print("Akurasi Validasi  :", acc_val)
print("Akurasi Test      :", acc_test)


Fitting 3 folds for each of 20 candidates, totalling 60 fits
Best parameters: {'C': 10, 'gamma': 'scale'}

AKURASI MODEL SVM RBF
Akurasi Train (80%): 1.0
Akurasi Validasi  : 0.9791666666666666
Akurasi Test      : 0.975
