# TUGAS PRAKTIKUM


#### 1. Buatlah model SVM dengan menggunakan data voice.csv dengan ketentuan,
    a. Split data dengan rasio 70:30 dan 80:20 untuk setiap model yang akan dibangun.
        - Gunakan model dengan kernel linier.
        - Gunakan model dengan kernel polynomial.
        - Gunakan model dengan kernel RBF.

    b. Tabulasikan performansi setiap split dan kernel berdasarkan metrik akurasi.

In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# =============================
# 1. LOAD DATA
# =============================
df = pd.read_csv("dataset/voice.csv")

# Encode the string class column as integers. LabelEncoder numbers classes in
# sorted order, so with male/female labels (presumably the only two values in
# this file) the mapping is female -> 0, male -> 1.
le = LabelEncoder()
le.fit(df['label'])
df['label'] = le.transform(df['label'])

# Target vector first, then every remaining column as the feature matrix.
y = df['label']
X = df.drop(columns='label')

# =============================
# 2. Fungsi untuk training model
# =============================
def train_svm(kernel, test_size, *, scale=False, random_state=42):
    """Train an SVC on a stratified split and return its test accuracy.

    The data comes from the module-level names ``X`` (features) and ``y``
    (labels), looked up at call time, so the function evaluates whatever
    those names are bound to when it is called.

    Args:
        kernel: Kernel name forwarded to ``SVC`` (e.g. "linear", "poly",
            "rbf").
        test_size: Fraction of the samples held out for testing.
        scale: When True, features are standardized with ``StandardScaler``
            before the SVC. The scaler is fitted on the training portion
            only, so no test information leaks into it. Defaults to False,
            which keeps the original unscaled behaviour. Polynomial and RBF
            kernels depend on feature magnitudes, so they usually benefit
            from scaling.
        random_state: Seed for the train/test split. The default of 42 is
            the value that was previously hard-coded.

    Returns:
        Accuracy of the fitted model on the held-out test portion.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y
    )

    model = SVC(kernel=kernel)
    if scale:
        # Imported locally so the block brings its own dependencies.
        from sklearn.pipeline import make_pipeline
        from sklearn.preprocessing import StandardScaler

        model = make_pipeline(StandardScaler(), model)

    model.fit(X_train, y_train)
    return accuracy_score(y_test, model.predict(X_test))

# =============================
# 3. Run every kernel/split combination
# =============================
kernels = ["linear", "poly", "rbf"]
splits = [0.30, 0.20]  # test fractions for the 70:30 and 80:20 splits

# One row per combination: [kernel, "train:test" label, accuracy].
results = []

for kernel in kernels:
    for split in splits:
        # round() instead of int(): int() truncates, so a fraction such as
        # 0.29 (0.29 * 100 == 28.999999999999996) would be labelled "71:28".
        test_pct = round(split * 100)
        acc = train_svm(kernel, split)
        results.append([kernel, f"{100 - test_pct}:{test_pct}", acc])

# =============================
# 4. Performance table
# =============================
# One row per (kernel, split) run; the "Akurasi" column holds the accuracy
# value stored as the third element of each row in `results`.
results_df = pd.DataFrame(
    data=results,
    columns=["Kernel", "Split", "Akurasi"],
)
print(results_df)


   Kernel  Split   Akurasi
0  linear  70:30  0.920084
1  linear  80:20  0.917981
2    poly  70:30  0.511041
3    poly  80:20  0.517350
4     rbf  70:30  0.695058
5     rbf  80:20  0.694006


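Berdasarkan hasil akurasi di atas, SVM dengan kernel linear memiliki akurasi tertinggi (sekitar 0,92) pada kedua rasio split, sehingga paling cocok digunakan pada dataset voice dengan konfigurasi ini. Catatan: fitur tidak distandardisasi sebelum pelatihan, sehingga rendahnya akurasi kernel polynomial dan RBF kemungkinan dipengaruhi oleh perbedaan skala antar fitur, bukan semata-mata oleh pilihan kernel.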

#### 2. Gunakan data pada praktikum 5 untuk membuat model klasifikasi siang dan malam menggunakan SVM dengan kernel RBF menggunakan fitur histogram. Gunakan rasio 80:20. Anda dapat bereksperimen dengan hyperparameter tuning dari kernel RBF. Catat performansi akurasinya!

In [14]:
# import library
from pathlib import Path
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import cv2
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score


In [15]:
def load_dataset(img_dir):
    """Load every ``*.jpg`` image found one level below ``img_dir``.

    Each immediate subdirectory of ``img_dir`` is treated as one class and
    its name is used as the label of the images inside it.

    Args:
        img_dir: Path (string or ``Path``) of the directory that contains
            one subdirectory per class.

    Returns:
        A list of ``(image, label)`` tuples, where ``image`` is whatever
        ``mpimg.imread`` returns for the file and ``label`` is the
        subdirectory name. The list is empty when nothing matches.
    """
    img_list = []

    # Path.glob yields entries in an arbitrary, filesystem-dependent order.
    # Sorting makes the sample order, and therefore any seeded train/test
    # split built from it, reproducible across machines.
    for class_dir in sorted(Path(img_dir).glob('*')):
        # Stray files next to the class folders carry no label; skip them.
        if not class_dir.is_dir():
            continue

        label = class_dir.name
        for file in sorted(class_dir.glob('*.jpg')):
            img = mpimg.imread(file)

            if img is not None:
                img_list.append((img, label))

    return img_list


# Load all (image, label) pairs from the training folder; the label of each
# image is the name of the subdirectory it was found in.
train_img = load_dataset("dataset/images/images/training/")


In [16]:
# preprocessing data

def standarized_input(image, size=(1100, 600)):
    """Resize ``image`` to a fixed size so every sample has the same shape.

    Args:
        image: Image array accepted by ``cv2.resize``.
        size: Target size forwarded as the ``dsize`` argument of
            ``cv2.resize``, i.e. ``(width, height)``. The default of
            ``(1100, 600)`` is the size that was previously hard-coded.

    Returns:
        The resized image as returned by ``cv2.resize``.
    """
    return cv2.resize(image, size)

def label_encoder(label):
    """Convert a class name to its integer code.

    Returns 1 for the exact string "day" and 0 for every other value.
    """
    if label == "day":
        return 1
    return 0

def preprocess(img_list):
    """Standardize every image and encode every label.

    Args:
        img_list: Iterable of ``(image, label)`` pairs.

    Returns:
        A list of ``(resized_image, numeric_label)`` tuples in input order,
        built with ``standarized_input`` and ``label_encoder``.
    """
    return [
        (standarized_input(image), label_encoder(name))
        for image, name in img_list
    ]

# Resized images paired with their numeric labels (1 for "day", 0 otherwise).
train_std_img_list = preprocess(train_img)


In [None]:
# histogram
def extract_hist_feature(img):
    """Build a normalized colour-histogram feature vector for one image.

    A 256-bin histogram over the value range [0, 256) is computed for each
    of channels 0, 1 and 2. The three histograms are concatenated in that
    order into one 768-element vector, which is then divided by its total
    count so the result does not depend on the number of pixels.

    NOTE(review): the original code named the channels r, g, b in that
    order, i.e. it assumes the image is in RGB channel order - confirm
    against the loader.
    """
    bins = [256]
    value_range = [0, 256]

    per_channel = [
        cv2.calcHist([img], [channel], None, bins, value_range).flatten()
        for channel in range(3)
    ]
    hist = np.concatenate(per_channel)

    # The small epsilon guards against division by zero for an empty image.
    total = np.sum(hist) + 1e-6
    return hist / total


In [18]:
# Turn the (image, label) list into a feature table with a label column
def extract_hist_table(img_list):
    """Build a DataFrame of histogram features plus a ``LABEL`` column.

    Args:
        img_list: Iterable of ``(image, label)`` pairs.

    Returns:
        A DataFrame with one row per image: the columns produced from the
        ``extract_hist_feature`` vector, followed by ``LABEL``.
    """
    # Single pass over the input so one-shot iterables work as well.
    rows = [(extract_hist_feature(image), tag) for image, tag in img_list]

    table = pd.DataFrame([features for features, _ in rows])
    table['LABEL'] = [tag for _, tag in rows]
    return table

# Build the histogram feature table from the preprocessed training images.
# NOTE: this rebinds the module-level name `df`, which earlier in the file
# held the voice.csv data.
df = extract_hist_table(train_std_img_list)


In [19]:
# Split the histogram table into train and test sets (80:20, stratified).
# NOTE: this rebinds the module-level names X and y, which train_svm reads
# as globals when it is called.

y = df["LABEL"].to_numpy()
X = df.drop(columns="LABEL").to_numpy()

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [20]:
# RBF-kernel SVM with hyperparameter tuning

# Candidate values searched exhaustively: 4 values of C x 4 values of gamma.
param_grid = dict(
    C=[0.1, 1, 10, 100],
    gamma=["scale", 0.1, 0.01, 0.001],
    kernel=["rbf"],
)

# 5-fold cross-validated search on the training split, scored by accuracy;
# n_jobs=-1 asks for all available processors.
grid = GridSearchCV(
    estimator=SVC(),
    param_grid=param_grid,
    scoring="accuracy",
    cv=5,
    n_jobs=-1,
)

grid.fit(X_train, y_train)


0,1,2
,estimator,SVC()
,param_grid,"{'C': [0.1, 1, ...], 'gamma': ['scale', 0.1, ...], 'kernel': ['rbf']}"
,scoring,'accuracy'
,n_jobs,-1
,refit,True
,cv,5
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,C,1
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,


In [21]:
# Evaluate the tuned model on the held-out test split

# The estimator selected by the grid search.
best_model = grid.best_estimator_
y_pred = best_model.predict(X_test)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)

print("Best Params :", grid.best_params_)
print("Accuracy (Test):", acc)


Best Params : {'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}
Accuracy (Test): 1.0


In [22]:
from sklearn.metrics import confusion_matrix
# Rows are true labels and columns are predicted labels, both in sorted label
# order; with label_encoder, 1 means "day" and 0 means any other class.
print(confusion_matrix(y_test, y_pred))


[[24  0]
 [ 0 24]]
