# SVM classification using HOG feature extraction

In [1]:
from skimage.feature import hog
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

from statistics import mean
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.svm import SVC
from sklearn.model_selection import cross_validate
from sklearn.model_selection import KFold
import time
from sklearn.decomposition import PCA

In [2]:
# Load the training CSV and separate the 'label' column from the features.
training_data = pd.read_csv('./fashionmnist/fashion-mnist_train.csv')
train_X = training_data.drop(columns='label')
train_y = training_data['label']

# Same split for the test CSV.
test_data = pd.read_csv('./fashionmnist/fashion-mnist_test.csv')
test_X = test_data.drop(columns='label')
test_y = test_data['label']

In [3]:
# Load the unlabeled private dataset (comma-separated, pandas' default).
private_df = pd.read_csv('Private_data.csv')

In [4]:
def extract_hog_features_scala(data, image_shape=(28, 28)):
    """Compute a HOG descriptor for every flattened image in ``data``.

    Args:
        data: Array-like with one flattened image per row (for example the
            scaled arrays produced elsewhere in this notebook). Each row is
            reshaped to ``image_shape`` before feature extraction.
        image_shape: ``(height, width)`` of a single image. Defaults to
            ``(28, 28)``, the shape previously hard-coded here.

    Returns:
        np.ndarray holding one row of HOG features per input image; an empty
        array when ``data`` contains no rows.
    """
    # Only the descriptor is needed, so do not request ``visualize=True``:
    # that makes skimage render an extra HOG image per sample, which was
    # discarded immediately.
    return np.array(
        [hog(img.reshape(image_shape)) for img in np.asarray(data)]
    )

In [5]:
def extract_hog_features(data, image_shape=(28, 28)):
    """Compute a HOG descriptor for every flattened image in ``data``.

    Args:
        data: A pandas DataFrame (one flattened image per row) or any other
            array-like with the same layout. Each row is reshaped to
            ``image_shape`` before feature extraction.
        image_shape: ``(height, width)`` of a single image. Defaults to
            ``(28, 28)``, the shape previously hard-coded here.

    Returns:
        np.ndarray holding one row of HOG features per input image; an empty
        array when ``data`` contains no rows.
    """
    # np.asarray yields the same row-wise array as ``DataFrame.values`` while
    # also accepting plain arrays and lists.
    rows = np.asarray(data)
    # Only the descriptor is needed, so do not request ``visualize=True``:
    # that makes skimage render an extra HOG image per sample, which was
    # discarded immediately.
    return np.array([hog(img.reshape(image_shape)) for img in rows])

In [6]:
# HOG descriptors of the raw (unscaled) pixel data.
hog_train_X = extract_hog_features(train_X)
hog_test_X = extract_hog_features(test_X)

# Standardization only (image preprocessing); the scaler is fitted on the
# training set and then applied to both splits.
scaler = StandardScaler().fit(train_X)
X_train_scaled = scaler.transform(train_X)
X_test_scaled = scaler.transform(test_X)

# HOG descriptors computed after standardization.
hog_train_scaled_X = extract_hog_features_scala(X_train_scaled)
hog_test_scaled_X = extract_hog_features_scala(X_test_scaled)

In [26]:
# Display the private dataset loaded from Private_data.csv for inspection.
private_df

Unnamed: 0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,pixel10,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,97,93,58,0,0,0,0
1,0,0,0,0,0,0,0,1,1,0,...,141,94,0,0,105,7,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,125,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14995,0,0,0,0,0,0,0,0,0,0,...,84,92,43,0,0,0,0,0,0,0
14996,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
14997,0,0,0,0,0,0,0,0,0,0,...,172,148,139,92,10,2,2,0,0,0
14998,0,0,0,0,0,0,0,0,0,12,...,129,133,125,0,0,0,0,0,0,0


In [27]:
# HOG descriptors for the private set. extract_hog_features reshapes every
# row to 28x28, so each row of private_df must hold exactly 784 pixel values.
private_df_hog = extract_hog_features(private_df)

In [25]:
# Train an RBF-kernel SVM on the HOG features of the raw images.
svm_model = SVC(C=10, kernel='rbf').fit(hog_train_X, train_y)

# Predict on the test split and report accuracy.
y_pred = svm_model.predict(hog_test_X)
accuracy = accuracy_score(y_true=test_y, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.8802


In [28]:
# Predict labels for the private set's HOG features with whichever svm_model
# is currently fitted (NOTE(review): presumably the HOG model, going by the
# cell execution counters - confirm before exporting).
pred = svm_model.predict(private_df_hog)
# Zero-padded 5-digit sample ids, one per prediction. Sized from the
# predictions rather than a hard-coded 15000 so the ids cannot fall out of
# step with the data; the zip() in the export cell would otherwise silently
# truncate to the shorter list.
index_list = [f'{num:05}' for num in range(len(pred))]
pred_list = list(pred)

In [13]:
# Train an RBF-kernel SVM on the standardized pixel values.
svm_model = SVC(C=10, kernel='rbf').fit(X_train_scaled, train_y)

# Predict on the standardized test split and report accuracy.
y_pred = svm_model.predict(X_test_scaled)
accuracy = accuracy_score(y_true=test_y, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9075


In [19]:
# Rename the columns positionally to pixel1..pixelN (presumably so they match
# the column names the scaler was fitted on - verify against the training CSV).
private_df.columns = [
    f"pixel{i}" for i, _ in enumerate(private_df.columns, start=1)
]

In [23]:
# Standardize the private set with the already-fitted scaler. Despite its
# name, private_scaler holds the transformed data, not a scaler object.
private_scaler = scaler.transform(private_df)
pred = svm_model.predict(private_scaler)
# Zero-padded 5-digit sample ids, one per prediction. Sized from the
# predictions rather than a hard-coded 15000 so the ids cannot fall out of
# step with the data; the zip() in the export cell would otherwise silently
# truncate to the shorter list.
index_list = [f'{num:05}' for num in range(len(pred))]
pred_list = list(pred)

In [29]:
list1 = index_list
list2 = pred_list

# zip() stops at the shorter input, so a length mismatch would silently drop
# rows from the result file; fail loudly instead.
if len(list1) != len(list2):
    raise ValueError(
        f"index/prediction length mismatch: {len(list1)} != {len(list2)}"
    )

# Pair each sample id with its predicted label.
combined_lists = list(zip(list1, list2))

# Convert to a 2-D array (one "id label" row per sample) and write that array
# out, space-separated, instead of converting the list a second time.
array_2d = np.array(combined_lists)
np.savetxt("testResult.txt", array_2d, fmt='%s', delimiter=" ")

In [9]:
# HOG after standardization: RBF-kernel SVM on the scaled-then-HOG features.
svm_model = SVC(C=8, kernel='rbf', gamma='scale').fit(
    hog_train_scaled_X, train_y
)

# Predict on the test split and report accuracy.
y_pred = svm_model.predict(hog_test_scaled_X)
accuracy = accuracy_score(y_true=test_y, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.8627


In [10]:
# Plain SVM: RBF kernel trained directly on the raw pixel values.
svm_model = SVC(C=10, kernel='rbf', gamma='scale').fit(train_X, train_y)

# Predict on the test split and report accuracy.
y_pred = svm_model.predict(test_X)
accuracy = accuracy_score(y_true=test_y, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9083


In [11]:
# PCA: fit 400 components on the training pixels, then project both splits.
pca = PCA(n_components=400).fit(train_X)
pca_train_X, pca_test_X = pca.transform(train_X), pca.transform(test_X)

# RBF-kernel SVM on the PCA-reduced features.
svm_model = SVC(C=10, kernel='rbf', gamma='scale').fit(pca_train_X, train_y)

y_pred = svm_model.predict(pca_test_X)
accuracy = accuracy_score(y_true=test_y, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9102


In [12]:
# PCA: fit 256 components on the training pixels, then project both splits.
pca = PCA(n_components=256).fit(train_X)
pca_train_X, pca_test_X = pca.transform(train_X), pca.transform(test_X)

# RBF-kernel SVM on the PCA-reduced features.
svm_model = SVC(C=10, kernel='rbf', gamma='scale').fit(pca_train_X, train_y)

y_pred = svm_model.predict(pca_test_X)
accuracy = accuracy_score(y_true=test_y, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9083
