opencv version: 3.4.2

# Problem 1

In [1]:
import time
import cv2
import glob
import os
import numpy as np
import random
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.kernel_approximation import AdditiveChi2Sampler
from sklearn.svm import LinearSVC

start_time = time.time()

'''
Parameters
'''
patch_stride = 16
K = 20

'''
Load Dataset
'''
def scene15():

    train_folders = glob.glob("SCENE-15/train/*")
    train_folders.sort()
    classes = dict()
    x_train = list()
    y_train = list()
    for index, folder in enumerate(train_folders):
        label = os.path.basename(folder)
        classes[label] = index
        paths = glob.glob(os.path.join(folder, "*"))
        for path in paths:
            x_train.append(cv2.imread(path, 0))
            y_train.append(index)

    x_test = list()
    y_test = list()
    test_folders = glob.glob("SCENE-15/test/*")
    test_folders.sort()
    for folder in test_folders:
        label = os.path.basename(folder)
        index = classes[label]
        paths = glob.glob(os.path.join(folder, "*"))
        for path in paths:
            x_test.append(cv2.imread(path, 0))
            y_test.append(index)

    return x_train, y_train, x_test, y_test, sorted(classes.keys())

print("Load Dataset ...")
x_train, y_train, x_test, y_test, labels_names = scene15()

random_indices = list(range(len(y_train)))
random.shuffle(random_indices)
x_train = np.array(x_train)[random_indices].tolist()
y_train = np.array(y_train)[random_indices].tolist()

'''
Extract Patches
'''
train_key_points = list()
train_feature_shapes = list()
for image in x_train:
    h, w = image.shape
    image_key_points = list()
    for x in range(0, w, patch_stride):
        for y in range(0, h, patch_stride):
            image_key_points.append(cv2.KeyPoint(x, y, patch_stride))
    train_key_points.append(image_key_points)
    train_feature_shapes.append((len(range(0, w, patch_stride)), (len(range(0, h, patch_stride)))))

test_key_points = list()
test_feature_shapes = list()
for image in x_test:
    h, w = image.shape
    image_key_points = list()
    for x in range(0, w, patch_stride):
        for y in range(0, h, patch_stride):
            image_key_points.append(cv2.KeyPoint(x, y, patch_stride))
    test_key_points.append(image_key_points)
    test_feature_shapes.append((len(range(0, w, patch_stride)), (len(range(0, h, patch_stride)))))



Load Dataset ...


  x_train = np.array(x_train)[random_indices].tolist()


1-1.

    [특징 추출] 코드 상에서 주어진 Keypoint를 통하여, 각각의 이미지마다 특징을 추출하세요. 이 때, OpenCV 패키지를 활용하여, SIFT와 같은 특징을 추출하세요. (자세한 사항은 템플릿 코드를 참고)

In [2]:

'''
Extract Features
'''
###################################################################################
# 아래의 코드의 빈 곳(None 부분)을 채우세요.
# None 부분 외의 부분은 가급적 수정 하지 말고, 주어진 형식에 맞추어
# None 부분 만을 채워주세요. 임의적으로 전체적인 구조를 수정하셔도 좋지만,
# 파이썬 코딩에 익숙 하지 않으시면, 가급적 틀을 유지하시는 것을 권장합니다.
# 1) descriptor를 선정하세요. (SIFT, SURF 등) OpenCV의 패키지를 사용하시면 됩니다.
# 2) for 반복문 안에서, 1)에서 정의한 descriptor를 통하여 features를 추출하세요.
#    features의 차원은 (# of keypoints, feature_dim) 입니다.
###################################################################################
# Write Your Code Here ############################################################
descriptor = cv2.ORB_create()
###################################################################################
train_features = list()
index = 0
for image, key_points in zip(x_train, train_key_points):
    # Write Your Code Here ########################################################
    _, features = descriptor.compute(image, key_points)
    ###############################################################################
    train_features.append(features)
    index += 1
    print("Extract Train Features ... {:4d}/{:4d}".format(index, len(x_train)))

test_features = list()
index = 0
for image, key_points in zip(x_test, test_key_points):
    # Write Your Code Here ########################################################
    _, features = descriptor.compute(image, key_points)
    ###############################################################################
    test_features.append(features)
    index += 1
    print("Extract Test Features ... {:4d}/{:4d}".format(index, len(x_test)))

'''
Normalizing
'''
flattened_train_features = np.concatenate(train_features, axis=0)
pca = PCA(n_components=flattened_train_features.shape[-1], whiten=True)
pca.fit(flattened_train_features)
train_normalized_features = list()
index = 0
for features in train_features:
    features = pca.transform(features)
    train_normalized_features.append(features)
    index += 1
    print("Normalize Train Features ... {:4d}/{:4d}".format(index, len(train_features)))
test_normalized_features = list()
index = 0
for features in test_features:
    features = pca.transform(features)
    test_normalized_features.append(features)
    index += 1
    print("Normalize Test Features ... {:4d}/{:4d}".format(index, len(test_features)))


Extract Train Features ...    1/1538
Extract Train Features ...    2/1538
Extract Train Features ...    3/1538
Extract Train Features ...    4/1538
Extract Train Features ...    5/1538
Extract Train Features ...    6/1538
Extract Train Features ...    7/1538
Extract Train Features ...    8/1538
Extract Train Features ...    9/1538
Extract Train Features ...   10/1538
Extract Train Features ...   11/1538
Extract Train Features ...   12/1538
Extract Train Features ...   13/1538
Extract Train Features ...   14/1538
Extract Train Features ...   15/1538
Extract Train Features ...   16/1538
Extract Train Features ...   17/1538
Extract Train Features ...   18/1538
Extract Train Features ...   19/1538
Extract Train Features ...   20/1538
Extract Train Features ...   21/1538
Extract Train Features ...   22/1538
Extract Train Features ...   23/1538
Extract Train Features ...   24/1538
Extract Train Features ...   25/1538
Extract Train Features ...   26/1538
Extract Train Features ...   27/1538
E

Report: 

    Patch stride와 K means clustering을 각각 16과 20으로 설정한 뒤, 데이터셋을 불러왔다. 그 후, 주어진 keypoint에서 ORB descriptor를 사용해 특징을 추출하고, 이에 normalization을 수행해주었다. 

1-2.

    [Bag-of-Features 구현] K-means을 통해 구해진 codebook을 통하여, 각각의 이미지의 특징을 인코딩(histogram화 혹은 양자화 라고도 합니다.) 하세요. (자세한 사항은 템플릿 코드를 참고)

In [16]:
'''
Make Codebook
'''
###################################################################################
# 아래의 코드의 빈 곳(None 부분)을 채우세요.
# None 부분 외의 부분은 가급적 수정 하지 말고, 주어진 형식에 맞추어
# None 부분 만을 채워주세요. 임의적으로 전체적인 구조를 수정하셔도 좋지만,
# 파이썬 코딩에 익숙 하지 않으시면, 가급적 틀을 유지하시는 것을 권장합니다.
# 1) 함수 encode 부분 안의 None 부분을 채우세요.
#    distances는 K means 알고리즘을 통해 얻어진 centroids, 즉 codewords(visual words)와
#    각 이미지의 특징들 간의 거리 입니다.
#    distances 값을 이용하여, features(# of keypoints, feature_dim)를
#    인코딩(histogram 혹은 quantization이라고도 함) 하세요.
#    인코딩된 결과인 representations은 (K)로 표현되어야 합니다.
#    이 때, K는 codewords의 개수입니다.
###################################################################################
class Codebook:

    def __init__(self, K):

        self.K = K

        self.kmeans = KMeans(n_clusters=K, verbose=True)

    def make_code_words(self, features):

        self.kmeans.fit(features)

    def encode(self, features, shapes):

        distances = self.kmeans.transform(features)

        # Write Your Code Here ########################################################
        representations = np.zeros(dtype=np.int64, shape=(self.K,))
        '''
        distances를 통해 representation에 대한 연산 진행
        np.argmin, np.arange, np.sum 등 라이브러리 사용
        '''
        '''
        nearest_centroid_indices = self.kmeans.predict(features)
        
        nearest_centroid_indices 활용한 연산도 가능
        '''
        nearest_centroid_idx = self.kmeans.predict(features)
        for idx in nearest_centroid_idx:
            representations[idx] += 1
            
        ###############################################################################

        if np.array(representations).shape != (self.K, ):
            # representations는 반드시 (K) 차원을 가져야 합니다.
            # 해당 조건문은 잘못 구현했을 경우를 판단하기 위해 작성되었으며, 추후 문제없이 구현되었다면 지우셔도 됩니다.
            print("Your code may be wrong")

        return representations

print("Make Codebook ...")
flattened_normalized_train_features = pca.transform(flattened_train_features)
codebook = Codebook(K)
codebook.make_code_words(flattened_normalized_train_features)

'''
Encode Features
'''
train_encoded_features = list()
index = 0
for features, shapes in zip(train_normalized_features, train_feature_shapes):
    encoded_features = codebook.encode(features, shapes)
    train_encoded_features.append(encoded_features)
    index += 1
    print("Encoding Train Features ... {:4d}/{:4d}".format(index, len(train_normalized_features)))
test_encoded_features = list()
index = 0
for features, shapes in zip(test_normalized_features, test_feature_shapes):
    encoded_features = codebook.encode(features, shapes)
    test_encoded_features.append(encoded_features)
    index += 1
    print("Encoding Test Features ... {:4d}/{:4d}".format(index, len(test_normalized_features)))

'''
Approximate Kernel
'''
chi2sampler = AdditiveChi2Sampler(sample_steps=2)
chi2sampler.fit(train_encoded_features, y_train)
train_encoded_features = chi2sampler.transform(train_encoded_features)
test_encoded_features = chi2sampler.transform(test_encoded_features)


Make Codebook ...
Initialization complete
Iteration 0, inertia 8941514.017877419.
Iteration 1, inertia 7106955.640813997.
Iteration 2, inertia 6993127.3509082515.
Iteration 3, inertia 6948211.786335387.
Iteration 4, inertia 6920715.110366154.
Iteration 5, inertia 6901207.575623773.
Iteration 6, inertia 6886370.67170633.
Iteration 7, inertia 6874264.35836044.
Iteration 8, inertia 6863685.26240388.
Iteration 9, inertia 6854355.979594094.
Iteration 10, inertia 6845962.190752074.
Iteration 11, inertia 6838279.106193939.
Iteration 12, inertia 6831476.86331477.
Iteration 13, inertia 6825941.81055539.
Iteration 14, inertia 6821428.2118552625.
Iteration 15, inertia 6817660.8665780425.
Iteration 16, inertia 6814528.254258452.
Iteration 17, inertia 6811871.097959658.
Iteration 18, inertia 6809582.451240652.
Iteration 19, inertia 6807649.529882865.
Iteration 20, inertia 6806036.229048199.
Iteration 21, inertia 6804766.41458477.
Iteration 22, inertia 6803742.271486646.
Iteration 23, inertia 680288

Report: 

    normalize된 train feature로 fit한 k-means clustering을 사용해 train feature와 test feature를 encode 해준다. 이때 self.kmeans.predict를 사용해 K개의 representation 중에 어느 representation에 속하는지 구하고 이를 histogram으로 만들어 반환한다.

1-3.

    [SVM을 통한 이미지 분류] Bag-of-Features 알고리즘을 통해 얻어진 인코딩된 벡터를 통해,SVM을 학습하세요. 템플릿 코드 기준으로는, sklearn 패키지의 LinearSVC를 활용하여 SVM 학습을 구현하시면 됩니다.

In [22]:

'''
Classify Images with SVM
'''
###################################################################################
# 아래의 코드의 빈 곳(None 부분)을 채우세요.
# None 부분 외의 부분은 가급적 수정 하지 말고, 주어진 형식에 맞추어
# None 부분 만을 채워주세요. 임의적으로 전체적인 구조를 수정하셔도 좋지만,
# 파이썬 코딩에 익숙 하지 않으시면, 가급적 틀을 유지하시는 것을 권장합니다.
# 1) 아래의 model 부분에 sklearn 패키지를 활용하여, Linear SVM(SVC) 모델을 정의하세요.
#    처음에는 SVM의 parameter를 기본으로 설정하여 구동하시길 권장합니다.
#    구동 성공 시, SVM의 C 값과 max_iter 파라미터 등을 조정하여 성능 향상을 해보시길 바랍니다.
###################################################################################
# Write Your Code Here ############################################################
model = LinearSVC(C=0.01, max_iter=100, verbose=True)
###################################################################################

print("Classify Images ...")
model.fit(train_encoded_features, y_train)
train_score = model.score(train_encoded_features, y_train)
test_score = model.score(test_encoded_features, y_test)

elapsed_time = time.time() - start_time

'''
Print Results
'''
print()
print("=" * 90)
print("Train  Score: {:.5f}".format(train_score))
print("Test   Score: {:.5f}".format(test_score))
print("Elapsed Time: {:.2f} secs".format(elapsed_time))
print("=" * 90)


Classify Images ...
[LibLinear]..*.
optimization finished, #iter = 30
Objective value = -3.299359
nSV = 1095
...*
optimization finished, #iter = 31
Objective value = -2.673902
nSV = 584
..*
optimization finished, #iter = 29
Objective value = -2.912871
nSV = 627
..*.
optimization finished, #iter = 30
Objective value = -2.851211
nSV = 645
...*
optimization finished, #iter = 32
Objective value = -4.444787
nSV = 1389
..*
optimization finished, #iter = 29
Objective value = -3.535181
nSV = 1098
...*
optimization finished, #iter = 32
Objective value = -3.507456
nSV = 1220
..*
optimization finished, #iter = 29
Objective value = -3.525413
nSV = 1305
..*
optimization finished, #iter = 29
Objective value = -3.627420
nSV = 1219
..*.
optimization finished, #iter = 30
Objective value = -3.654487
nSV = 1309
..*.
optimization finished, #iter = 30
Objective value = -3.795873
nSV = 1090
..*
optimization finished, #iter = 29
Objective value = -3.210539
nSV = 841
..*
optimization finished, #iter = 29
Obje

Report: 

    앞서 얻은 bag-of-feature에서 얻어진 특징 벡터를 사용해 sklearn의 SVM을 학습해보았는데, 이때 C=0.01, max_iter=100, verbose=True로 설정하였더니 train 데이터에 대해서 0.371, test 데이터에 대해 0.265의 정확도를 보였다.

# Problem 2

2-1. 

    [특징 추출] 특징 추출 알고리즘과, keypoint를 만들 때의 patch_stride를 바꾸어보며 성능을 개선해보고, 성능 증감의 이유를 분석해보세요.

In [1]:
import time
import cv2
import glob
import os
import numpy as np
import random
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.kernel_approximation import AdditiveChi2Sampler
from sklearn.svm import LinearSVC

start_time = time.time()

'''
Parameters
'''
patch_stride = 10
K = 20

'''
Load Dataset
'''
def scene15():

    train_folders = glob.glob("SCENE-15/train/*")
    train_folders.sort()
    classes = dict()
    x_train = list()
    y_train = list()
    for index, folder in enumerate(train_folders):
        label = os.path.basename(folder)
        classes[label] = index
        paths = glob.glob(os.path.join(folder, "*"))
        for path in paths:
            x_train.append(cv2.imread(path, 0))
            y_train.append(index)

    x_test = list()
    y_test = list()
    test_folders = glob.glob("SCENE-15/test/*")
    test_folders.sort()
    for folder in test_folders:
        label = os.path.basename(folder)
        index = classes[label]
        paths = glob.glob(os.path.join(folder, "*"))
        for path in paths:
            x_test.append(cv2.imread(path, 0))
            y_test.append(index)

    return x_train, y_train, x_test, y_test, sorted(classes.keys())

print("Load Dataset ...")
x_train, y_train, x_test, y_test, labels_names = scene15()

random_indices = list(range(len(y_train)))
random.shuffle(random_indices)
x_train = np.array(x_train)[random_indices].tolist()
y_train = np.array(y_train)[random_indices].tolist()

'''
Extract Patches
'''
train_key_points = list()
train_feature_shapes = list()
for image in x_train:
    h, w = image.shape
    image_key_points = list()
    for x in range(0, w, patch_stride):
        for y in range(0, h, patch_stride):
            image_key_points.append(cv2.KeyPoint(x, y, patch_stride))
    train_key_points.append(image_key_points)
    train_feature_shapes.append((len(range(0, w, patch_stride)), (len(range(0, h, patch_stride)))))

test_key_points = list()
test_feature_shapes = list()
for image in x_test:
    h, w = image.shape
    image_key_points = list()
    for x in range(0, w, patch_stride):
        for y in range(0, h, patch_stride):
            image_key_points.append(cv2.KeyPoint(x, y, patch_stride))
    test_key_points.append(image_key_points)
    test_feature_shapes.append((len(range(0, w, patch_stride)), (len(range(0, h, patch_stride)))))
    
    
'''
Extract Features
'''
###################################################################################
# 아래의 코드의 빈 곳(None 부분)을 채우세요.
# None 부분 외의 부분은 가급적 수정 하지 말고, 주어진 형식에 맞추어
# None 부분 만을 채워주세요. 임의적으로 전체적인 구조를 수정하셔도 좋지만,
# 파이썬 코딩에 익숙 하지 않으시면, 가급적 틀을 유지하시는 것을 권장합니다.
# 1) descriptor를 선정하세요. (SIFT, SURF 등) OpenCV의 패키지를 사용하시면 됩니다.
# 2) for 반복문 안에서, 1)에서 정의한 descriptor를 통하여 features를 추출하세요.
#    features의 차원은 (# of keypoints, feature_dim) 입니다.
###################################################################################
# Write Your Code Here ############################################################
descriptor = cv2.xfeatures2d.SIFT_create()
###################################################################################
train_features = list()
index = 0
for image, key_points in zip(x_train, train_key_points):
    # Write Your Code Here ########################################################
    _, features = descriptor.compute(image, key_points)
    ###############################################################################
    train_features.append(features)
    index += 1
    print("Extract Train Features ... {:4d}/{:4d}".format(index, len(x_train)))

test_features = list()
index = 0
for image, key_points in zip(x_test, test_key_points):
    # Write Your Code Here ########################################################
    _, features = descriptor.compute(image, key_points)
    ###############################################################################
    test_features.append(features)
    index += 1
    print("Extract Test Features ... {:4d}/{:4d}".format(index, len(x_test)))

'''
Normalizing
'''
flattened_train_features = np.concatenate(train_features, axis=0)
pca = PCA(n_components=flattened_train_features.shape[-1], whiten=True)
pca.fit(flattened_train_features)
train_normalized_features = list()
index = 0
for features in train_features:
    features = pca.transform(features)
    train_normalized_features.append(features)
    index += 1
    print("Normalize Train Features ... {:4d}/{:4d}".format(index, len(train_features)))
test_normalized_features = list()
index = 0
for features in test_features:
    features = pca.transform(features)
    test_normalized_features.append(features)
    index += 1
    print("Normalize Test Features ... {:4d}/{:4d}".format(index, len(test_features)))
    
    '''
Make Codebook
'''
###################################################################################
# 아래의 코드의 빈 곳(None 부분)을 채우세요.
# None 부분 외의 부분은 가급적 수정 하지 말고, 주어진 형식에 맞추어
# None 부분 만을 채워주세요. 임의적으로 전체적인 구조를 수정하셔도 좋지만,
# 파이썬 코딩에 익숙 하지 않으시면, 가급적 틀을 유지하시는 것을 권장합니다.
# 1) 함수 encode 부분 안의 None 부분을 채우세요.
#    distances는 K means 알고리즘을 통해 얻어진 centroids, 즉 codewords(visual words)와
#    각 이미지의 특징들 간의 거리 입니다.
#    distances 값을 이용하여, features(# of keypoints, feature_dim)를
#    인코딩(histogram 혹은 quantization이라고도 함) 하세요.
#    인코딩된 결과인 representations은 (K)로 표현되어야 합니다.
#    이 때, K는 codewords의 개수입니다.
###################################################################################
class Codebook:

    def __init__(self, K):

        self.K = K

        self.kmeans = KMeans(n_clusters=K, verbose=True)

    def make_code_words(self, features):

        self.kmeans.fit(features)

    def encode(self, features, shapes):

        distances = self.kmeans.transform(features)

        # Write Your Code Here ########################################################
        representations = np.zeros(dtype=np.int64, shape=(self.K,))
        '''
        distances를 통해 representation에 대한 연산 진행
        np.argmin, np.arange, np.sum 등 라이브러리 사용
        '''
        '''
        nearest_centroid_indices = self.kmeans.predict(features)
        
        nearest_centroid_indices 활용한 연산도 가능
        '''
        nearest_centroid_idx = self.kmeans.predict(features)
        for idx in nearest_centroid_idx:
            representations[idx] += 1
            
        ###############################################################################

        if np.array(representations).shape != (self.K, ):
            # representations는 반드시 (K) 차원을 가져야 합니다.
            # 해당 조건문은 잘못 구현했을 경우를 판단하기 위해 작성되었으며, 추후 문제없이 구현되었다면 지우셔도 됩니다.
            print("Your code may be wrong")

        return representations

print("Make Codebook ...")
flattened_normalized_train_features = pca.transform(flattened_train_features)
codebook = Codebook(K)
codebook.make_code_words(flattened_normalized_train_features)

'''
Encode Features
'''
train_encoded_features = list()
index = 0
for features, shapes in zip(train_normalized_features, train_feature_shapes):
    encoded_features = codebook.encode(features, shapes)
    train_encoded_features.append(encoded_features)
    index += 1
    print("Encoding Train Features ... {:4d}/{:4d}".format(index, len(train_normalized_features)))
test_encoded_features = list()
index = 0
for features, shapes in zip(test_normalized_features, test_feature_shapes):
    encoded_features = codebook.encode(features, shapes)
    test_encoded_features.append(encoded_features)
    index += 1
    print("Encoding Text Features ... {:4d}/{:4d}".format(index, len(test_normalized_features)))

'''
Approximate Kernel
'''
chi2sampler = AdditiveChi2Sampler(sample_steps=2)
chi2sampler.fit(train_encoded_features, y_train)
train_encoded_features = chi2sampler.transform(train_encoded_features)
test_encoded_features = chi2sampler.transform(test_encoded_features)


'''
Classify Images with SVM
'''
###################################################################################
# 아래의 코드의 빈 곳(None 부분)을 채우세요.
# None 부분 외의 부분은 가급적 수정 하지 말고, 주어진 형식에 맞추어
# None 부분 만을 채워주세요. 임의적으로 전체적인 구조를 수정하셔도 좋지만,
# 파이썬 코딩에 익숙 하지 않으시면, 가급적 틀을 유지하시는 것을 권장합니다.
# 1) 아래의 model 부분에 sklearn 패키지를 활용하여, Linear SVM(SVC) 모델을 정의하세요.
#    처음에는 SVM의 parameter를 기본으로 설정하여 구동하시길 권장합니다.
#    구동 성공 시, SVM의 C 값과 max_iter 파라미터 등을 조정하여 성능 향상을 해보시길 바랍니다.
###################################################################################
# Write Your Code Here ############################################################
model = LinearSVC(C=0.01, max_iter=100, verbose=True)
###################################################################################

print("Classify Images ...")
model.fit(train_encoded_features, y_train)
train_score = model.score(train_encoded_features, y_train)
test_score = model.score(test_encoded_features, y_test)

elapsed_time = time.time() - start_time

'''
Print Results
'''
print()
print("=" * 90)
print("Train  Score: {:.5f}".format(train_score))
print("Test   Score: {:.5f}".format(test_score))
print("Elapsed Time: {:.2f} secs".format(elapsed_time))
print("=" * 90)




Load Dataset ...


  x_train = np.array(x_train)[random_indices].tolist()


Extract Train Features ...    1/1538
Extract Train Features ...    2/1538
Extract Train Features ...    3/1538
Extract Train Features ...    4/1538
Extract Train Features ...    5/1538
Extract Train Features ...    6/1538
Extract Train Features ...    7/1538
Extract Train Features ...    8/1538
Extract Train Features ...    9/1538
Extract Train Features ...   10/1538
Extract Train Features ...   11/1538
Extract Train Features ...   12/1538
Extract Train Features ...   13/1538
Extract Train Features ...   14/1538
Extract Train Features ...   15/1538
Extract Train Features ...   16/1538
Extract Train Features ...   17/1538
Extract Train Features ...   18/1538
Extract Train Features ...   19/1538
Extract Train Features ...   20/1538
Extract Train Features ...   21/1538
Extract Train Features ...   22/1538
Extract Train Features ...   23/1538
Extract Train Features ...   24/1538
Extract Train Features ...   25/1538
Extract Train Features ...   26/1538
Extract Train Features ...   27/1538
E



Report: 

    특징 추출 알고리즘을 ORB에서 SIFT로 바꾸고, key point 만들 때의 patch_stride를 16에서 10으로 바꿔주었을 때 train 데이터에 대해서 0.603, test 데이터에 대해서 0.490 정확도를 보이며 앞선 모델보다 높은 정확도를 보였다. 이는 patch_stride를 줄여 보다 촘촘하게 key point를 추출하였기 때문이며, patch_stride를 더 줄여 더 많은 keypoint를 추출하게 된다면 더 높은 정확도를 보일 수 있으나 연산량 역시 크게 증가해 시간을 많이 소요할 것으로 예상된다.

2-2. 

    [K-means] K-means의 visual work 개수 K를 변경해보며 성능을 개선해보고, 성능 증감의 이 유를 분석해보세요.

In [4]:
import time
import cv2
import glob
import os
import numpy as np
import random
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.kernel_approximation import AdditiveChi2Sampler
from sklearn.svm import LinearSVC

start_time = time.time()

'''
Parameters
'''
patch_stride = 10
K = 400

'''
Load Dataset
'''
def scene15():

    train_folders = glob.glob("SCENE-15/train/*")
    train_folders.sort()
    classes = dict()
    x_train = list()
    y_train = list()
    for index, folder in enumerate(train_folders):
        label = os.path.basename(folder)
        classes[label] = index
        paths = glob.glob(os.path.join(folder, "*"))
        for path in paths:
            x_train.append(cv2.imread(path, 0))
            y_train.append(index)

    x_test = list()
    y_test = list()
    test_folders = glob.glob("SCENE-15/test/*")
    test_folders.sort()
    for folder in test_folders:
        label = os.path.basename(folder)
        index = classes[label]
        paths = glob.glob(os.path.join(folder, "*"))
        for path in paths:
            x_test.append(cv2.imread(path, 0))
            y_test.append(index)

    return x_train, y_train, x_test, y_test, sorted(classes.keys())

print("Load Dataset ...")
x_train, y_train, x_test, y_test, labels_names = scene15()

random_indices = list(range(len(y_train)))
random.shuffle(random_indices)
x_train = np.array(x_train)[random_indices].tolist()
y_train = np.array(y_train)[random_indices].tolist()

'''
Extract Patches
'''
train_key_points = list()
train_feature_shapes = list()
for image in x_train:
    h, w = image.shape
    image_key_points = list()
    for x in range(0, w, patch_stride):
        for y in range(0, h, patch_stride):
            image_key_points.append(cv2.KeyPoint(x, y, patch_stride))
    train_key_points.append(image_key_points)
    train_feature_shapes.append((len(range(0, w, patch_stride)), (len(range(0, h, patch_stride)))))

test_key_points = list()
test_feature_shapes = list()
for image in x_test:
    h, w = image.shape
    image_key_points = list()
    for x in range(0, w, patch_stride):
        for y in range(0, h, patch_stride):
            image_key_points.append(cv2.KeyPoint(x, y, patch_stride))
    test_key_points.append(image_key_points)
    test_feature_shapes.append((len(range(0, w, patch_stride)), (len(range(0, h, patch_stride)))))
    
    
'''
Extract Features
'''
descriptor = cv2.xfeatures2d.SIFT_create()
train_features = list()
index = 0
for image, key_points in zip(x_train, train_key_points):
    
    _, features = descriptor.compute(image, key_points)
    
    train_features.append(features)
    index += 1
    print("Extract Train Features ... {:4d}/{:4d}".format(index, len(x_train)))

test_features = list()
index = 0
for image, key_points in zip(x_test, test_key_points):
    
    _, features = descriptor.compute(image, key_points)
    
    test_features.append(features)
    index += 1
    print("Extract Test Features ... {:4d}/{:4d}".format(index, len(x_test)))

'''
Normalizing
'''
flattened_train_features = np.concatenate(train_features, axis=0)
pca = PCA(n_components=flattened_train_features.shape[-1], whiten=True)
pca.fit(flattened_train_features)
train_normalized_features = list()
index = 0
for features in train_features:
    features = pca.transform(features)
    train_normalized_features.append(features)
    index += 1
    print("Normalize Train Features ... {:4d}/{:4d}".format(index, len(train_features)))
test_normalized_features = list()
index = 0
for features in test_features:
    features = pca.transform(features)
    test_normalized_features.append(features)
    index += 1
    print("Normalize Test Features ... {:4d}/{:4d}".format(index, len(test_features)))
    

class Codebook:

    def __init__(self, K):

        self.K = K

        self.kmeans = KMeans(n_clusters=K, verbose=True)

    def make_code_words(self, features):

        self.kmeans.fit(features)

    def encode(self, features, shapes):

        distances = self.kmeans.transform(features)

        
        representations = np.zeros(dtype=np.int64, shape=(self.K,))

        nearest_centroid_idx = self.kmeans.predict(features)
        for idx in nearest_centroid_idx:
            representations[idx] += 1
            
        if np.array(representations).shape != (self.K, ):
            print("Your code may be wrong")

        return representations

print("Make Codebook ...")
flattened_normalized_train_features = pca.transform(flattened_train_features)
codebook = Codebook(K)
codebook.make_code_words(flattened_normalized_train_features)

'''
Encode Features
'''
train_encoded_features = list()
index = 0
for features, shapes in zip(train_normalized_features, train_feature_shapes):
    encoded_features = codebook.encode(features, shapes)
    train_encoded_features.append(encoded_features)
    index += 1
    print("Encoding Train Features ... {:4d}/{:4d}".format(index, len(train_normalized_features)))
test_encoded_features = list()
index = 0
for features, shapes in zip(test_normalized_features, test_feature_shapes):
    encoded_features = codebook.encode(features, shapes)
    test_encoded_features.append(encoded_features)
    index += 1
    print("Encoding Text Features ... {:4d}/{:4d}".format(index, len(test_normalized_features)))

'''
Approximate Kernel
'''
chi2sampler = AdditiveChi2Sampler(sample_steps=2)
chi2sampler.fit(train_encoded_features, y_train)
train_encoded_features = chi2sampler.transform(train_encoded_features)
test_encoded_features = chi2sampler.transform(test_encoded_features)


'''
Classify Images with SVM
'''

model = LinearSVC(C=0.01, max_iter=100, verbose=True)

print("Classify Images ...")
model.fit(train_encoded_features, y_train)
train_score = model.score(train_encoded_features, y_train)
test_score = model.score(test_encoded_features, y_test)

elapsed_time = time.time() - start_time

'''
Print Results
'''
print()
print("=" * 90)
print("Train  Score: {:.5f}".format(train_score))
print("Test   Score: {:.5f}".format(test_score))
print("Elapsed Time: {:.2f} secs".format(elapsed_time))
print("=" * 90)


Load Dataset ...


  x_train = np.array(x_train)[random_indices].tolist()


Extract Train Features ...    1/1538
Extract Train Features ...    2/1538
Extract Train Features ...    3/1538
Extract Train Features ...    4/1538
Extract Train Features ...    5/1538
Extract Train Features ...    6/1538
Extract Train Features ...    7/1538
Extract Train Features ...    8/1538
Extract Train Features ...    9/1538
Extract Train Features ...   10/1538
Extract Train Features ...   11/1538
Extract Train Features ...   12/1538
Extract Train Features ...   13/1538
Extract Train Features ...   14/1538
Extract Train Features ...   15/1538
Extract Train Features ...   16/1538
Extract Train Features ...   17/1538
Extract Train Features ...   18/1538
Extract Train Features ...   19/1538
Extract Train Features ...   20/1538
Extract Train Features ...   21/1538
Extract Train Features ...   22/1538
Extract Train Features ...   23/1538
Extract Train Features ...   24/1538
Extract Train Features ...   25/1538
Extract Train Features ...   26/1538
Extract Train Features ...   27/1538
E

Report: 

    2-1)의 설정을 유지하면서 K-means의 visual work 개수 K를 20개에서 400개로 변경하였을 때, train 데이터에 대해 1.0, test 데이터에 대해 0.626 정확도를 보이며 성능이 크게 향상된 것을 볼 수 있다. 이는 representation 의 개수를 늘림으로서 더 다양하고 세세하게 특징들을 분류하여 나타낼 수 있기 때문이다. 

2-3. 

    [SVM] SVM의 학습 파라미터인 C, max_iter 값 등을 수정해보며 성능을 개선해보고, 성능 증 감의 이유를 분석해보세요.

In [22]:
import time
import cv2
import glob
import os
import numpy as np
import random
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.kernel_approximation import AdditiveChi2Sampler
from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV

start_time = time.time()

'''
Parameters
'''
patch_stride = 10
K = 400

'''
Load Dataset
'''
def scene15():

    train_folders = glob.glob("SCENE-15/train/*")
    train_folders.sort()
    classes = dict()
    x_train = list()
    y_train = list()
    for index, folder in enumerate(train_folders):
        label = os.path.basename(folder)
        classes[label] = index
        paths = glob.glob(os.path.join(folder, "*"))
        for path in paths:
            x_train.append(cv2.imread(path, 0))
            y_train.append(index)

    x_test = list()
    y_test = list()
    test_folders = glob.glob("SCENE-15/test/*")
    test_folders.sort()
    for folder in test_folders:
        label = os.path.basename(folder)
        index = classes[label]
        paths = glob.glob(os.path.join(folder, "*"))
        for path in paths:
            x_test.append(cv2.imread(path, 0))
            y_test.append(index)

    return x_train, y_train, x_test, y_test, sorted(classes.keys())

print("Load Dataset ...")
x_train, y_train, x_test, y_test, labels_names = scene15()

random_indices = list(range(len(y_train)))
random.shuffle(random_indices)
x_train = np.array(x_train)[random_indices].tolist()
y_train = np.array(y_train)[random_indices].tolist()

'''
Extract Patches
'''
train_key_points = list()
train_feature_shapes = list()
for image in x_train:
    h, w = image.shape
    image_key_points = list()
    for x in range(0, w, patch_stride):
        for y in range(0, h, patch_stride):
            image_key_points.append(cv2.KeyPoint(x, y, patch_stride))
    train_key_points.append(image_key_points)
    train_feature_shapes.append((len(range(0, w, patch_stride)), (len(range(0, h, patch_stride)))))

test_key_points = list()
test_feature_shapes = list()
for image in x_test:
    h, w = image.shape
    image_key_points = list()
    for x in range(0, w, patch_stride):
        for y in range(0, h, patch_stride):
            image_key_points.append(cv2.KeyPoint(x, y, patch_stride))
    test_key_points.append(image_key_points)
    test_feature_shapes.append((len(range(0, w, patch_stride)), (len(range(0, h, patch_stride)))))
    
    
'''
Extract Features
'''
descriptor = cv2.xfeatures2d.SIFT_create()
train_features = list()
index = 0
for image, key_points in zip(x_train, train_key_points):
    
    _, features = descriptor.compute(image, key_points)
    
    train_features.append(features)
    index += 1
    print("Extract Train Features ... {:4d}/{:4d}".format(index, len(x_train)))

test_features = list()
index = 0
for image, key_points in zip(x_test, test_key_points):
    
    _, features = descriptor.compute(image, key_points)
    
    test_features.append(features)
    index += 1
    print("Extract Test Features ... {:4d}/{:4d}".format(index, len(x_test)))

'''
Normalizing
'''
flattened_train_features = np.concatenate(train_features, axis=0)
pca = PCA(n_components=flattened_train_features.shape[-1], whiten=True)
pca.fit(flattened_train_features)
train_normalized_features = list()
index = 0
for features in train_features:
    features = pca.transform(features)
    train_normalized_features.append(features)
    index += 1
    print("Normalize Train Features ... {:4d}/{:4d}".format(index, len(train_features)))
test_normalized_features = list()
index = 0
for features in test_features:
    features = pca.transform(features)
    test_normalized_features.append(features)
    index += 1
    print("Normalize Test Features ... {:4d}/{:4d}".format(index, len(test_features)))
    

class Codebook:

    def __init__(self, K):

        self.K = K

        self.kmeans = KMeans(n_clusters=K, verbose=True)

    def make_code_words(self, features):

        self.kmeans.fit(features)

    def encode(self, features, shapes):

        distances = self.kmeans.transform(features)

        
        representations = np.zeros(dtype=np.int64, shape=(self.K,))

        nearest_centroid_idx = self.kmeans.predict(features)
        for idx in nearest_centroid_idx:
            representations[idx] += 1
            
        if np.array(representations).shape != (self.K, ):
            print("Your code may be wrong")

        return representations

print("Make Codebook ...")
flattened_normalized_train_features = pca.transform(flattened_train_features)
codebook = Codebook(K)
codebook.make_code_words(flattened_normalized_train_features)

'''
Encode Features
'''
train_encoded_features = list()
index = 0
for features, shapes in zip(train_normalized_features, train_feature_shapes):
    encoded_features = codebook.encode(features, shapes)
    train_encoded_features.append(encoded_features)
    index += 1
    print("Encoding Train Features ... {:4d}/{:4d}".format(index, len(train_normalized_features)))
test_encoded_features = list()
index = 0
for features, shapes in zip(test_normalized_features, test_feature_shapes):
    encoded_features = codebook.encode(features, shapes)
    test_encoded_features.append(encoded_features)
    index += 1
    print("Encoding Text Features ... {:4d}/{:4d}".format(index, len(test_normalized_features)))

'''
Approximate Kernel
'''
chi2sampler = AdditiveChi2Sampler(sample_steps=2)
chi2sampler.fit(train_encoded_features, y_train)
train_encoded_features = chi2sampler.transform(train_encoded_features)
test_encoded_features = chi2sampler.transform(test_encoded_features)


'''
Classify Images with SVM
'''

model = LinearSVC()
parameters = {'C':[1, 0.1, 0.01], 'random_state':[1,2,3], 'max_iter':[50, 100, 150]}

print("Classify Images ...")
clf = GridSearchCV(model, parameters)
clf.fit(train_encoded_features, y_train)

train_score = clf.score(train_encoded_features, y_train)
test_score = clf.score(test_encoded_features, y_test)

elapsed_time = time.time() - start_time

'''
Print Results
'''
print()
print("=" * 90)
print("Train  Score: {:.5f}".format(train_score))
print("Test   Score: {:.5f}".format(test_score))
print("Elapsed Time: {:.2f} secs".format(elapsed_time))
print("=" * 90)
print(f"Best parameter combination is: {clf.best_params_}")
print(f"Best score is: {clf.best_score_}")


Load Dataset ...


  x_train = np.array(x_train)[random_indices].tolist()


Extract Train Features ...    1/1538
Extract Train Features ...    2/1538
Extract Train Features ...    3/1538
Extract Train Features ...    4/1538
Extract Train Features ...    5/1538
Extract Train Features ...    6/1538
Extract Train Features ...    7/1538
Extract Train Features ...    8/1538
Extract Train Features ...    9/1538
Extract Train Features ...   10/1538
Extract Train Features ...   11/1538
Extract Train Features ...   12/1538
Extract Train Features ...   13/1538
Extract Train Features ...   14/1538
Extract Train Features ...   15/1538
Extract Train Features ...   16/1538
Extract Train Features ...   17/1538
Extract Train Features ...   18/1538
Extract Train Features ...   19/1538
Extract Train Features ...   20/1538
Extract Train Features ...   21/1538
Extract Train Features ...   22/1538
Extract Train Features ...   23/1538
Extract Train Features ...   24/1538
Extract Train Features ...   25/1538
Extract Train Features ...   26/1538
Extract Train Features ...   27/1538
E




Train  Score: 1.00000
Test   Score: 0.57916
Elapsed Time: 1358.16 secs
Best parameter combination is: {'C': 0.01, 'max_iter': 50, 'random_state': 1}
Best score is: 0.605971064765853




Report:

    2-2) 의 설정을 유지하면서 SVM 의 학습 파라미터 값 C와 max_iter, random_state 값을 grid search를 사용해 학습하고 그 중 가장 높은 정확도를 보이는 조합을 출력해보았다. 이때 가장 높은 정확도를 보이는 모델의 파라미터 조합은 {'C': 0.01, 'max_iter': 50, 'random_state': 1} 이며, train 데이터에 대해서 1.00, test 데이터에 대해서 0.579 정확도를 보이며 train에서 오버피팅이 발생했음을 알 수 있다.

2-4. 

    실험 결과를 토대로 Best model을 선정하고, Best model의 Confusion matrix를 분석해보세요.

In [34]:
import time
import cv2
import glob
import os
import numpy as np
import random
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.kernel_approximation import AdditiveChi2Sampler
from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV

start_time = time.time()

'''
Parameters
'''
patch_stride = 10
K = 400

'''
Load Dataset
'''
def scene15():

    train_folders = glob.glob("SCENE-15/train/*")
    train_folders.sort()
    classes = dict()
    x_train = list()
    y_train = list()
    for index, folder in enumerate(train_folders):
        label = os.path.basename(folder)
        classes[label] = index
        paths = glob.glob(os.path.join(folder, "*"))
        for path in paths:
            x_train.append(cv2.imread(path, 0))
            y_train.append(index)

    x_test = list()
    y_test = list()
    test_folders = glob.glob("SCENE-15/test/*")
    test_folders.sort()
    for folder in test_folders:
        label = os.path.basename(folder)
        index = classes[label]
        paths = glob.glob(os.path.join(folder, "*"))
        for path in paths:
            x_test.append(cv2.imread(path, 0))
            y_test.append(index)

    return x_train, y_train, x_test, y_test, sorted(classes.keys())

print("Load Dataset ...")
x_train, y_train, x_test, y_test, labels_names = scene15()

random_indices = list(range(len(y_train)))
random.shuffle(random_indices)
x_train = np.array(x_train)[random_indices].tolist()
y_train = np.array(y_train)[random_indices].tolist()

'''
Extract Patches
'''
train_key_points = list()
train_feature_shapes = list()
for image in x_train:
    h, w = image.shape
    image_key_points = list()
    for x in range(0, w, patch_stride):
        for y in range(0, h, patch_stride):
            image_key_points.append(cv2.KeyPoint(x, y, patch_stride))
    train_key_points.append(image_key_points)
    train_feature_shapes.append((len(range(0, w, patch_stride)), (len(range(0, h, patch_stride)))))

test_key_points = list()
test_feature_shapes = list()
for image in x_test:
    h, w = image.shape
    image_key_points = list()
    for x in range(0, w, patch_stride):
        for y in range(0, h, patch_stride):
            image_key_points.append(cv2.KeyPoint(x, y, patch_stride))
    test_key_points.append(image_key_points)
    test_feature_shapes.append((len(range(0, w, patch_stride)), (len(range(0, h, patch_stride)))))
    
    
'''
Extract Features
'''
descriptor = cv2.ORB_create()
train_features = list()
index = 0
for image, key_points in zip(x_train, train_key_points):
    
    _, features = descriptor.compute(image, key_points)
    
    train_features.append(features)
    index += 1
    print("Extract Train Features ... {:4d}/{:4d}".format(index, len(x_train)))

test_features = list()
index = 0
for image, key_points in zip(x_test, test_key_points):
    
    _, features = descriptor.compute(image, key_points)
    
    test_features.append(features)
    index += 1
    print("Extract Test Features ... {:4d}/{:4d}".format(index, len(x_test)))

'''
Normalizing
'''
flattened_train_features = np.concatenate(train_features, axis=0)
pca = PCA(n_components=flattened_train_features.shape[-1], whiten=True)
pca.fit(flattened_train_features)
train_normalized_features = list()
index = 0
for features in train_features:
    features = pca.transform(features)
    train_normalized_features.append(features)
    index += 1
    print("Normalize Train Features ... {:4d}/{:4d}".format(index, len(train_features)))
test_normalized_features = list()
index = 0
for features in test_features:
    features = pca.transform(features)
    test_normalized_features.append(features)
    index += 1
    print("Normalize Test Features ... {:4d}/{:4d}".format(index, len(test_features)))
    

class Codebook:

    def __init__(self, K):

        self.K = K

        self.kmeans = KMeans(n_clusters=K, verbose=True)

    def make_code_words(self, features):

        self.kmeans.fit(features)

    def encode(self, features, shapes):

        distances = self.kmeans.transform(features)

        
        representations = np.zeros(dtype=np.int64, shape=(self.K,))

        nearest_centroid_idx = self.kmeans.predict(features)
        for idx in nearest_centroid_idx:
            representations[idx] += 1
            
        if np.array(representations).shape != (self.K, ):
            print("Your code may be wrong")

        return representations

print("Make Codebook ...")
flattened_normalized_train_features = pca.transform(flattened_train_features)
codebook = Codebook(K)
codebook.make_code_words(flattened_normalized_train_features)

'''
Encode Features
'''
train_encoded_features = list()
index = 0
for features, shapes in zip(train_normalized_features, train_feature_shapes):
    encoded_features = codebook.encode(features, shapes)
    train_encoded_features.append(encoded_features)
    index += 1
    print("Encoding Train Features ... {:4d}/{:4d}".format(index, len(train_normalized_features)))
test_encoded_features = list()
index = 0
for features, shapes in zip(test_normalized_features, test_feature_shapes):
    encoded_features = codebook.encode(features, shapes)
    test_encoded_features.append(encoded_features)
    index += 1
    print("Encoding Text Features ... {:4d}/{:4d}".format(index, len(test_normalized_features)))

'''
Approximate Kernel
'''
chi2sampler = AdditiveChi2Sampler(sample_steps=2)
chi2sampler.fit(train_encoded_features, y_train)
train_encoded_features = chi2sampler.transform(train_encoded_features)
test_encoded_features = chi2sampler.transform(test_encoded_features)


'''
Classify Images with SVM
'''

model = LinearSVC(random_state=100)
parameters = {'C':[1, 0.1, 0.01], 'max_iter':[50, 100, 200]}

print("Classify Images ...")
clf = GridSearchCV(model, parameters)
clf.fit(train_encoded_features, y_train)

train_score = clf.score(train_encoded_features, y_train)
test_score = clf.score(test_encoded_features, y_test)

elapsed_time = time.time() - start_time

'''
Print Results
'''
print()
print("=" * 90)
print("Train  Score: {:.5f}".format(train_score))
print("Test   Score: {:.5f}".format(test_score))
print("Elapsed Time: {:.2f} secs".format(elapsed_time))
print("=" * 90)
print(f"Best parameter combination is: {clf.best_params_}")
print(f"Best score is: {clf.best_score_}")


Load Dataset ...


  x_train = np.array(x_train)[random_indices].tolist()


Extract Train Features ...    1/1538
Extract Train Features ...    2/1538
Extract Train Features ...    3/1538
Extract Train Features ...    4/1538
Extract Train Features ...    5/1538
Extract Train Features ...    6/1538
Extract Train Features ...    7/1538
Extract Train Features ...    8/1538
Extract Train Features ...    9/1538
Extract Train Features ...   10/1538
Extract Train Features ...   11/1538
Extract Train Features ...   12/1538
Extract Train Features ...   13/1538
Extract Train Features ...   14/1538
Extract Train Features ...   15/1538
Extract Train Features ...   16/1538
Extract Train Features ...   17/1538
Extract Train Features ...   18/1538
Extract Train Features ...   19/1538
Extract Train Features ...   20/1538
Extract Train Features ...   21/1538
Extract Train Features ...   22/1538
Extract Train Features ...   23/1538
Extract Train Features ...   24/1538
Extract Train Features ...   25/1538
Extract Train Features ...   26/1538
Extract Train Features ...   27/1538
E




Train  Score: 0.97074
Test   Score: 0.35554
Elapsed Time: 306.77 secs
Best parameter combination is: {'C': 0.01, 'max_iter': 50}
Best score is: 0.3901011041076187


In [35]:
from sklearn.metrics import classification_report

y_pred = clf.predict(test_encoded_features)

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.26      0.21      0.23       109
           1       0.47      0.50      0.49       100
           2       0.60      0.63      0.61       100
           3       0.49      0.60      0.54       100
           4       0.11      0.10      0.10       100
           5       0.26      0.23      0.24       100
           6       0.21      0.17      0.19       100
           7       0.20      0.16      0.18       100
           8       0.46      0.52      0.49       100
           9       0.29      0.35      0.32       100
          10       0.26      0.27      0.26       100
          11       0.22      0.24      0.23       100
          12       0.45      0.33      0.38       100
          13       0.35      0.37      0.36       101
          14       0.54      0.65      0.59       106

    accuracy                           0.36      1516
   macro avg       0.34      0.36      0.35      1516
weighted avg       0.34   

In [32]:
import time
import cv2
import glob
import os
import numpy as np
import random
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.kernel_approximation import AdditiveChi2Sampler
from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV

start_time = time.time()

'''
Parameters
'''
patch_stride = 10
K = 400

'''
Load Dataset
'''
def scene15():

    train_folders = glob.glob("SCENE-15/train/*")
    train_folders.sort()
    classes = dict()
    x_train = list()
    y_train = list()
    for index, folder in enumerate(train_folders):
        label = os.path.basename(folder)
        classes[label] = index
        paths = glob.glob(os.path.join(folder, "*"))
        for path in paths:
            x_train.append(cv2.imread(path, 0))
            y_train.append(index)

    x_test = list()
    y_test = list()
    test_folders = glob.glob("SCENE-15/test/*")
    test_folders.sort()
    for folder in test_folders:
        label = os.path.basename(folder)
        index = classes[label]
        paths = glob.glob(os.path.join(folder, "*"))
        for path in paths:
            x_test.append(cv2.imread(path, 0))
            y_test.append(index)

    return x_train, y_train, x_test, y_test, sorted(classes.keys())

print("Load Dataset ...")
x_train, y_train, x_test, y_test, labels_names = scene15()

random_indices = list(range(len(y_train)))
random.shuffle(random_indices)
x_train = np.array(x_train)[random_indices].tolist()
y_train = np.array(y_train)[random_indices].tolist()

'''
Extract Patches
'''
train_key_points = list()
train_feature_shapes = list()
for image in x_train:
    h, w = image.shape
    image_key_points = list()
    for x in range(0, w, patch_stride):
        for y in range(0, h, patch_stride):
            image_key_points.append(cv2.KeyPoint(x, y, patch_stride))
    train_key_points.append(image_key_points)
    train_feature_shapes.append((len(range(0, w, patch_stride)), (len(range(0, h, patch_stride)))))

test_key_points = list()
test_feature_shapes = list()
for image in x_test:
    h, w = image.shape
    image_key_points = list()
    for x in range(0, w, patch_stride):
        for y in range(0, h, patch_stride):
            image_key_points.append(cv2.KeyPoint(x, y, patch_stride))
    test_key_points.append(image_key_points)
    test_feature_shapes.append((len(range(0, w, patch_stride)), (len(range(0, h, patch_stride)))))
    
    
'''
Extract Features
'''
descriptor = cv2.xfeatures2d.SIFT_create()
train_features = list()
index = 0
for image, key_points in zip(x_train, train_key_points):
    
    _, features = descriptor.compute(image, key_points)
    
    train_features.append(features)
    index += 1
    print("Extract Train Features ... {:4d}/{:4d}".format(index, len(x_train)))

test_features = list()
index = 0
for image, key_points in zip(x_test, test_key_points):
    
    _, features = descriptor.compute(image, key_points)
    
    test_features.append(features)
    index += 1
    print("Extract Test Features ... {:4d}/{:4d}".format(index, len(x_test)))

'''
Normalizing
'''
flattened_train_features = np.concatenate(train_features, axis=0)
pca = PCA(n_components=flattened_train_features.shape[-1], whiten=True)
pca.fit(flattened_train_features)
train_normalized_features = list()
index = 0
for features in train_features:
    features = pca.transform(features)
    train_normalized_features.append(features)
    index += 1
    print("Normalize Train Features ... {:4d}/{:4d}".format(index, len(train_features)))
test_normalized_features = list()
index = 0
for features in test_features:
    features = pca.transform(features)
    test_normalized_features.append(features)
    index += 1
    print("Normalize Test Features ... {:4d}/{:4d}".format(index, len(test_features)))
    

class Codebook:

    def __init__(self, K):

        self.K = K

        self.kmeans = KMeans(n_clusters=K, verbose=True)

    def make_code_words(self, features):

        self.kmeans.fit(features)

    def encode(self, features, shapes):

        distances = self.kmeans.transform(features)

        
        representations = np.zeros(dtype=np.int64, shape=(self.K,))

        nearest_centroid_idx = self.kmeans.predict(features)
        for idx in nearest_centroid_idx:
            representations[idx] += 1
            
        if np.array(representations).shape != (self.K, ):
            print("Your code may be wrong")

        return representations

print("Make Codebook ...")
flattened_normalized_train_features = pca.transform(flattened_train_features)
codebook = Codebook(K)
codebook.make_code_words(flattened_normalized_train_features)

'''
Encode Features
'''
train_encoded_features = list()
index = 0
for features, shapes in zip(train_normalized_features, train_feature_shapes):
    encoded_features = codebook.encode(features, shapes)
    train_encoded_features.append(encoded_features)
    index += 1
    print("Encoding Train Features ... {:4d}/{:4d}".format(index, len(train_normalized_features)))
test_encoded_features = list()
index = 0
for features, shapes in zip(test_normalized_features, test_feature_shapes):
    encoded_features = codebook.encode(features, shapes)
    test_encoded_features.append(encoded_features)
    index += 1
    print("Encoding Text Features ... {:4d}/{:4d}".format(index, len(test_normalized_features)))

'''
Approximate Kernel
'''
chi2sampler = AdditiveChi2Sampler(sample_steps=2)
chi2sampler.fit(train_encoded_features, y_train)
train_encoded_features = chi2sampler.transform(train_encoded_features)
test_encoded_features = chi2sampler.transform(test_encoded_features)


'''
Classify Images with SVM
'''

model = LinearSVC(random_state=100)
parameters = {'C':[1, 0.1, 0.01], 'max_iter':[50, 100, 200]}

print("Classify Images ...")
clf = GridSearchCV(model, parameters)
clf.fit(train_encoded_features, y_train)

train_score = clf.score(train_encoded_features, y_train)
test_score = clf.score(test_encoded_features, y_test)

elapsed_time = time.time() - start_time

'''
Print Results
'''
print()
print("=" * 90)
print("Train  Score: {:.5f}".format(train_score))
print("Test   Score: {:.5f}".format(test_score))
print("Elapsed Time: {:.2f} secs".format(elapsed_time))
print("=" * 90)
print(f"Best parameter combination is: {clf.best_params_}")
print(f"Best score is: {clf.best_score_}")


Load Dataset ...


  x_train = np.array(x_train)[random_indices].tolist()


Extract Train Features ...    1/1538
Extract Train Features ...    2/1538
Extract Train Features ...    3/1538
Extract Train Features ...    4/1538
Extract Train Features ...    5/1538
Extract Train Features ...    6/1538
Extract Train Features ...    7/1538
Extract Train Features ...    8/1538
Extract Train Features ...    9/1538
Extract Train Features ...   10/1538
Extract Train Features ...   11/1538
Extract Train Features ...   12/1538
Extract Train Features ...   13/1538
Extract Train Features ...   14/1538
Extract Train Features ...   15/1538
Extract Train Features ...   16/1538
Extract Train Features ...   17/1538
Extract Train Features ...   18/1538
Extract Train Features ...   19/1538
Extract Train Features ...   20/1538
Extract Train Features ...   21/1538
Extract Train Features ...   22/1538
Extract Train Features ...   23/1538
Extract Train Features ...   24/1538
Extract Train Features ...   25/1538
Extract Train Features ...   26/1538
Extract Train Features ...   27/1538
E




Train  Score: 0.99935
Test   Score: 0.60686
Elapsed Time: 1378.11 secs
Best parameter combination is: {'C': 0.01, 'max_iter': 50}
Best score is: 0.5962308050255933




In [33]:
from sklearn.metrics import classification_report

y_pred = clf.predict(test_encoded_features)

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.46      0.49      0.47       109
           1       0.69      0.72      0.71       100
           2       0.78      0.97      0.86       100
           3       0.79      0.71      0.75       100
           4       0.36      0.37      0.36       100
           5       0.56      0.59      0.58       100
           6       0.42      0.44      0.43       100
           7       0.40      0.42      0.41       100
           8       0.79      0.72      0.75       100
           9       0.56      0.50      0.53       100
          10       0.57      0.57      0.57       100
          11       0.52      0.45      0.48       100
          12       0.77      0.64      0.70       100
          13       0.75      0.82      0.79       101
          14       0.72      0.70      0.71       106

    accuracy                           0.61      1516
   macro avg       0.61      0.61      0.61      1516
weighted avg       0.61   

Report: 

    위 실험을 토대로 우선, patch_stride를 10, K-means의 visual work 개수 K를 400으로 설정하고 ORB descriptor를 사용해 특징을 추출했다. 그 뒤 random seed를 고정해 SVM의 학습 파라미터 C와 max_iter의 조합을 grid search를 통해 찾고 이를 출력했다. 이때 가장 높은 정확도를 보이는 모델의 파라미터 조합은 {'C': 0.01, 'max_iter': 50} 이며, train 데이터에 대해서 0.970, test 데이터에 대해서 0.355 정확도를 보였다.
    다음으로 patch_stride와 K-means의 visual work 개수 K를 똑같이 400으로 설정하고 SIFT descriptor를 사용해 특징을 추출했을 때 가장 높은 정확도가 나온 조합은 {'C': 0.01, 'max_iter': 50}로, 앞선 ORB descriptor를 사용했을 때와 같은 조합이 나왔다. 그러나 이때 본 모델에서 train 데이터에 대해 0.999, test 데이터에 대해 0.606 정확도를 보이며 ORB descriptor와 SIFT descriptor 중 SIFT descriptor를 사용할 때 더 좋은 성능을 보이는 것을 알 수 있다.
    ORB를 사용한 모델의 classification report를 분석해보면 전체적인 classification 성능이 낮은 것을 볼 수 있고 SIFT를 사용한 모델의 classification report를 분석해보면 0, 4, 6, 7 라벨을 제외한 나머지 라벨에 대해서 분류가 잘 진행되는 것을 볼 수 있다.
    

# Problem 3

3-1.
    
    템플릿 코드를 기반으로 구현한 현재 모델은 분류를 위해 SVM을 사용하고 있습니다. Sklearn 패키지에서 SVM외에 다른 classifier를 2가지 이상 사용해 실험을 진행해보고 결과를 분석해보세요.

In [2]:
import time
import cv2
import glob
import os
import numpy as np
import random
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.kernel_approximation import AdditiveChi2Sampler
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import SGDClassifier


start_time = time.time()

'''
Parameters
'''
patch_stride = 10
K = 400

'''
Load Dataset
'''
def scene15():

    train_folders = glob.glob("SCENE-15/train/*")
    train_folders.sort()
    classes = dict()
    x_train = list()
    y_train = list()
    for index, folder in enumerate(train_folders):
        label = os.path.basename(folder)
        classes[label] = index
        paths = glob.glob(os.path.join(folder, "*"))
        for path in paths:
            x_train.append(cv2.imread(path, 0))
            y_train.append(index)

    x_test = list()
    y_test = list()
    test_folders = glob.glob("SCENE-15/test/*")
    test_folders.sort()
    for folder in test_folders:
        label = os.path.basename(folder)
        index = classes[label]
        paths = glob.glob(os.path.join(folder, "*"))
        for path in paths:
            x_test.append(cv2.imread(path, 0))
            y_test.append(index)

    return x_train, y_train, x_test, y_test, sorted(classes.keys())

print("Load Dataset ...")
x_train, y_train, x_test, y_test, labels_names = scene15()

random_indices = list(range(len(y_train)))
random.shuffle(random_indices)
x_train = np.array(x_train)[random_indices].tolist()
y_train = np.array(y_train)[random_indices].tolist()

'''
Extract Patches
'''
train_key_points = list()
train_feature_shapes = list()
for image in x_train:
    h, w = image.shape
    image_key_points = list()
    for x in range(0, w, patch_stride):
        for y in range(0, h, patch_stride):
            image_key_points.append(cv2.KeyPoint(x, y, patch_stride))
    train_key_points.append(image_key_points)
    train_feature_shapes.append((len(range(0, w, patch_stride)), (len(range(0, h, patch_stride)))))

test_key_points = list()
test_feature_shapes = list()
for image in x_test:
    h, w = image.shape
    image_key_points = list()
    for x in range(0, w, patch_stride):
        for y in range(0, h, patch_stride):
            image_key_points.append(cv2.KeyPoint(x, y, patch_stride))
    test_key_points.append(image_key_points)
    test_feature_shapes.append((len(range(0, w, patch_stride)), (len(range(0, h, patch_stride)))))
    
    
'''
Extract Features
'''
descriptor = cv2.xfeatures2d.SIFT_create()
train_features = list()
index = 0
for image, key_points in zip(x_train, train_key_points):
    
    _, features = descriptor.compute(image, key_points)
    
    train_features.append(features)
    index += 1
    print("Extract Train Features ... {:4d}/{:4d}".format(index, len(x_train)))

test_features = list()
index = 0
for image, key_points in zip(x_test, test_key_points):
    
    _, features = descriptor.compute(image, key_points)
    
    test_features.append(features)
    index += 1
    print("Extract Test Features ... {:4d}/{:4d}".format(index, len(x_test)))

'''
Normalizing
'''
flattened_train_features = np.concatenate(train_features, axis=0)
pca = PCA(n_components=flattened_train_features.shape[-1], whiten=True)
pca.fit(flattened_train_features)
train_normalized_features = list()
index = 0
for features in train_features:
    features = pca.transform(features)
    train_normalized_features.append(features)
    index += 1
    print("Normalize Train Features ... {:4d}/{:4d}".format(index, len(train_features)))
test_normalized_features = list()
index = 0
for features in test_features:
    features = pca.transform(features)
    test_normalized_features.append(features)
    index += 1
    print("Normalize Test Features ... {:4d}/{:4d}".format(index, len(test_features)))
    

class Codebook:

    def __init__(self, K):

        self.K = K

        self.kmeans = KMeans(n_clusters=K, verbose=True)

    def make_code_words(self, features):

        self.kmeans.fit(features)

    def encode(self, features, shapes):

        distances = self.kmeans.transform(features)

        
        representations = np.zeros(dtype=np.int64, shape=(self.K,))

        nearest_centroid_idx = self.kmeans.predict(features)
        for idx in nearest_centroid_idx:
            representations[idx] += 1
            
        if np.array(representations).shape != (self.K, ):
            print("Your code may be wrong")

        return representations

print("Make Codebook ...")
flattened_normalized_train_features = pca.transform(flattened_train_features)
codebook = Codebook(K)
codebook.make_code_words(flattened_normalized_train_features)

'''
Encode Features
'''
train_encoded_features = list()
index = 0
for features, shapes in zip(train_normalized_features, train_feature_shapes):
    encoded_features = codebook.encode(features, shapes)
    train_encoded_features.append(encoded_features)
    index += 1
    print("Encoding Train Features ... {:4d}/{:4d}".format(index, len(train_normalized_features)))
test_encoded_features = list()
index = 0
for features, shapes in zip(test_normalized_features, test_feature_shapes):
    encoded_features = codebook.encode(features, shapes)
    test_encoded_features.append(encoded_features)
    index += 1
    print("Encoding Text Features ... {:4d}/{:4d}".format(index, len(test_normalized_features)))

'''
Approximate Kernel
'''
chi2sampler = AdditiveChi2Sampler(sample_steps=2)
chi2sampler.fit(train_encoded_features, y_train)
train_encoded_features = chi2sampler.transform(train_encoded_features)
test_encoded_features = chi2sampler.transform(test_encoded_features)


'''
Classify Images with MLP
'''

model = SGDClassifier(random_state=100,max_iter=500, early_stopping=True)
parameters = {'loss':[ 'squared_hinge', 'perceptron', 'squared_error'], 
              'alpha':[0.001, 0.01],
             'learning_rate':['constant', 'optimal', 'adaptive']}

print("Classify Images ...")
clf = GridSearchCV(model, parameters)
clf.fit(train_encoded_features, y_train)

train_score = clf.score(train_encoded_features, y_train)
test_score = clf.score(test_encoded_features, y_test)

elapsed_time = time.time() - start_time

'''
Print Results
'''
print()
print("=" * 90)
print("Train  Score: {:.5f}".format(train_score))
print("Test   Score: {:.5f}".format(test_score))
print("Elapsed Time: {:.2f} secs".format(elapsed_time))
print("=" * 90)
print(f"Best parameter combination is: {clf.best_params_}")
print(f"Best score is: {clf.best_score_}")


Load Dataset ...


  x_train = np.array(x_train)[random_indices].tolist()


Extract Train Features ...    1/1538
Extract Train Features ...    2/1538
Extract Train Features ...    3/1538
Extract Train Features ...    4/1538
Extract Train Features ...    5/1538
Extract Train Features ...    6/1538
Extract Train Features ...    7/1538
Extract Train Features ...    8/1538
Extract Train Features ...    9/1538
Extract Train Features ...   10/1538
Extract Train Features ...   11/1538
Extract Train Features ...   12/1538
Extract Train Features ...   13/1538
Extract Train Features ...   14/1538
Extract Train Features ...   15/1538
Extract Train Features ...   16/1538
Extract Train Features ...   17/1538
Extract Train Features ...   18/1538
Extract Train Features ...   19/1538
Extract Train Features ...   20/1538
Extract Train Features ...   21/1538
Extract Train Features ...   22/1538
Extract Train Features ...   23/1538
Extract Train Features ...   24/1538
Extract Train Features ...   25/1538
Extract Train Features ...   26/1538
Extract Train Features ...   27/1538
E

60 fits failed out of a total of 90.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
60 fits failed with the following error:
Traceback (most recent call last):
  File "/home/dxlab/anaconda3/envs/chaehee_py38/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/dxlab/anaconda3/envs/chaehee_py38/lib/python3.8/site-packages/sklearn/linear_model/_stochastic_gradient.py", line 890, in fit
    return self._fit(
  File "/home/dxlab/anaconda3/envs/chaehee_py38/lib/python3.8/site-packages/sklearn/linear_model/_stochastic_gradient.py", line 658, in _fit
    self._validate_params()
  File "/home/dxlab/anaconda3/envs/chaehee_py38/lib/python3.

In [3]:
from sklearn.metrics import classification_report

y_pred = clf.predict(test_encoded_features)

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.37      0.50      0.42       109
           1       0.60      0.67      0.64       100
           2       0.86      0.94      0.90       100
           3       0.72      0.71      0.72       100
           4       0.33      0.39      0.36       100
           5       0.55      0.38      0.45       100
           6       0.27      0.35      0.31       100
           7       0.31      0.26      0.28       100
           8       0.83      0.68      0.75       100
           9       0.55      0.45      0.49       100
          10       0.57      0.50      0.53       100
          11       0.45      0.34      0.39       100
          12       0.70      0.61      0.65       100
          13       0.70      0.79      0.74       101
          14       0.59      0.70      0.64       106

    accuracy                           0.55      1516
   macro avg       0.56      0.55      0.55      1516
weighted avg       0.56   

Report: 

    앞선 2-4) 의 설정을 유지하면서 모델을 SVM에서 SGD Classifier로 바꿔주었다. 이때 최적의 학습 파라미터 값을 찾기 위해 loss, alpha, learning_rate 파라미터를 grid search 를 통해 학습했다. 가장 높은 정확도를 보이는 조합은 {'alpha': 0.01, 'learning_rate': 'optimal', 'loss': 'squared_hinge'}로, train 데이터에 대해서 0.946, test 데이터에 대해서 0.551 정확도를 보였다. 이는 앞선 SVM 에서 보인 것처럼 0, 4, 6, 7 카테고리에 대해 낮은 성능을 보였으며 train 데이터에 대한 정확도와 test 데이터에 대한 정확도에 큰 차이가 나는 것으로 보아 train 데이터에 대해 오버피팅한 것으로 보인다.

In [4]:
import time
import cv2
import glob
import os
import numpy as np
import random
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.kernel_approximation import AdditiveChi2Sampler
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier


start_time = time.time()

'''
Parameters
'''
patch_stride = 10
K = 400

'''
Load Dataset
'''
def scene15():

    train_folders = glob.glob("SCENE-15/train/*")
    train_folders.sort()
    classes = dict()
    x_train = list()
    y_train = list()
    for index, folder in enumerate(train_folders):
        label = os.path.basename(folder)
        classes[label] = index
        paths = glob.glob(os.path.join(folder, "*"))
        for path in paths:
            x_train.append(cv2.imread(path, 0))
            y_train.append(index)

    x_test = list()
    y_test = list()
    test_folders = glob.glob("SCENE-15/test/*")
    test_folders.sort()
    for folder in test_folders:
        label = os.path.basename(folder)
        index = classes[label]
        paths = glob.glob(os.path.join(folder, "*"))
        for path in paths:
            x_test.append(cv2.imread(path, 0))
            y_test.append(index)

    return x_train, y_train, x_test, y_test, sorted(classes.keys())

print("Load Dataset ...")
x_train, y_train, x_test, y_test, labels_names = scene15()

random_indices = list(range(len(y_train)))
random.shuffle(random_indices)
x_train = np.array(x_train)[random_indices].tolist()
y_train = np.array(y_train)[random_indices].tolist()

'''
Extract Patches
'''
train_key_points = list()
train_feature_shapes = list()
for image in x_train:
    h, w = image.shape
    image_key_points = list()
    for x in range(0, w, patch_stride):
        for y in range(0, h, patch_stride):
            image_key_points.append(cv2.KeyPoint(x, y, patch_stride))
    train_key_points.append(image_key_points)
    train_feature_shapes.append((len(range(0, w, patch_stride)), (len(range(0, h, patch_stride)))))

test_key_points = list()
test_feature_shapes = list()
for image in x_test:
    h, w = image.shape
    image_key_points = list()
    for x in range(0, w, patch_stride):
        for y in range(0, h, patch_stride):
            image_key_points.append(cv2.KeyPoint(x, y, patch_stride))
    test_key_points.append(image_key_points)
    test_feature_shapes.append((len(range(0, w, patch_stride)), (len(range(0, h, patch_stride)))))
    
    
'''
Extract Features
'''
descriptor = cv2.xfeatures2d.SIFT_create()
train_features = list()
index = 0
for image, key_points in zip(x_train, train_key_points):
    
    _, features = descriptor.compute(image, key_points)
    
    train_features.append(features)
    index += 1
    print("Extract Train Features ... {:4d}/{:4d}".format(index, len(x_train)))

test_features = list()
index = 0
for image, key_points in zip(x_test, test_key_points):
    
    _, features = descriptor.compute(image, key_points)
    
    test_features.append(features)
    index += 1
    print("Extract Test Features ... {:4d}/{:4d}".format(index, len(x_test)))

'''
Normalizing
'''
flattened_train_features = np.concatenate(train_features, axis=0)
pca = PCA(n_components=flattened_train_features.shape[-1], whiten=True)
pca.fit(flattened_train_features)
train_normalized_features = list()
index = 0
for features in train_features:
    features = pca.transform(features)
    train_normalized_features.append(features)
    index += 1
    print("Normalize Train Features ... {:4d}/{:4d}".format(index, len(train_features)))
test_normalized_features = list()
index = 0
for features in test_features:
    features = pca.transform(features)
    test_normalized_features.append(features)
    index += 1
    print("Normalize Test Features ... {:4d}/{:4d}".format(index, len(test_features)))
    

class Codebook:

    def __init__(self, K):

        self.K = K

        self.kmeans = KMeans(n_clusters=K, verbose=True)

    def make_code_words(self, features):

        self.kmeans.fit(features)

    def encode(self, features, shapes):

        distances = self.kmeans.transform(features)

        
        representations = np.zeros(dtype=np.int64, shape=(self.K,))

        nearest_centroid_idx = self.kmeans.predict(features)
        for idx in nearest_centroid_idx:
            representations[idx] += 1
            
        if np.array(representations).shape != (self.K, ):
            print("Your code may be wrong")

        return representations

print("Make Codebook ...")
flattened_normalized_train_features = pca.transform(flattened_train_features)
codebook = Codebook(K)
codebook.make_code_words(flattened_normalized_train_features)

'''
Encode Features
'''
train_encoded_features = list()
index = 0
for features, shapes in zip(train_normalized_features, train_feature_shapes):
    encoded_features = codebook.encode(features, shapes)
    train_encoded_features.append(encoded_features)
    index += 1
    print("Encoding Train Features ... {:4d}/{:4d}".format(index, len(train_normalized_features)))
test_encoded_features = list()
index = 0
for features, shapes in zip(test_normalized_features, test_feature_shapes):
    encoded_features = codebook.encode(features, shapes)
    test_encoded_features.append(encoded_features)
    index += 1
    print("Encoding Text Features ... {:4d}/{:4d}".format(index, len(test_normalized_features)))

'''
Approximate Kernel
'''
chi2sampler = AdditiveChi2Sampler(sample_steps=2)
chi2sampler.fit(train_encoded_features, y_train)
train_encoded_features = chi2sampler.transform(train_encoded_features)
test_encoded_features = chi2sampler.transform(test_encoded_features)


'''
Classify Images with RandomForestClassifier
'''

model = RandomForestClassifier()
parameters = {'n_estimators':[50, 80 ,100], 'criterion':['gini', 'entropy']}

print("Classify Images ...")
clf = GridSearchCV(model, parameters)
clf.fit(train_encoded_features, y_train)

train_score = clf.score(train_encoded_features, y_train)
test_score = clf.score(test_encoded_features, y_test)

elapsed_time = time.time() - start_time

'''
Print Results
'''
print()
print("=" * 90)
print("Train  Score: {:.5f}".format(train_score))
print("Test   Score: {:.5f}".format(test_score))
print("Elapsed Time: {:.2f} secs".format(elapsed_time))
print("=" * 90)
print(f"Best parameter combination is: {clf.best_params_}")
print(f"Best score is: {clf.best_score_}")



Load Dataset ...


  x_train = np.array(x_train)[random_indices].tolist()


Extract Train Features ...    1/1538
Extract Train Features ...    2/1538
Extract Train Features ...    3/1538
Extract Train Features ...    4/1538
Extract Train Features ...    5/1538
Extract Train Features ...    6/1538
Extract Train Features ...    7/1538
Extract Train Features ...    8/1538
Extract Train Features ...    9/1538
Extract Train Features ...   10/1538
Extract Train Features ...   11/1538
Extract Train Features ...   12/1538
Extract Train Features ...   13/1538
Extract Train Features ...   14/1538
Extract Train Features ...   15/1538
Extract Train Features ...   16/1538
Extract Train Features ...   17/1538
Extract Train Features ...   18/1538
Extract Train Features ...   19/1538
Extract Train Features ...   20/1538
Extract Train Features ...   21/1538
Extract Train Features ...   22/1538
Extract Train Features ...   23/1538
Extract Train Features ...   24/1538
Extract Train Features ...   25/1538
Extract Train Features ...   26/1538
Extract Train Features ...   27/1538
E

In [5]:
from sklearn.metrics import classification_report

y_pred = clf.predict(test_encoded_features)

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.37      0.35      0.36       109
           1       0.59      0.69      0.64       100
           2       0.76      0.96      0.85       100
           3       0.72      0.63      0.67       100
           4       0.28      0.27      0.27       100
           5       0.53      0.53      0.53       100
           6       0.32      0.45      0.37       100
           7       0.29      0.35      0.32       100
           8       0.71      0.79      0.75       100
           9       0.46      0.39      0.42       100
          10       0.62      0.59      0.61       100
          11       0.47      0.34      0.39       100
          12       0.70      0.44      0.54       100
          13       0.69      0.59      0.64       101
          14       0.63      0.64      0.64       106

    accuracy                           0.53      1516
   macro avg       0.54      0.53      0.53      1516
weighted avg       0.54   

Report: 

    앞선 2-4) 의 설정을 유지하면서 모델을 SVM에서 RandomForest Classifier로 바꿔주었다. 이때 최적의 classifier 학습 파라미터 값을 찾기 위해 n_estimators와 criterion 파라미터를 grid search를 통해 학습하여 가장 높은 조합을 출력하였다. 가장 높은 정확도를 보이는 조합은 {'criterion': 'gini', 'n_estimators': 80}로, train 데이터에 대해서 1.0, test 데이터에 대해서 0.533 정확도를 보여 train 데이터에 대한 오버피팅 문제가 생겼음을 알 수 있다. 이때 classification repor를 살펴보면 앞선 분류 모델과 마찬가지로 0, 4, 6, 7 카테고리 분류에 있어서 아주 낮은 성능을 보이는 것을 알 수 있다.
    전체적인 모델 파라미터 튜닝과 모델 선정에 있어서 SIFT descriptor를 사용해 특징을 추출했을 때 높은 정확도가 나왔으며, SVM, SGD, Randomforest 모델을 사용해 학습했을 때 train 데이터에 대해서 RandomForest > SVM > SGD 순으로 score가 높게 나왔다. test 데이터에 대해서는 SVM > RandomForest > SGD 순으로 score가 높게 나와 본 데이터셋에 대해 SVM 모델을 사용하는 것이 적합함을 알 수 있다. 또한 모든 모델에 대해 0, 4, 6, 7 카테고리 분류 성능이 낮은 것을 보아, 각 카테고리에 속하는 데이터를 분석해보고 본 카테고리의 특징을 본 실험보다 더 잘 추출할 수 있도록 한다면 후에 모델의 성능을 더욱 높일 수 있을 것이다.