# 1 Problem Definition

In [None]:
# Mount Google Drive at /content/drive so the project files stored there are reachable (Colab only).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os

# Make the project folder the working directory so relative paths
# (e.g. the submission CSV written at the end of the notebook) resolve here.
os.chdir('/content/drive/MyDrive/고머딥/기말프로젝트')

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import cv2
from PIL import Image
from timeit import default_timer as timer

# 2 Data

In [None]:
from PIL import ImageFile
# Tell PIL to load image files whose data is truncated instead of failing while decoding them.
ImageFile.LOAD_TRUNCATED_IMAGES = True

## 2-1) Train Load

In [None]:
from glob import glob

# Map each pose folder name to its integer class id.
label_dict = {
    'downdog':0,
    'goddess':1,
    'mountain':2,
    'tree':3,
    'warrior1':4,
    'warrior2':5
}

# Load every training image as a 128x128 RGB array together with its class id.
# glob() returns paths in arbitrary, filesystem-dependent order, so both
# listings are sorted; otherwise the seeded shuffle and the seeded
# train/validation split later in the notebook would not select the same
# samples from one run to the next.
train_label_list = []
train_img_list = []
for label_path in sorted(glob('/content/drive/MyDrive/고머딥/기말프로젝트/for-student/train/*')):
    for img_path in sorted(glob(label_path + "/*")):
        image = np.array(Image.open(img_path).convert("RGB").resize((128, 128)))
        train_img_list.append(image)
        # The folder name is the label; an unknown folder name raises KeyError.
        train_label_list.append(label_dict[os.path.basename(label_path)])
train_label_list = np.array(train_label_list)
train_img_list = np.array(train_img_list)

In [None]:
# Shuffle images and labels with one seeded permutation so each image keeps its label.
np.random.seed(42)
random_indices = np.random.permutation(train_label_list.shape[0])
train_img_list = train_img_list[random_indices]
train_label_list = train_label_list[random_indices]

In [None]:
# Sanity check: shapes of the loaded image array and label array.
train_img_list.shape, train_label_list.shape 

((2484, 128, 128, 3), (2484,))

In [None]:
# Number of training images per class id (index = class id).
np.bincount(train_label_list)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the training data as a validation set (fixed seed).
X_train, X_val, y_train, y_val = train_test_split(
    train_img_list,
    train_label_list,
    test_size=0.2,
    random_state=1215,
)

In [None]:
# Inspect the label arrays of the training and validation splits.
y_train, y_val

## 2-2) Test Load

In [None]:
import natsort

# Collect the test image paths into a DataFrame column.
all_img_list = glob('/content/drive/MyDrive/고머딥/기말프로젝트/for-student/test/*')
test = pd.DataFrame()
test['img_path'] = all_img_list

# Natural-sort the paths so numbered file names are ordered numerically
# (presumably to match the row order of the submission file -- TODO confirm).
test_image_values = natsort.natsorted(test['img_path'].values)

# Decode each image as RGB, resize it to 128x128 and stack everything into one array.
test_img_list = []
for img_path in test_image_values:
    resized = Image.open(img_path).convert("RGB").resize((128, 128))
    image = np.array(resized)
    test_img_list.append(image)
test_img_list = np.array(test_img_list)

In [None]:
# Number of test images loaded.
len(test_img_list)

389

# 3 Analysis

In [None]:
import matplotlib.pyplot as plt

# Show the first test image, without axis ticks or frame.
first_image = test_img_list[0]
fig, ax = plt.subplots()
ax.imshow(first_image)
ax.axis('off')
plt.show()

# 4 Model

## 4-1) sequential model

In [None]:
import tensorflow as tf
from tensorflow.keras import layers

# Baseline CNN for 128x128 RGB input: two conv + max-pool stages,
# then a small dense head ending in a 6-way softmax.
model_sequential = tf.keras.Sequential()
model_sequential.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)))
model_sequential.add(layers.MaxPooling2D((2, 2)))
model_sequential.add(layers.Conv2D(64, (3, 3), activation='relu'))
model_sequential.add(layers.MaxPooling2D((2, 2)))
model_sequential.add(layers.Flatten())
model_sequential.add(layers.Dense(64, activation='relu'))
model_sequential.add(layers.Dense(6, activation='softmax'))

In [None]:
# Loss for integer class labels -- the one passed to compile() below.
sce_loss = tf.keras.losses.SparseCategoricalCrossentropy()

# Loss for one-hot labels; defined here but not passed to compile().
ce_loss = tf.keras.losses.CategoricalCrossentropy()

adam = tf.keras.optimizers.Adam(learning_rate=0.001)

model_sequential.compile(optimizer=adam, loss=sce_loss, metrics=["acc"])

model_sequential.summary()

## 4-2) VGG16

In [None]:
import numpy as np
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input, decode_predictions
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D, Input

num_classes = 6

# Pre-trained VGG16 convolutional base (ImageNet weights, no classifier head).
base_model = VGG16(weights='imagenet', include_top=False)

# Freeze the base model layers so only the new classification head is trained.
for layer in base_model.layers:
    layer.trainable = False

# The ImageNet weights expect inputs that went through VGG16's own
# preprocess_input. The notebook feeds raw 0-255 RGB arrays, so the
# preprocessing is applied inside the model: fit(), evaluate() and predict()
# then all see identically preprocessed data without changing any caller.
inputs = Input(shape=(None, None, 3))
x = preprocess_input(inputs)
x = base_model(x)

# Custom layers for classification: global pooling, then a tapering dense
# stack ending in a softmax over the pose classes.
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
x = Dense(512, activation='relu')(x)
x = Dense(128, activation='relu')(x)
x = Dense(32, activation='relu')(x)
predictions = Dense(num_classes, activation='softmax')(x)

# Final model: raw image in, class probabilities out.
model = Model(inputs=inputs, outputs=predictions)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
import tensorflow as tf

# Loss for integer class labels -- the one passed to compile() below.
sce_loss = tf.keras.losses.SparseCategoricalCrossentropy()

# Loss for one-hot labels; defined here but not passed to compile().
ce_loss = tf.keras.losses.CategoricalCrossentropy()

# Fresh optimizer instance for the VGG16-based model.
adam = tf.keras.optimizers.Adam(learning_rate=0.001)

model.compile(optimizer=adam, loss=sce_loss, metrics=["acc"])

model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None, None, 3)]   0         
                                                                 
 block1_conv1 (Conv2D)       (None, None, None, 64)    1792      
                                                                 
 block1_conv2 (Conv2D)       (None, None, None, 64)    36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, None, None, 64)    0         
                                                                 
 block2_conv1 (Conv2D)       (None, None, None, 128)   73856     
                                                                 
 block2_conv2 (Conv2D)       (None, None, None, 128)   147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, None, None, 128)   0     

In [None]:
# Shapes of the training images and their labels.
X_train.shape, y_train.shape

((1987, 128, 128, 3), (1987,))

In [None]:
# Sizes of the training and validation label arrays.
y_train.shape, y_val.shape

((1987,), (497,))

# 5 Experiments

In [None]:
# Train the model and monitor the hold-out split created by train_test_split.
# validation_data is used rather than validation_split: the latter would
# set aside a further 20% of X_train, shrinking the training set while
# leaving X_val / y_val unused.
model.fit(
    X_train, y_train,
    epochs=20,
    batch_size=32,
    validation_data=(X_val, y_val)
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7ff288193370>

In [None]:
# Score the hold-out split (not X_train) so the printed number is a genuine
# validation accuracy rather than training accuracy.
val_loss, val_accuracy = model.evaluate(X_val, y_val, verbose=0)

print("Validation Accuracy:", val_accuracy)

Validation Accuracy: 0.9602415561676025


In [None]:
# Alias the loaded test images under the X_* naming used for model inputs.
X_test = test_img_list

In [None]:
# Per-class softmax outputs for every test image.
pred = model.predict(X_test)



In [None]:
pred.shape # (batch, num_class): one row per test image, one column per class

(389, 6)

In [None]:
# Inspect the raw prediction array.
pred

array([[6.1056170e-08, 8.8497974e-02, 1.0464837e-09, 3.8173557e-06,
        1.2986685e-06, 9.1149688e-01],
       [9.9999982e-01, 3.8097969e-11, 5.2694585e-12, 2.7057028e-14,
        1.1694971e-08, 9.0550536e-08],
       [3.5159853e-10, 3.1073241e-06, 1.7087930e-13, 5.9168018e-12,
        1.0735725e-08, 9.9999684e-01],
       ...,
       [2.5654066e-09, 9.6178315e-05, 2.0104618e-11, 9.9989891e-01,
        1.2314234e-06, 3.6786971e-06],
       [5.0421000e-05, 8.6599309e-04, 1.7346978e-02, 9.1370153e-01,
        6.7019328e-02, 1.0158350e-03],
       [9.9999499e-01, 2.4531579e-07, 6.2189848e-08, 8.9688791e-11,
        1.1801892e-06, 3.5066164e-06]], dtype=float32)

In [None]:
np.argmax(pred, axis=1) # result: index of the highest-scoring class for each test image

array([5, 0, 5, 5, 3, 5, 0, 5, 3, 1, 5, 0, 0, 5, 3, 2, 2, 4, 3, 2, 5, 1,
       2, 0, 1, 5, 0, 2, 5, 2, 5, 5, 3, 5, 1, 3, 1, 3, 5, 0, 0, 0, 2, 0,
       5, 0, 0, 1, 1, 0, 3, 3, 1, 5, 4, 5, 0, 1, 4, 1, 5, 3, 5, 0, 3, 3,
       3, 1, 3, 1, 5, 5, 0, 0, 5, 5, 3, 5, 5, 5, 5, 5, 3, 0, 5, 1, 0, 1,
       3, 1, 1, 3, 5, 5, 5, 4, 5, 0, 1, 5, 3, 1, 3, 5, 0, 1, 0, 5, 5, 2,
       0, 0, 1, 2, 3, 5, 0, 5, 2, 1, 3, 5, 1, 5, 4, 1, 0, 2, 1, 5, 2, 4,
       0, 1, 0, 5, 1, 2, 5, 5, 1, 0, 2, 4, 5, 0, 0, 5, 5, 5, 1, 1, 3, 0,
       5, 2, 0, 5, 0, 5, 1, 5, 1, 5, 1, 5, 3, 2, 5, 5, 1, 3, 5, 0, 0, 1,
       4, 1, 0, 3, 5, 0, 2, 0, 1, 3, 5, 0, 0, 5, 5, 4, 4, 3, 0, 0, 2, 0,
       1, 4, 3, 0, 1, 5, 0, 1, 4, 3, 1, 0, 5, 0, 5, 5, 1, 0, 5, 1, 3, 4,
       0, 5, 2, 3, 0, 0, 1, 0, 5, 2, 3, 3, 2, 0, 3, 1, 0, 5, 3, 1, 3, 5,
       3, 0, 1, 5, 2, 5, 1, 0, 5, 1, 2, 0, 1, 3, 2, 0, 4, 0, 1, 5, 0, 1,
       0, 4, 0, 0, 1, 1, 5, 3, 5, 5, 3, 0, 2, 0, 5, 0, 2, 0, 5, 0, 5, 5,
       3, 0, 0, 1, 1, 3, 5, 1, 1, 5, 1, 0, 0, 1, 0,

# 6 Submission

In [None]:
import pandas as pd

# Start from the example submission and fill 'Category' with the predicted
# class ids. Values are assigned by position, so this assumes the file's rows
# are in the same order as the test images -- TODO confirm.
submission = pd.read_csv(
    '/content/drive/MyDrive/고머딥/기말프로젝트/for-student/submission-example.csv'
).assign(Category=np.argmax(pred, axis=1))
submission

Unnamed: 0,Id,Category
0,0.jpg,5
1,1.jpg,0
2,2.jpg,5
3,3.jpg,5
4,4.jpg,3
...,...,...
384,384.jpg,5
385,385.jpg,0
386,386.jpg,3
387,387.jpg,3


In [None]:
# Sanity check: the largest predicted class id should not exceed 5 (six classes, ids 0-5).
max(submission['Category'])

5

In [None]:
# Write the submission without the DataFrame index; the relative path resolves
# against the current working directory.
submission.to_csv('submission_230525_vgg16model.csv', index=False)

In [None]:
# Distribution of predicted classes across the test set.
submission['Category'].value_counts()