In [None]:
# Mount Google Drive into the Colab filesystem; later cells read the dataset
# archive from it and write the trained model and submission CSV back to it.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [None]:
import os
import zipfile

# Archive on the mounted Drive and the local directory it is unpacked into.
zip_path = "/content/drive/MyDrive/ML/data.zip"
unzip_path = "/content/data"

# Unpack only when the target directory is absent, so re-running this cell
# does not extract the archive a second time.
needs_extraction = not os.path.exists(unzip_path)
if needs_extraction:
    with zipfile.ZipFile(zip_path, mode='r') as archive:
        archive.extractall(path=unzip_path)

In [None]:
import math
import numpy as np
import pandas as pd

import cv2
import seaborn as sns
from matplotlib import pyplot

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

import tensorflow as tf
from tensorflow.keras import optimizers
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Flatten, Dense, Conv2D, GlobalAveragePooling2D
from tensorflow.keras.layers import Dropout, BatchNormalization, Activation
from tensorflow.keras.callbacks import Callback, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Settings shared by both generators: multiply pixel values by 1/255 and
# reserve 10% of the images as the validation split.
_common_datagen_args = dict(rescale=1./255, validation_split=0.1)

# Training generator: shared settings plus random rotation, shifts, shear,
# zoom and horizontal flips for augmentation.
train_datagen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.15,
    height_shift_range=0.15,
    shear_range=0.15,
    zoom_range=0.15,
    horizontal_flip=True,
    **_common_datagen_args,
)

# Validation generator: shared settings only, no augmentation.
valid_datagen = ImageDataGenerator(**_common_datagen_args)

In [None]:
# Both iterators read the same directory; `subset` selects which side of each
# generator's validation split is served.
_train_dir = f"{unzip_path}/data/Images/train"
_flow_args = dict(
    directory=_train_dir,
    target_size=(48, 48),
    class_mode='categorical',
    batch_size=32,
)

train_dataset = train_datagen.flow_from_directory(subset='training', **_flow_args)
valid_dataset = valid_datagen.flow_from_directory(subset='validation', **_flow_args)

Found 25841 images belonging to 7 classes.
Found 2868 images belonging to 7 classes.


In [None]:
# VGG19 convolutional base with ImageNet weights; the classifier top is left
# out and the input is declared as 48x48 images with 3 channels.
vgg = tf.keras.applications.VGG19(
    include_top=False,
    weights='imagenet',
    input_shape=(48, 48, 3),
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m80134624/80134624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# Print the layer-by-layer summary of the VGG19 base.
vgg.summary()

In [None]:
def build_model(bottom_model, classes):
    """Attach a classification head to a base model and return its output tensor.

    Args:
        bottom_model: Keras model used as the feature extractor. The output of
            its second-to-last layer is used, so its final layer is skipped.
        classes: Number of units in the softmax output layer.

    Returns:
        The output tensor of the ``out_layer`` Dense layer. This is a tensor,
        not a ``Model``; the caller combines it with the base model's input.
    """
    features = bottom_model.layers[-2].output
    pooled = GlobalAveragePooling2D()(features)
    return Dense(classes, activation='softmax', name='out_layer')(pooled)

In [None]:
# Number of output classes for the softmax head.
num_classes = 7

# build_model returns the head's output tensor, which is wired to the VGG19
# input to form the full trainable model.
head = build_model(vgg, num_classes)

model = Model(inputs=vgg.input, outputs=head)

# summary() prints the table itself and returns None, so it is called directly
# rather than wrapped in print(), which would emit a stray "None" line.
model.summary()

None


In [None]:
# Both callbacks watch the same validation metric.
monitored_metric = 'val_accuracy'

# Stop training after 11 epochs without an improvement of at least 0.00005,
# and roll the model back to the best weights seen.
early_stopping = EarlyStopping(
    monitor=monitored_metric,
    min_delta=0.00005,
    patience=11,
    restore_best_weights=True,
    verbose=1,
)

# Halve the learning rate after 7 epochs without improvement, never going
# below 1e-7.
lr_scheduler = ReduceLROnPlateau(
    monitor=monitored_metric,
    factor=0.5,
    patience=7,
    min_lr=1e-7,
    verbose=1,
)

callbacks = [early_stopping, lr_scheduler]

In [None]:
# batch size of 32 performs the best.
batch_size = 32
epochs = 25

# List of candidate optimizers; only the first entry is used to compile.
adam = optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999)
optims = [adam]

model.compile(
    optimizer=optims[0],
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# history = model.fit(
#     train_dataset,
#     validation_data=valid_dataset,
#     steps_per_epoch=int(np.ceil(train_dataset.samples / train_dataset.batch_size)),
#     validation_steps=int(np.ceil(valid_dataset.samples / valid_dataset.batch_size)),
#     epochs=epochs,
#     callbacks=callbacks
# )

Epoch 1/25
[1m808/808[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 76ms/step - accuracy: 0.6938 - loss: 0.8221 - val_accuracy: 0.6681 - val_loss: 0.9201 - learning_rate: 1.0000e-04
Epoch 2/25
[1m808/808[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - learning_rate: 1.0000e-04
Epoch 3/25
[1m808/808[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 67ms/step - accuracy: 0.7039 - loss: 0.8075 - val_accuracy: 0.6541 - val_loss: 0.9347 - learning_rate: 1.0000e-04
Epoch 4/25
[1m808/808[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - learning_rate: 1.0000e-04
Epoch 5/25
[1m808/808[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 68ms/step - accuracy: 0.7144 - loss: 0.7654 - val_accuracy: 0.6632 - val_loss: 0.9484 - learning_rate: 1.0000e-04
Epoch 6/25
[1m808/808[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14us/step - accuracy: 0.0000e+00 -

KeyboardInterrupt: 

In [None]:
# Train with validation after every epoch.
#
# steps_per_epoch is deliberately not passed: the generator already knows its
# own length, so Keras runs one complete pass over train_dataset per epoch.
# The recorded run with steps_per_epoch = samples // batch_size shows every
# other epoch finishing in about a second (no real pass over the data) while
# still counting toward `epochs` and toward the callbacks' patience.
history = model.fit(
    train_dataset,
    validation_data=valid_dataset,
    epochs=epochs,
    callbacks=callbacks,
)

Epoch 1/25
[1m403/403[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 141ms/step - accuracy: 0.4186 - loss: 1.4865 - val_accuracy: 0.5275 - val_loss: 1.2770 - learning_rate: 0.0010
Epoch 2/25
[1m403/403[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.4375 - loss: 1.3862 - val_accuracy: 0.5349 - val_loss: 1.2591 - learning_rate: 0.0010
Epoch 3/25
[1m403/403[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 117ms/step - accuracy: 0.4967 - loss: 1.3180 - val_accuracy: 0.5485 - val_loss: 1.1814 - learning_rate: 0.0010
Epoch 4/25
[1m403/403[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.6875 - loss: 1.0499 - val_accuracy: 0.5495 - val_loss: 1.1812 - learning_rate: 0.0010
Epoch 5/25
[1m403/403[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 117ms/step - accuracy: 0.5433 - loss: 1.2139 - val_accuracy: 0.5673 - val_loss: 1.1282 - learning_rate: 0.0010
Epoch 6/25
[1m403/403[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [None]:
# Persist the model to Google Drive as a .keras file.
model.save("/content/drive/MyDrive/ML/VGG_no_validate_origin.keras")

In [None]:
from tensorflow.keras.models import load_model

# Reload the saved model from Drive; this rebinds `model` to the loaded copy,
# which is what the prediction cell uses.
model = load_model("/content/drive/MyDrive/ML/VGG_no_validate_origin.keras")

In [None]:
import os
import shutil

# flow_from_directory looks for images inside subdirectories of the given
# path, so the flat test images are moved into a single placeholder
# subdirectory ("dummy") before a generator is built on test_path.
test_path = f"{unzip_path}/data/Images/test"
dummy_folder = os.path.join(test_path, "dummy")
os.makedirs(dummy_folder, exist_ok=True)

image_extensions = ('.jpg', '.png')
for entry_name in os.listdir(test_path):
    source = os.path.join(test_path, entry_name)
    # Match the extension case-insensitively so files such as "x.JPG" are not
    # silently left out of the test set, and only move regular files (this
    # also skips the "dummy" directory itself).
    if os.path.isfile(source) and entry_name.lower().endswith(image_extensions):
        shutil.move(source, os.path.join(dummy_folder, entry_name))

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import pandas as pd

# Test images are only rescaled; no augmentation is applied.
test_datagen = ImageDataGenerator(rescale=1./255)

# Unlabelled (class_mode=None) and unshuffled, so predictions come back in the
# same order as the generator's filename list.
test_generator = test_datagen.flow_from_directory(
    directory=test_path,
    target_size=(48, 48),
    batch_size=32,
    class_mode=None,
    shuffle=False,
)

# Keep the file names for building the submission later.
filenames = test_generator.filenames
print("Number of test images:", len(filenames))

Found 3589 images belonging to 1 classes.
Number of test images: 3589


In [None]:
# Predict class probabilities for every test image and take the index of the
# most probable class for each one.
predictions = model.predict(test_generator)
predicted_classes = predictions.argmax(axis=-1)

# Class name -> index mapping, and its inverse (index -> class name).
index_mapping = {"Angry": 0, "Disgust": 1, "Fear": 2, "Happy": 3, "Neutral": 4, "Sad": 5, "Surprise": 6}
class_dic = {v: k for k, v in index_mapping.items()}

# The submission stores the numeric class index. Mapping index -> name -> index
# returns the index unchanged, so the predicted indices are used directly
# (converted to plain Python ints).
labels = [int(pred) for pred in predicted_classes]

# Strip the directory and the extension from each file name. splitext handles
# extensions of any length, unlike slicing off a fixed four characters.
file_names = [os.path.splitext(os.path.basename(name))[0] for name in filenames]

# Write the submission CSV to Drive.
submission = pd.DataFrame({"filename": file_names, "label": labels})
submission.to_csv("/content/drive/MyDrive/ML/VGG_no_validate_origin.csv", index=False)

[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 17ms/step
