In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; later cells read and write files under that path.
drive.mount('/content/drive')

In [None]:
import os
from tqdm import tqdm

### 데이터 전처리

In [None]:
import json  # used to parse the JSON label files
import os
import re

import numpy as np
import pandas as pd
from keras.applications.resnet_v2 import preprocess_input
from keras.preprocessing import image
from sklearn.model_selection import train_test_split

In [None]:
# Adjust DataFrame display size: never truncate cell text, and show at most 50 rows.
pd.set_option('display.max_colwidth', None)
# Use the exact option name; 'display.max_row' only worked through partial pattern matching.
pd.set_option('display.max_rows', 50)

In [None]:
def file_to_json(f):
    """Parse one JSON label file and lazily yield its two sections.

    Args:
        f: An open, readable file object containing a single JSON document.

    Yields:
        First the value stored under "이미지 정보", then the value stored
        under "데이터셋 정보".

    Raises:
        KeyError: If a section is missing; raised when that item is requested.
    """
    # Parsing happens on the first next() call, so the file must still be open then.
    parsed = json.load(f)
    for section_key in ("이미지 정보", "데이터셋 정보"):
        yield parsed[section_key]

In [None]:
# Walk the label directory tree and collect both sections of every file found in it.
dir_path = '/content/drive/MyDrive/k-fashion_tagging_AI/k-fashion_dataset/label'
images, datasets = [], []

for root, _subdirs, filenames in os.walk(dir_path):
    # One progress bar per directory visited by os.walk.
    for filename in tqdm(filenames):
        with open(os.path.join(root, filename), 'r', encoding='utf-8-sig') as label_file:
            sections = file_to_json(label_file)
            # file_to_json parses lazily, so both items are pulled while the file is open.
            images.append(next(sections))
            datasets.append(next(sections))

In [None]:
# 이미지 정보 DataFrame
image_df = pd.DataFrame(images)
image_df.drop(["이미지 파일명"],axis=1, inplace=True)
image_df.rename(columns={"이미지 식별자":"파일 번호"}, inplace=True)

In [None]:
# Preview the last five rows of the image-info table.
image_df.tail(5)

In [None]:
# Persist the image-info table; missing values are written as the text "NaN".
image_df.to_csv(
    '/content/drive/MyDrive/k-fashion_tagging_AI/k-fashion_dataset/image_df.csv',
    sep=',',
    na_rep='NaN',
)

In [None]:
# 데이터셋 정보 DataFrame
pre_dataset_df = pd.DataFrame(datasets)
dataset_json = pre_dataset_df.to_json(orient="table")
dataset_json = json.loads(dataset_json)
dataset_df = pd.json_normalize(data=dataset_json['data'])
dataset_df.drop(["index","파일 생성일자","파일 이름",
                 "데이터셋 상세설명.렉트좌표.아우터","데이터셋 상세설명.렉트좌표.하의","데이터셋 상세설명.렉트좌표.원피스","데이터셋 상세설명.렉트좌표.상의",
                "데이터셋 상세설명.폴리곤좌표.아우터","데이터셋 상세설명.폴리곤좌표.하의","데이터셋 상세설명.폴리곤좌표.원피스","데이터셋 상세설명.폴리곤좌표.상의"],
                axis=1, inplace=True)
dataset_df.rename(columns={"데이터셋 상세설명.라벨링.스타일":"스타일","데이터셋 상세설명.라벨링.아우터":"라벨링_아우터","데이터셋 상세설명.라벨링.하의":"라벨링_하의","데이터셋 상세설명.라벨링.원피스":"라벨링_원피스","데이터셋 상세설명.라벨링.상의":"라벨링_상의"}, inplace=True)

In [None]:
# Preview the first five rows of the flattened dataset table.
dataset_df.head(5)

In [None]:
# Persist the flattened dataset table; missing values are written as the text "NaN".
dataset_df.to_csv(
    '/content/drive/MyDrive/k-fashion_tagging_AI/k-fashion_dataset/dataset_df.csv',
    sep=',',
    na_rep='NaN',
)

In [None]:
# Reload the dataset table from the CSV file on Drive.
df = pd.read_csv(
    "/content/drive/MyDrive/k-fashion_tagging_AI/k-fashion_dataset/dataset_df.csv",
    encoding='utf-8-sig',
)

In [None]:
# Drop the saved-index column and the style labels from the CSV-loaded frame.
# 'Unnamed: 0' only exists on `df` (it is the index written by to_csv and read
# back as a regular column); the in-memory `dataset_df` never has it, so dropping
# from `dataset_df` raised KeyError. errors='ignore' keeps the cell safe to re-run.
df = df.drop(columns=['Unnamed: 0', '스타일'], errors='ignore')

In [None]:
# 데이터 생성
X = []
Y = []

for r in df.index:
  filename = df['파일 이름']
  img_path = f'이미지 경로/{filename}.jpg'

  # image data
  img = image.load_img(img_path, target_size=(100, 100), interpolation='nearest')
  img_tensor = image.img_to_array(img)
  img_tensor = preprocess_input(img_tensor)
  X.append(img_tensor)

  # label data
  categories = [0,0,0,0]
  for c,_ in enumerate(df.columns):
    if re.search('[가-힣]+', str(df.iloc[r,c])):
      categories[c] = 1
  Y.append(categories)
  
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,
                                                    random_state=100,
                                                    test_size=0.1,
                                                    stratify=Y,)  
data = (X_train, X_test, Y_train, Y_test)
# 파일로 저장
np.save('C:/Users/dnflc/Desktop/K-Fashion dataset/data/X_train', X_train)
np.save('C:/Users/dnflc/Desktop/K-Fashion dataset/data/X_test', X_test)
np.save('C:/Users/dnflc/Desktop/K-Fashion dataset/data/Y_train', Y_train)
np.save('C:/Users/dnflc/Desktop/K-Fashion dataset/data/Y_test', Y_test)

### 모델 쌓기

In [None]:
# Load the four saved train/test arrays back from Drive.
X_train, X_test, Y_train, Y_test = [
    np.load(npy_path)
    for npy_path in (
        '/content/drive/MyDrive/k-fashion_tagging_AI/k-fashion_dataset/X_train.npy',
        '/content/drive/MyDrive/k-fashion_tagging_AI/k-fashion_dataset/X_test.npy',
        '/content/drive/MyDrive/k-fashion_tagging_AI/k-fashion_dataset/Y_train.npy',
        '/content/drive/MyDrive/k-fashion_tagging_AI/k-fashion_dataset/Y_test.npy',
    )
]

In [None]:
import tensorflow as tf
import keras
from keras import models, layers, Input, optimizers, initializers, regularizers, metrics, losses
from keras.models import Model, Sequential
from keras.applications.resnet_v2 import ResNet50V2, preprocess_input, decode_predictions
from keras.preprocessing import image
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# ResNet50V2 with no pretrained weights, 100x100 RGB input and a 4-class output.
model = ResNet50V2(
    weights=None,
    classes=4,
    input_tensor=Input(shape=(100, 100, 3)),
)

In [None]:
# Input image shape (height, width, channels) and dropout rate.
input_shape, drop_rate = (100, 100, 3), 0.5

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss, accuracy metric.
# NOTE(review): if Y rows can contain more than one 1 (multi-hot labels), a
# per-class sigmoid output with binary cross-entropy may be a better fit — confirm.
model.compile(
    loss=losses.categorical_crossentropy,
    metrics=['accuracy'],
    optimizer='adam',
)

# Train for 50 epochs; the held-out test split is also used as validation data.
history = model.fit(
    X_train,
    Y_train,
    epochs=50,
    batch_size=128,
    validation_data=(X_test, Y_test),
)

# Save the trained model to Drive.
model.save("/content/drive/MyDrive/k-fashion_tagging_AI/k-fashion_dataset/model2")

In [None]:
# Reload the saved model from Drive, replacing the in-memory `model`.
model = keras.models.load_model(
    "/content/drive/MyDrive/k-fashion_tagging_AI/k-fashion_dataset/model2"
)

In [None]:
import matplotlib.pyplot as plt

# Per-epoch accuracy recorded by model.fit for the training and validation data.
acc = history.history['accuracy']
val_acc = history.history["val_accuracy"]

# Epochs are numbered from 1 on the x-axis.
epochs = range(1, len(acc) + 1)
for curve, curve_label in ((acc, "train"), (val_acc, "test")):
    plt.plot(epochs, curve, label=curve_label)
plt.legend()

In [None]:
# Run the model on a single image stored on Drive.
img_path = '/content/drive/MyDrive/k-fashion_tagging_AI/k-fashion_dataset/KakaoTalk_20220623_023658545.jpg'
img = image.load_img(img_path, target_size=(100, 100), interpolation='nearest')

# Convert to an array, add a leading batch axis, then apply the same
# preprocessing function used when building the training data.
x = preprocess_input(np.expand_dims(image.img_to_array(img), axis=0))

preds = model.predict(x)
# decode_predictions is intentionally not used here; only the index of the
# highest-scoring output is printed.
print(np.argmax(preds))