#  과제2-1. 수어번역기 모델링 및 추적(개인과제)

## 1.환경준비

### (1) 라이브러리 로딩

In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import random as rd

from sklearn.model_selection import train_test_split
from sklearn.metrics import *

from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout
from keras.backend import clear_session
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping


import mlflow
import mlflow.keras

In [2]:
# Never wrap printed NumPy arrays: an unbounded line width keeps each row on one line.
np.set_printoptions(linewidth=float("inf"))

* 함수 만들기

In [3]:
def dl_history_plot(history):
    """Plot the training and validation loss curves of a training run.

    Args:
        history: Either the ``History`` object returned by ``model.fit`` or
            its ``history`` attribute, i.e. a mapping that holds the
            per-epoch ``'loss'`` and ``'val_loss'`` sequences.
    """
    # model.fit returns a History object, which cannot be subscripted; unwrap
    # its ``history`` dict so both calling styles work.
    logs = getattr(history, 'history', history)

    plt.figure(figsize=(10,6))
    plt.plot(logs['loss'], label='train_err', marker = '.')
    plt.plot(logs['val_loss'], label='val_err', marker = '.')

    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend()
    plt.grid()
    plt.show()

### (2) 데이터로딩

#### 1) 데이터 가져오기
* 제공 받은 데이터를 여러분의 구글드라이브 적절한 위치에 업로드 합니다.
    * 가능하면, 구글드라이브 첫 경로 밑에 폴더를 만들고 업로드 하기를 권장합니다.

In [4]:
# Location of the dataset; the empty prefix means the file name is used as-is.
path = ''
file = 'sign_language.csv'

# Read the CSV into a DataFrame and preview its first rows.
data = pd.read_csv(f"{path}{file}")
data.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,22,186,186,185,185,185,184,185,184,184,...,171,122,65,62,215,180,99,47,31,25
1,24,175,177,179,181,181,182,182,182,183,...,202,202,201,200,199,197,195,193,191,189
2,6,187,187,187,187,187,186,187,186,186,...,45,23,25,34,41,43,39,40,43,35
3,2,169,169,169,170,169,170,169,169,169,...,204,203,200,199,198,196,194,193,191,190
4,21,153,165,170,179,190,198,203,206,213,...,161,122,132,96,75,63,37,43,70,66


In [5]:
# Show the (rows, columns) dimensions of the loaded DataFrame.
data.shape

(8000, 785)

#### 2) class names
* label이 0 ~ 25 까지의 숫자로 표현됩니다. (a=0, b=1, ..., z=25)
* 수어 중에서 J(9) 와 Z(25)는 손을 움직이면서 표현해야 하므로, 여기서는 제외합니다.
* 숫자로 레이블링 된 것을, 나중에 문자 변환하기 위해 문자로 된 리스트를 생성합니다.

In [6]:
import string

# Lookup table from numeric label to letter: index 0 is 'a', index 25 is 'z'.
class_names = np.array([*string.ascii_lowercase])
len(class_names), class_names

(26,
 array(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'], dtype='<U1'))

### (3) mlflow 추적 준비
* 서버 주소 사용

In [7]:
# Point every subsequent MLflow call at the tracking server at this address.
mlflow_uri = "http://mini7-mlflow.carpediem.so/"
mlflow.set_tracking_uri(mlflow_uri)

### (4) 새 실험 생성
* 팀에서 1명만 새 실험 생성 
    * exp_## (## : 팀 번호)
    * 팀원들과 exp id 공유

In [8]:
# ID of the MLflow experiment that the training run in this notebook is recorded under.
exp_id=60

## 2.데이터 준비

### (1) 데이터 분할1 : x, y 나누기

In [10]:
# Target: the numeric class label. Features: every remaining column.
y = data["label"]
x = data.drop(columns="label")

### (2) 데이터 분할2 : train, test

* 적절하게 분할 하시오.

In [11]:
# First hold out 1,000 rows, stratified by label, as the test set.
x_train, x_test, y_train, y_test = train_test_split(
    x, y,
    test_size=1000,
    stratify=y,
    random_state=42,
)

# Then hold out 2,000 of the remaining rows, again stratified, as the validation set.
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train,
    test_size=2000,
    stratify=y_train,
    random_state=42,
)

### (3) DL을 위한 전처리
* 모두 넘파이로 변환

In [12]:
# Convert every split from pandas objects into plain NumPy arrays.
x_train, x_test, x_val = (np.array(part) for part in (x_train, x_test, x_val))
y_train, y_test, y_val = (np.array(part) for part in (y_train, y_test, y_val))

* shape 맞추기 n, 28,28,1

In [13]:
# Turn each flat pixel row into a 28x28 single-channel image: (n, 28, 28, 1).
x_train, x_val, x_test = (
    split.reshape(-1, 28, 28, 1) for split in (x_train, x_val, x_test)
)

* Scaling : Min-Max
    * 0-255 값으로 되어 있는 데이터를 0-1사이 값으로 변환
    * x_train, x_val, x_test 를 그냥 255로 나누어 x_train2, x_val2, x_test2 를 만들면 됨

In [14]:
# Divide every pixel value by 255; the scaled copies carry a "2" suffix.
x_train2, x_val2, x_test2 = (
    split / 255 for split in (x_train, x_val, x_test)
)

## 3.모델링
* 개인별 모델링 및 추적

### (1) 모델 추적

In [15]:
# Training run: build the CNN, fit it, and record the results in MLflow.
#
# Autologging records the fit parameters and per-epoch metrics. The model
# itself is logged exactly once, by the explicit log_model call at the end;
# log_models=False stops autolog from uploading a second, unregistered copy.
mlflow.keras.autolog(log_models=False)
try:
    with mlflow.start_run(experiment_id=exp_id,run_name="a031151"):
        # Drop any graph state left over from a previous run of this cell.
        keras.backend.clear_session()

        # --- Stem -----------------------------------------------------------
        # 28x28x1 -> 14x14x32 (stride-2 conv) -> 13x13x32 -> 13x13x64
        il = keras.layers.Input(shape=(28,28,1))
        cl = keras.layers.Conv2D(filters=32,kernel_size=(2,2),strides=(2,2),padding="same",activation="relu")(il)
        bl = keras.layers.BatchNormalization()(cl)
        cl2 = keras.layers.Conv2D(filters=32,kernel_size=(2,2),padding="valid",activation="relu")(bl)
        bl2 = keras.layers.BatchNormalization()(cl2)
        cl3 = keras.layers.Conv2D(filters=64,kernel_size=(3,3),padding="same",activation="relu")(bl2)
        bl3 = keras.layers.BatchNormalization()(cl3)
        # -> 6x6x64 -> 6x6x80 -> 5x5x192 -> 2x2x192
        pl = keras.layers.MaxPool2D(pool_size=(2,2))(bl3)
        cl4 = keras.layers.Conv2D(filters=80,kernel_size=(3,3),padding="same",activation="relu")(pl)
        bl4 = keras.layers.BatchNormalization()(cl4)
        cl5 = keras.layers.Conv2D(filters=192,kernel_size=(2,2),padding="valid",activation="relu")(bl4)
        pl2 = keras.layers.MaxPool2D(pool_size=(2,2))(cl5)

        # --- Four parallel branches, all fed by pl2 -------------------------
        # Every branch ends in 64 channels at the same spatial size so the
        # branch outputs can be summed element-wise below.

        # Branch a: three stacked 3x3 convolutions (64 -> 96 -> 64 filters).
        a = keras.layers.Conv2D(filters=64,kernel_size=(3,3),padding="same",activation="relu")(pl2)
        a2 = keras.layers.BatchNormalization()(a)
        a3 = keras.layers.Conv2D(filters=96,kernel_size=(3,3),padding="same",activation="relu")(a2)
        a4 = keras.layers.BatchNormalization()(a3)
        a5 = keras.layers.Conv2D(filters=64,kernel_size=(3,3),padding="same",activation="relu")(a4)
        a6 = keras.layers.BatchNormalization()(a5)

        # Branch b: two stacked 3x3 convolutions (48 -> 64 filters).
        b = keras.layers.Conv2D(filters=48,kernel_size=(3,3),padding="same",activation="relu")(pl2)
        b2 = keras.layers.BatchNormalization()(b)
        b3 = keras.layers.Conv2D(filters=64,kernel_size=(3,3),padding="same",activation="relu")(b2)
        b4 = keras.layers.BatchNormalization()(b3)

        # Branch c: stride-1 average pooling followed by one 3x3 convolution.
        c = keras.layers.AveragePooling2D(pool_size=(2,2),strides=(1,1),padding="same")(pl2)
        c2 = keras.layers.Conv2D(filters=64,kernel_size=(3,3),padding="same",activation="relu")(c)
        c3 = keras.layers.BatchNormalization()(c2)

        # Branch d: a single 3x3 convolution.
        d = keras.layers.Conv2D(filters=64,kernel_size=(3,3),padding="same",activation="relu")(pl2)
        d2 = keras.layers.BatchNormalization()(d)

        # --- Merge ----------------------------------------------------------
        # Combine the branches both ways (element-wise sum: 64 channels,
        # channel concatenation: 256 channels) and stack the two results.
        summed = keras.layers.Add()([a6,b4,c3,d2])
        con = keras.layers.Concatenate()([a6,b4,c3,d2])
        con2 = keras.layers.Concatenate()([summed,con])

        # --- Head -----------------------------------------------------------
        aver = keras.layers.GlobalAveragePooling2D()(con2)
        # 26 outputs, one per entry of class_names.
        ol = keras.layers.Dense(26,activation = "softmax")(aver)

        model = keras.models.Model(il,ol)
        # Sparse loss: the targets are integer labels, not one-hot vectors.
        model.compile(loss=keras.losses.sparse_categorical_crossentropy,metrics=["accuracy"],optimizer="adam")

        # Stop after 5 epochs without val_loss improvement and roll back to
        # the best weights seen.
        es = EarlyStopping(monitor = "val_loss",min_delta=0,patience = 5,verbose=1,restore_best_weights=True)
        # Validate on the explicit validation split prepared earlier instead
        # of carving another 20% out of the training rows, so every training
        # row is actually used for fitting.
        history=model.fit(x_train2,y_train,validation_data=(x_val2,y_val),verbose=1,epochs=30,callbacks=[es])

        # Score the final (best-weights) model on the validation split.
        y_pred = model.predict(x_val2).argmax(axis=1)
        acc = accuracy_score(y_val,y_pred)
        # NOTE(review): autolog also records a per-epoch training metric named
        # "accuracy"; this call appends the validation accuracy to that same
        # key. Confirm with the team whether a distinct metric name is wanted.
        mlflow.log_metric("accuracy", acc)
        # Log the model once and register it as a new version.
        mlflow.keras.log_model(model, "model",  registered_model_name="Sign_Signal_14")
finally:
    # Always switch autologging back off, even when training fails, so later
    # fit calls in this session are not silently tracked.
    mlflow.keras.autolog(disable = True)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30




INFO:tensorflow:Assets written to: C:\Users\dufwn\AppData\Local\Temp\tmplap96yv_\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\dufwn\AppData\Local\Temp\tmplap96yv_\model\data\model\assets






INFO:tensorflow:Assets written to: C:\Users\dufwn\AppData\Local\Temp\tmp1jh17njr\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\dufwn\AppData\Local\Temp\tmp1jh17njr\model\data\model\assets
Registered model 'Sign_Signal_14' already exists. Creating a new version of this model...
2023/05/18 16:44:41 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: Sign_Signal_14, version 2
Created version '2' of model 'Sign_Signal_14'.


* 학습곡선

* 예측 및 검증

## 4.모델 사용하기
* 팀에서 1명만 mlflow server ui에 접속하여 
    * model_## 의 각 버전 중 성능이 가장 좋은 버전을 production으로 상태변경
* 팀원들은 각자 운영모델 로딩하고, 사용해보기
    * test 셋으로 예측 및 평가
    * test image 하나를 불러와서 예측

### (1) 운영모델 로딩

In [16]:
# Load the version of the registered model that is addressed by the "production" stage.
model_uri = "models:/Sign_Signal_14/production" 
model_p = mlflow.keras.load_model(model_uri)

### (2) test 데이터로 예측하고 평가

In [17]:
# Model outputs for every scaled test image, then the highest-scoring class index per row.
pred = model_p.predict(x_test2)
pred_1 = np.argmax(pred, axis=1)



In [18]:
# Print accuracy, the confusion matrix and a per-letter classification report,
# each separated by a 60-dash rule.
print(
    accuracy_score(y_test, pred_1),
    '-'*60,
    confusion_matrix(y_test, pred_1),
    '-'*60,
    classification_report(class_names[y_test], class_names[pred_1]),
    sep='\n',
)

0.999
------------------------------------------------------------
[[42  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0 39  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0 39  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0 43  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0 34  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0 45  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0 43  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0 39  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0 41  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0 42  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0 45  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0 41  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  

### (3) 이미지 로딩해서 사용하기
* 과제1 코드 참조

In [19]:
# 파이프라인에서 필요한 라이브러리/함수
import pandas as pd
import numpy as np
import cv2
import joblib

In [20]:
# Open the test image as a single-channel grayscale array.
path = 'test image/'
file = 'v.png'
filename = path + file
img = cv2.imread(filename , cv2.IMREAD_GRAYSCALE)

# cv2.imread reports a missing or unreadable file by returning None instead
# of raising, so fail here with a clear message rather than inside resize.
if img is None:
    raise FileNotFoundError(f"Could not read image file: {filename}")

# Resize to the 28x28 input size of the model.
img = cv2.resize(img, (28, 28))

# Shape the image as a batch of one: (1, 28, 28, 1).
test_sign = img.reshape(1,28,28,1)

# Apply the same 0-1 scaling that was used for the training data.
test_sign = test_sign / 255.

# Predict and translate the winning class index into its letter.
pred = model_p.predict(test_sign)
pred_1 = pred.argmax(axis=1)
print(class_names[pred_1])

['v']
