## ※ Q1_0206. IMDB 영화 리뷰 데이터셋을 사용하여 긍부정 이진분류 모델링 및 평가를 수행하세요. 단, embedding 차원은 8로 설정하세요.

In [None]:
from keras.datasets import imdb
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, Flatten, Dense

# Load the IMDB review dataset.
# num_words=10000 keeps only the 10,000 most frequent words of the training data.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=10000)

# Bring every review to a fixed length of 100 tokens (same call for both splits).
x_train, x_test = (
    pad_sequences(reviews, maxlen=100) for reviews in (x_train, x_test)
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [None]:
# Q1_ 답1

from keras.datasets import imdb
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, Flatten, Dense
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Load the IMDB review dataset, restricted to a 10,000-word vocabulary.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=10000)

# Bring every review to a fixed length of 100 tokens.
x_train = pad_sequences(x_train, maxlen=100)
x_test = pad_sequences(x_test, maxlen=100)

# Build the model: 8-dimensional embedding -> flatten -> small dense head.
word_size = 10000
model = Sequential([
    Embedding(word_size, 8, input_length=100),
    Flatten(),
    Dense(10, activation='relu'),
    Dense(1, activation='sigmoid'),  # binary classification: a single output unit
])

# Print the layer/parameter summary.
model.summary()

# Stop once val_accuracy has not improved for 20 epochs.
# restore_best_weights=True rolls the model back to the best epoch; without it
# the evaluation below would score the weights from 20 epochs *after* the best
# validation result, i.e. the most overfit state seen during training.
Early_Stopping_Callbacks = EarlyStopping(
    monitor='val_accuracy',
    patience=20,
    restore_best_weights=True,
)

# Compile, train, and report test accuracy.
# epochs is only an upper bound here; early stopping ends training in practice.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(
    x_train,
    y_train,
    epochs=20000,
    batch_size=20,
    verbose=1,
    validation_split=0.25,
    callbacks=[Early_Stopping_Callbacks],
)
# evaluate() returns [loss, accuracy] because 'accuracy' is the only metric.
print(f'\nAccuracy: {model.evaluate(x_test, y_test)[1]:.4f}')

Model: "sequential_29"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_23 (Embedding)    (None, 100, 8)            80000     
                                                                 
 flatten_17 (Flatten)        (None, 800)               0         
                                                                 
 dense_23 (Dense)            (None, 10)                8010      
                                                                 
 dense_24 (Dense)            (None, 1)                 11        
                                                                 
Total params: 88021 (343.83 KB)
Trainable params: 88021 (343.83 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/20000
Epoch 2/20000
Epoch 3/20000
Epoch 4/20000
Epoch 5/20000
Epoch 6/20000
Epoch 7/20000
Epoch 8/20000
Epoch 9/20000
Epoch 10/20000
Epoch 

In [None]:
# Q1_ 답2

from keras.datasets import imdb
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, Flatten, Dense

# Load the IMDB review dataset.
# num_words=10000 keeps only the 10,000 most frequent words of the training data.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=10000)

# Bring every review to a fixed length of 100 tokens (same call for both splits).
x_train, x_test = (
    pad_sequences(reviews, maxlen=100) for reviews in (x_train, x_test)
)

# Model: 8-dimensional embedding -> flatten -> single sigmoid unit.
model = Sequential([
    Embedding(input_dim=10000, output_dim=8, input_length=100),
    Flatten(),
    Dense(units=1, activation='sigmoid'),
])

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Print the layer/parameter summary before training starts.
model.summary()

# Train for 10 epochs, holding out 20% of the training data for validation.
model.fit(
    x_train,
    y_train,
    validation_split=0.2,
    batch_size=32,
    epochs=10,
)

# Score on the held-out test split.
loss, accuracy = model.evaluate(x_test, y_test)
print("Test Accuracy:", accuracy)

Model: "sequential_28"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_22 (Embedding)    (None, 100, 8)            80000     
                                                                 
 flatten_16 (Flatten)        (None, 800)               0         
                                                                 
 dense_22 (Dense)            (None, 1)                 801       
                                                                 
Total params: 80801 (315.63 KB)
Trainable params: 80801 (315.63 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Accuracy: 0.834559977054596


## ※ Q2_0206. IMDB 영화 리뷰 데이터셋에 대한 감성 분석을 수행하는 SimpleRNN 모델을 구성하고 훈련하는 예제에 EarlyStopping과 ModelCheckpoint 콜백을 추가하여 모델 훈련을 개선하세요.

In [None]:
import numpy as np
from keras.datasets import imdb
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, SimpleRNN, Dense
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Load the IMDB review dataset, restricted to a 10,000-word vocabulary.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=10000)

# Preprocessing: bring every sequence to max_len tokens.
max_len = 100
x_train, x_test = (
    pad_sequences(reviews, maxlen=max_len) for reviews in (x_train, x_test)
)

# Model: 32-dimensional embedding -> SimpleRNN(32) -> single sigmoid unit.
model = Sequential([
    Embedding(input_dim=10000, output_dim=32),
    SimpleRNN(units=32),
    Dense(units=1, activation='sigmoid'),
])

# Compile for binary classification.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Callbacks, both watching val_loss:
#   - EarlyStopping ends training after 3 epochs without improvement.
#   - ModelCheckpoint writes the model to `filepath` only when val_loss improves.
filepath = '/content/drive/MyDrive/kita_231026/m6_dl/data/model/best_model.h5'
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    verbose=1,
)
model_checkpoint = ModelCheckpoint(
    filepath=filepath,
    monitor='val_loss',
    save_best_only=True,
    verbose=1,
)

# Train, holding out 20% of the training data for validation.
history = model.fit(
    x_train,
    y_train,
    validation_split=0.2,
    batch_size=32,
    epochs=20,
    callbacks=[early_stopping, model_checkpoint],
)

# Evaluate the in-memory model (the weights from the last epoch that ran).
loss, accuracy = model.evaluate(x_test, y_test)
print("Test Accuracy:", accuracy)

Epoch 1/20
Epoch 1: val_loss improved from inf to 0.44412, saving model to /content/drive/MyDrive/kita_231026/m6_dl/data/model/best_model.h5
Epoch 2/20
 11/625 [..............................] - ETA: 7s - loss: 0.3633 - accuracy: 0.8608

  saving_api.save_model(


Epoch 2: val_loss improved from 0.44412 to 0.41945, saving model to /content/drive/MyDrive/kita_231026/m6_dl/data/model/best_model.h5
Epoch 3/20
Epoch 3: val_loss did not improve from 0.41945
Epoch 4/20
Epoch 4: val_loss did not improve from 0.41945
Epoch 5/20
Epoch 5: val_loss did not improve from 0.41945
Epoch 5: early stopping
Test Accuracy: 0.805679976940155


In [None]:
import numpy as np
from keras.datasets import imdb
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, SimpleRNN, Dense
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Load the IMDB review dataset, restricted to a 10,000-word vocabulary.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=10000)

# Preprocessing: bring every sequence to max_len tokens.
max_len = 100
x_train, x_test = (
    pad_sequences(reviews, maxlen=max_len) for reviews in (x_train, x_test)
)

In [None]:
# 저장된 모델을 불러와서 평가에 사용하기
from tensorflow.keras.models import load_model

# Load the saved model.
# The path must name the .h5 file itself: a trailing "/" would denote a
# directory and breaks file-format detection based on the ".h5" extension.
model_path = "/content/drive/MyDrive/kita_231026/m6_dl/data/model/best_model.h5"
loaded_model = load_model(model_path)

# Evaluate the loaded model on the test data.
loss, accuracy = loaded_model.evaluate(x_test, y_test)
print(f'Test Loss: {loss}')
print(f'Test Accuracy: {accuracy}')

Test Loss: 0.4181574881076813
Test Accuracy: 0.8180800080299377


In [None]:
# 출력 결과는 이진 분류 문제에서 모델이 각 샘플에 대해 긍정 클래스(예: 리뷰가 긍정적)에 속할 확률
from tensorflow.keras.models import load_model

# Load the saved model.
# The path must name the .h5 file itself: a trailing "/" would denote a
# directory and breaks file-format detection based on the ".h5" extension.
model_path = "/content/drive/MyDrive/kita_231026/m6_dl/data/model/best_model.h5"
loaded_model = load_model(model_path)

# Run prediction on new data (here: the test split).
predictions = loaded_model.predict(x_test)
# Threshold the first 10 sigmoid outputs at 0.5 to get 0/1 class labels.
predicted_classes = (predictions[:10] > 0.5).astype(int)

# Print probability and class label side by side for those same 10 samples.
# The probabilities are sliced explicitly rather than relying on zip() to
# silently truncate the full prediction array to the length of the labels.
for i, (prob, label) in enumerate(zip(predictions[:10], predicted_classes), start=1):
    print(f"Sample {i}: Probability={prob[0]:.4f}, Class={label[0]}")

Sample 1: Probability=0.1466, Class=0
Sample 2: Probability=0.9553, Class=1
Sample 3: Probability=0.8759, Class=1
Sample 4: Probability=0.1972, Class=0
Sample 5: Probability=0.9520, Class=1
Sample 6: Probability=0.2059, Class=0
Sample 7: Probability=0.7069, Class=1
Sample 8: Probability=0.0366, Class=0
Sample 9: Probability=0.5664, Class=1
Sample 10: Probability=0.9108, Class=1
