# 모델 앙상블

In [39]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import optimizers
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Input, Reshape

import time

# 회귀 문제

In [12]:
x = np.arange(-1, 1, 0.01)
np.random.shuffle(x)
y = np.sin(x)

i = int(x.shape[0]*0.8)

train_x, test_x = x[:i], x[i:]
train_y, test_y = y[:i], y[i:]


In [13]:
def build_model():  
  model = keras.Sequential()
  model.add(Input((1,)))
  model.add(Dense(10, activation='relu'))
  model.add(Dense(10, activation='relu'))
  model.add(Dense(1))

  model.compile(optimizer="Adam", loss="mse")

  return model

In [14]:
n_members = 3

In [15]:
models = []
for _ in range(n_members):
	model = build_model()
	models.append(model)

In [16]:
for i in range(n_members):
	models[i].fit(train_x, train_y, epochs=1, batch_size=32)



In [37]:
y_ = [model.predict(test_x) for model in models]
y_ = np.array(y_)
print("y_.shape =", y_.shape)
outcomes = np.mean(y_,axis=0).squeeze()
print("outcomes.shape =", outcomes.shape)

mse = tf.keras.losses.mean_squared_error(test_y, outcomes).numpy()
print("mse =", mse)

y_.shape = (3, 40, 1)
outcomes.shape = (40,)
mse = 0.26387078


# 분류 문제

In [10]:
(raw_train_x, raw_train_y), (raw_test_x, raw_test_y) = tf.keras.datasets.mnist.load_data()

train_x = raw_train_x / 255.
test_x = raw_test_x / 255.

train_y = raw_train_y
test_y = raw_test_y

In [3]:
def build_model():  
  model = keras.Sequential()
  model.add(Input((28,28)))
  model.add(Reshape((28,28,1)))
  model.add(Conv2D(32, (3, 3), padding='same'))
  model.add(MaxPooling2D((2, 2)))
  model.add(Conv2D(64, (3, 3), padding='same'))
  model.add(MaxPooling2D((2, 2)))
  model.add(Flatten())
  model.add(Dense(10, activation='relu'))
  model.add(Dense(10, activation='relu'))
  model.add(Dense(10, activation='softmax'))

  model.compile(optimizer="Adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

  return model

In [4]:
n_members = 3

In [5]:
models = []
for _ in range(n_members):
	model = build_model()
	models.append(model)

In [6]:
for i in range(n_members):
	models[i].fit(train_x, train_y, epochs=1, batch_size=32)



In [11]:
from sklearn.metrics import accuracy_score

y_ = [model.predict(test_x) for model in models]
summed = np.sum(y_, axis=0)
print(summed[0])
outcomes = np.argmax(summed, axis=1)
print(outcomes)

acc = accuracy_score(test_y, outcomes)
print(acc)


[3.9812742e-04 4.7774888e-03 5.7814235e-05 3.6314296e-04 2.1569857e-04
 6.4500101e-04 3.4116878e-04 2.9925301e+00 6.6051522e-05 6.0542283e-04]
[7 2 1 ... 4 5 6]
0.9725


In [None]:

# combine predictions
#===============================================
# 회귀 문제인 경우 평균값을 계산
# make predictions
yhats = [model.predict(testX) for model in models]
yhats = array(yhats)
# calculate average
outcomes = mean(yhats)
#===============================================
# (이진) 분류 문제인 경우 모드를 계산
# make predictions
yhats = [model.predict_classes(testX) for model in models]
yhats = array(yhats)
# calculate mode
outcomes, _ = mode(yhats)
#===============================================
# (멀티) 분류 문제인 경우 softmax 적용 후 argmax로 계산
# make predictions
#===============================================

In [None]:
fold_count = 5

In [None]:
from sklearn.model_selection import KFold

kfold = KFold(n_splits=fold_count, shuffle=True)

losses = []
acces = []

for train, test in kfold.split(x, y):

  model = build_model()

  history = model.fit(x[train], y[train], batch_size=32, epochs=2, validation_split=0.2)

  loss, acc = model.evaluate(x[test], y[test])
  print("loss =", loss)
  print("acc =", acc)
  losses.append(loss)
  acces.append(acc)


Epoch 1/2
Epoch 2/2
loss = 8.736839731682267e-07
acc = 0.0990714281797409
Epoch 1/2
Epoch 2/2
loss = 3.932102004000626e-07
acc = 0.09814285486936569
Epoch 1/2
Epoch 2/2
loss = 4.4507480367883545e-08
acc = 0.10014285892248154
Epoch 1/2
Epoch 2/2
loss = 4.5475235310732387e-07
acc = 0.09871428459882736
Epoch 1/2
Epoch 2/2
loss = 0.004454826470464468
acc = 0.09700000286102295


In [None]:
average_acc = sum(acces) / fold_count
print("average acc=", average_acc)

average acc= 0.09861428588628769


# 2진 분류 문제

In [38]:
!wget https://raw.githubusercontent.com/dhrim/MDC_2021/master/material/deep_learning/data/sonar.csv

--2021-12-28 06:35:41--  https://raw.githubusercontent.com/dhrim/MDC_2021/master/material/deep_learning/data/sonar.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 87776 (86K) [text/plain]
Saving to: ‘sonar.csv’


2021-12-28 06:35:41 (19.1 MB/s) - ‘sonar.csv’ saved [87776/87776]



In [40]:
raw_data = pd.read_csv("sonar.csv")
data = raw_data.to_numpy()
np.random.shuffle(data)

x = data[:,:60]
y = data[:,60]

i = int(len(x)*0.8)
train_x, test_x = x[:i], x[i:]
train_y, test_y = y[:i], y[i:]

In [45]:
def build_model():  

  model = keras.Sequential()
  model.add(Dense(10, activation='relu', input_shape=(60,)))
  model.add(Dense(10, activation='relu'))
  model.add(Dense(1, activation="sigmoid"))

  model.compile(optimizer="Adam", loss="binary_crossentropy", metrics=["acc"])

  return model

In [46]:
n_members = 3

In [47]:
models = []
for _ in range(n_members):
	model = build_model()
	models.append(model)

In [48]:
for i in range(n_members):
	models[i].fit(train_x, train_y, epochs=1, batch_size=32)



In [61]:
y_ = [model.predict(test_x) for model in models]
y_ = np.array(y_).squeeze()
predicted = (y_>0.5).astype(np.int)

import scipy
outcomes, _ = scipy.stats.mode(predicted)
print(outcomes[:10])
outcomes = outcomes.squeeze()


from sklearn.metrics import accuracy_score

acc = accuracy_score(test_y, outcomes)

[[0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 0 1
  1 1 1 1 1 1]]
(42,)
(42,)
0.5238095238095238
