# 모델 앙상블

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import optimizers
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Input, Reshape

import time

# 회귀 문제

In [2]:
# Toy regression set: points on [-1, 1) in steps of 0.01 with target y = sin(x).
x = np.arange(-1, 1, 0.01)
# Shuffle with a locally seeded generator so the train/test split is identical
# on every Restart & Run All; the global NumPy random state is left untouched.
rng = np.random.default_rng(42)
rng.shuffle(x)
y = np.sin(x)

# The first 80% of the shuffled points train the models; the rest is held out.
i = int(x.shape[0]*0.8)

train_x, test_x = x[:i], x[i:]
train_y, test_y = y[:i], y[i:]


In [3]:
def build_model():
  """Create and compile a small fully connected regressor.

  The network takes a single scalar feature, passes it through two 10-unit
  ReLU layers and ends in one output unit that is given no activation
  argument. It is compiled with the "Adam" optimizer and "mse" loss.

  Returns:
    A compiled, untrained `keras.Sequential` model.
  """
  layers = [
    Input((1,)),
    Dense(10, activation='relu'),
    Dense(10, activation='relu'),
    Dense(1),
  ]
  regressor = keras.Sequential(layers)
  regressor.compile(loss="mse", optimizer="Adam")
  return regressor

In [4]:
# The regression ensemble: three separately constructed models.
models = [build_model() for _ in range(3)]

In [5]:
# Train every ensemble member on the same training split for a single epoch.
for model in models:
  model.fit(x=train_x, y=train_y, batch_size=32, epochs=1)



In [8]:
# Collect each member's predictions on the held-out inputs and drop the
# singleton axes, leaving one row of predictions per model.
y_ = np.array([member.predict(test_x) for member in models]).squeeze()
print("y_.shape =", y_.shape)
print(y_[:,:5])

# Ensemble output: average the per-model predictions for every sample.
outcomes = y_.mean(axis=0)
print("outcomes.shape =", outcomes.shape)
print(outcomes[:5])

# Mean squared error of the averaged prediction against the held-out targets.
mse_tensor = tf.keras.losses.mean_squared_error(test_y, outcomes)
mse = mse_tensor.numpy()
print("mse =", mse)

y_.shape = (3, 40)
[[-0.04627877 -0.06698939 -0.16640043 -0.08356776 -0.05322006]
 [-0.04277497 -0.057627   -0.12891673  0.03177328  0.01537557]
 [ 0.00806023  0.00521133 -0.00846342 -0.09507136 -0.0611092 ]]
outcomes.shape = (40,)
[-0.02699784 -0.03980168 -0.10126019 -0.04895528 -0.03298457]
mse = 0.25441456


# 분류 문제

In [9]:
# Load MNIST as (images, labels) pairs for the train and test splits.
mnist = tf.keras.datasets.mnist
(raw_train_x, raw_train_y), (raw_test_x, raw_test_y) = mnist.load_data()

# Divide the pixel values by 255; the labels are used unchanged.
train_x, test_x = raw_train_x / 255., raw_test_x / 255.
train_y, test_y = raw_train_y, raw_test_y

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [10]:
def build_model():
  """Create and compile a small convolutional classifier for 28x28 inputs.

  The input is reshaped to 28x28x1, passed through two Conv2D + MaxPooling2D
  stages (32 then 64 filters, 3x3 kernels, 'same' padding), flattened, and fed
  through two 10-unit ReLU layers into a 10-way softmax. The model is compiled
  with the "Adam" optimizer, "sparse_categorical_crossentropy" loss and the
  "accuracy" metric.

  Returns:
    A compiled, untrained `keras.Sequential` model.
  """
  # NOTE(review): the Conv2D layers are given no activation argument, so the
  # pooling layers look like the only non-linearity in the convolutional part.
  # Confirm this is intended.
  layers = [
    Input((28,28)),
    Reshape((28,28,1)),
    Conv2D(32, (3, 3), padding='same'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), padding='same'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(10, activation='relu'),
    Dense(10, activation='relu'),
    Dense(10, activation='softmax'),
  ]
  classifier = keras.Sequential(layers)
  classifier.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="Adam",
    metrics=["accuracy"],
  )
  return classifier

In [12]:
# The classification ensemble: three separately constructed CNNs.
models = [build_model() for _ in range(3)]

In [13]:
# One epoch of training per ensemble member, all on the same MNIST training set.
for model in models:
  model.fit(x=train_x, y=train_y, batch_size=32, epochs=1)



In [15]:
from sklearn.metrics import accuracy_score

# Stack each member's softmax output: one (n_samples, n_classes) slab per model.
y_ = np.array([member.predict(test_x) for member in models])
print(y_.shape)
print(y_[:,0])

# Soft voting, step 1: average the probability vectors across the models.
meaned = y_.mean(axis=0)
print(meaned.shape)
print(meaned[0])

# Soft voting, step 2: pick the class with the highest averaged probability.
outcomes = meaned.argmax(axis=1)
print(outcomes)

# Accuracy of the ensemble's labels against the true test labels.
acc = accuracy_score(test_y, outcomes)
print(acc)


(3, 10000, 10)
[[7.3118940e-08 5.5200302e-09 9.9603244e-07 4.3107422e-05 1.0332471e-14
  6.8421807e-10 5.8407901e-12 9.9994910e-01 1.1971389e-07 6.5158051e-06]
 [2.8580308e-10 1.7833463e-03 1.4674904e-04 6.8143847e-05 6.6067179e-05
  1.2125081e-08 1.3628551e-10 9.9663866e-01 1.1498529e-03 1.4721218e-04]
 [1.0158718e-10 1.3534563e-07 1.9632228e-06 3.0818526e-07 9.5017440e-12
  6.6770443e-11 1.1295679e-15 9.9945599e-01 8.3412893e-10 5.4167776e-04]]
(10000, 10)
[2.4502109e-08 5.9449574e-04 4.9902766e-05 3.7186484e-05 2.2022396e-05
 4.2920232e-09 4.7375808e-11 9.9868125e-01 3.8332446e-04 2.3180193e-04]
[7 2 1 ... 4 5 6]
0.9789


# 2진 분류 문제

In [16]:
# Download the sonar dataset. -nc (no-clobber) skips the download when
# sonar.csv already exists, so re-running this cell does not leave
# sonar.csv.1, sonar.csv.2, ... copies next to the file that is read below.
!wget -nc https://raw.githubusercontent.com/dhrim/MDC_2021/master/material/deep_learning/data/sonar.csv

--2021-12-29 00:53:47--  https://raw.githubusercontent.com/dhrim/MDC_2021/master/material/deep_learning/data/sonar.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 87776 (86K) [text/plain]
Saving to: ‘sonar.csv’


2021-12-29 00:53:48 (5.11 MB/s) - ‘sonar.csv’ saved [87776/87776]



In [17]:
# NOTE(review): read_csv treats the first line of the file as a header by
# default. Confirm sonar.csv has a header row; otherwise pass header=None.
raw_data = pd.read_csv("sonar.csv")

# Shuffle the rows with a locally seeded generator so the 80/20 split is the
# same on every run. permutation() returns a shuffled copy, so the array
# obtained from raw_data is never modified in place.
rng = np.random.default_rng(42)
data = rng.permutation(raw_data.to_numpy())

# Columns 0-59 are used as features, column 60 as the target.
x = data[:,:60]
y = data[:,60]

# The first 80% of the shuffled rows train the models; the rest is held out.
i = int(len(x)*0.8)
train_x, test_x = x[:i], x[i:]
train_y, test_y = y[:i], y[i:]

In [18]:
def build_model():
  """Create and compile a small fully connected binary classifier.

  The network takes 60 input features, passes them through two 10-unit ReLU
  layers and ends in a single sigmoid unit. It is compiled with the "Adam"
  optimizer, "binary_crossentropy" loss and the "acc" metric.

  Returns:
    A compiled, untrained `keras.Sequential` model.
  """
  model = keras.Sequential()
  # Declare the input shape with an explicit Input layer, as the other model
  # builders in this notebook do, instead of input_shape= on the first Dense.
  model.add(Input((60,)))
  model.add(Dense(10, activation='relu'))
  model.add(Dense(10, activation='relu'))
  model.add(Dense(1, activation="sigmoid"))

  model.compile(optimizer="Adam", loss="binary_crossentropy", metrics=["acc"])

  return model

In [19]:
# The binary-classification ensemble: three separately constructed models.
models = [build_model() for _ in range(3)]

In [20]:
# Train each member for one epoch, holding out 20% of the training rows for
# validation via validation_split.
for model in models:
  model.fit(x=train_x, y=train_y, batch_size=32, epochs=1, validation_split=0.2)



In [23]:
# Import the submodule explicitly: a bare `import scipy` does not guarantee
# that scipy.stats has been loaded.
import scipy.stats
from sklearn.metrics import accuracy_score

# Each model ends in a single sigmoid unit, so every prediction array has one
# column. Reshape to (n_models, n_samples) explicitly instead of squeeze(),
# which would also collapse the sample axis for a single-row test set.
y_ = np.array([model.predict(test_x) for model in models])
y_ = y_.reshape(len(models), -1)

# Threshold the probabilities at 0.5 to get hard 0/1 votes. The builtin int
# replaces np.int, an alias that has been removed from NumPy.
predicted = (y_>0.5).astype(int)
print(predicted.shape)
print(predicted[:,:10])

# Hard voting: for every sample take the most frequent label across models.
outcomes, _ = scipy.stats.mode(predicted, axis=0)
# Flatten to (n_samples,) whether or not this SciPy version keeps the reduced
# axis in the result.
outcomes = np.asarray(outcomes).reshape(-1)
print(outcomes.shape)
print(outcomes[:10])

# Accuracy of the majority-vote labels against the held-out targets.
acc = accuracy_score(test_y, outcomes)
print("acc = ", acc)

(3, 42)
[[1 1 1 1 1 1 0 1 1 1]
 [1 1 1 1 1 1 1 1 1 1]
 [1 1 0 1 1 1 1 1 1 1]]
(42,)
[1 1 1 1 1 1 1 1 1 1]
acc =  0.47619047619047616
