In [1]:
import os
import pandas as pd
import numpy as np
import warnings
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import sklearn
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import train_test_split
# warnings.filterwarnings("ignore")

In [2]:
data_path = '/home/data/KAERI_dataset/'
os.listdir(data_path)

['pred',
 'sample_submission.csv',
 'test_features.csv',
 'train_features.csv',
 'train_target.csv']

In [3]:
sample_submission = pd.read_csv(data_path + 'sample_submission.csv')
test_features = pd.read_csv(data_path + 'test_features.csv')
train_features = pd.read_csv(data_path + 'train_features.csv')
train_target = pd.read_csv(data_path + 'train_target.csv')

In [4]:
train_features.shape

(1050000, 6)

In [11]:
train_df = np.array(train_features)[:, 2:].reshape(-1, 375, 4, 1)
target_df = train_target[['X','Y','M','V']]

In [9]:
train_df.shape

(2800, 375, 4, 1)

In [12]:
X_train, X_test, y_train, y_test = train_test_split(train_df, target_df, test_size=0.3)

In [13]:
X_train.shape, y_train.shape

((1960, 375, 4, 1), (1960, 4))

In [15]:
# Lab 11 MNIST and Deep learning CNN
# https://www.tensorflow.org/tutorials/layers
import tensorflow as tf
import numpy as np

# Check out https://www.tensorflow.org/get_started/mnist/beginners for
# more information about the mnist dataset

# hyper parameters
learning_rate = 0.001
training_epochs = 20
batch_size = 100


# input place holders
X = tf.placeholder(tf.float32, [None, 375, 4, 1])

# img 28x28x1 (black/white), Input Layer
X_img = tf.reshape(X, [-1, 375, 4, 1])
Y = tf.placeholder(tf.float32, [None, 4])

# Convolutional Layer #1
conv1 = tf.layers.conv2d(inputs=X_img, filters=32, kernel_size=[4, 4],
                         activation=tf.nn.relu)
# Pooling Layer #1
pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 1],
                                strides=1)
dropout1 = tf.layers.dropout(inputs=pool1,
                             rate=0.3)

# Dense Layer with Relu
flat = tf.reshape(dropout1, [-1, 375 * 4 * 1])
dense4 = tf.layers.dense(inputs=flat,
                         units=375, activation=tf.nn.relu)
dropout4 = tf.layers.dropout(inputs=dense4,
                             rate=0.5)

# Logits (no activation) Layer: L5 Final FC 625 inputs -> 10 outputs
logits = tf.layers.dense(inputs=dropout4, units=4)



In [None]:
np.mean(np.sum(np.square(_t - _p), axis = 1) / 2e+04)
np.mean(np.sum(np.square((_t - _p) / (_t + 1e-06)), axis = 1))
0.5 * E1(y_true, y_pred) + 0.5 * E2(y_true, y_pred)

In [23]:
def kaeri_metric(y_true, y_pred):
    '''
    y_true: dataframe with true values of X,Y,M,V
    y_pred: dataframe with pred values of X,Y,M,V
    
    return: KAERI metric
    '''
    
    return 0.5 * E1(y_true, y_pred) + 0.5 * E2(y_true, y_pred)


### E1과 E2는 아래에 정의됨 ###

def E1(y_true, y_pred):
    '''
    y_true: dataframe with true values of X,Y,M,V
    y_pred: dataframe with pred values of X,Y,M,V
    
    return: distance error normalized with 2e+04
    '''
    
    _t, _p = np.array(y_true)[:,:2], np.array(y_pred)[:,:2]
    
    return np.mean(np.sum(np.square(_t - _p), axis = 1) / 2e+04)


def E2(y_true, y_pred):
    '''
    y_true: dataframe with true values of X,Y,M,V
    y_pred: dataframe with pred values of X,Y,M,V
    
    return: sum of mass and velocity's mean squared percentage error
    '''
    
    _t, _p = np.array(y_true)[:,2:], np.array(y_pred)[:,2:]
    
    
    return np.mean(np.sum(np.square((_t - _p) / (_t + 1e-06)), axis = 1))


## 특별상금1 점수 산출 지표
def E1_max(y_true, y_pred):
    '''
    y_true: dataframe with true values of X,Y,M,V
    y_pred: dataframe with pred values of X,Y,M,V
    
    return: maximum E1 error from 700 test samples
    '''
    
    _t, _p = np.array(y_true)[:,:2], np.array(y_pred)[:,:2]
    
    return np.max(np.sum(np.square(_t - _p), axis = 1) / 2e+04)


## 특별상금2 점수 산출 지표
def E2_max(y_true, y_pred):
    '''
    y_true: dataframe with true values of X,Y,M,V
    y_pred: dataframe with pred values of X,Y,M,V
    
    return: maximum E2 error from 700 test samples
    '''
    
    _t, _p = np.array(y_true)[:,2:], np.array(y_pred)[:,2:]
    
    return np.max(np.sum(np.square((_t - _p) / (_t + 1e-06)), axis = 1))

In [25]:
E1 = tf.reduce_mean(tf.square(Y[:, :2] - logits[:, :2]) / 2e+04)
E2 = tf.reduce_mean(tf.square(Y[:, 2:] - logits[:, 2:]) / (Y[:, 2:] + 1e-06))
cost = 0.5*E1 + 0.5*E2

optimizer = tf.train.AdamOptimizer(
    learning_rate=learning_rate).minimize(cost)

correct_prediction = logits

In [None]:
def predict(self, x_test, training=False):
    return self.sess.run(self.logits,
                         feed_dict={self.X: x_test, self.training: training})

def get_accuracy(self, x_test, y_test, training=False):
    return self.sess.run(self.accuracy,
                         feed_dict={self.X: x_test,
                                    self.Y: y_test, self.training: training})

def train(self, x_data, y_data, training=True):
    return self.sess.run([self.cost, self.optimizer], feed_dict={
        self.X: x_data, self.Y: y_data, self.training: training})

In [26]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())
sess.run([cost, optimizer], feed_dict={
            X: X_train, Y: y_train})

InvalidArgumentError: Input to reshape is a tensor with 23269120 values, but the requested shape requires a multiple of 1500
	 [[node Reshape_3 (defined at <ipython-input-15-d1001015ac8b>:32) ]]

Errors may have originated from an input operation.
Input Source operations connected to node Reshape_3:
 dropout_1/Identity (defined at <ipython-input-15-d1001015ac8b>:29)

Original stack trace for 'Reshape_3':
  File "/usr/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.6/dist-packages/traitlets/config/application.py", line 664, in launch_instance
    app.start()
  File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelapp.py", line 583, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.6/dist-packages/tornado/platform/asyncio.py", line 149, in start
    self.asyncio_loop.run_forever()
  File "/usr/lib/python3.6/asyncio/base_events.py", line 438, in run_forever
    self._run_once()
  File "/usr/lib/python3.6/asyncio/base_events.py", line 1451, in _run_once
    handle._run()
  File "/usr/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/usr/local/lib/python3.6/dist-packages/tornado/ioloop.py", line 690, in <lambda>
    lambda f: self._run_callback(functools.partial(callback, future))
  File "/usr/local/lib/python3.6/dist-packages/tornado/ioloop.py", line 743, in _run_callback
    ret = callback()
  File "/usr/local/lib/python3.6/dist-packages/tornado/gen.py", line 787, in inner
    self.run()
  File "/usr/local/lib/python3.6/dist-packages/tornado/gen.py", line 748, in run
    yielded = self.gen.send(value)
  File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelbase.py", line 365, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/usr/local/lib/python3.6/dist-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelbase.py", line 268, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/usr/local/lib/python3.6/dist-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelbase.py", line 545, in execute_request
    user_expressions, allow_stdin,
  File "/usr/local/lib/python3.6/dist-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/usr/local/lib/python3.6/dist-packages/ipykernel/ipkernel.py", line 300, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/usr/local/lib/python3.6/dist-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py", line 2858, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py", line 2886, in _run_cell
    return runner(coro)
  File "/usr/local/lib/python3.6/dist-packages/IPython/core/async_helpers.py", line 68, in _pseudo_sync_runner
    coro.send(None)
  File "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py", line 3063, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py", line 3254, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):
  File "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py", line 3331, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d1001015ac8b>", line 32, in <module>
    flat = tf.reshape(dropout1, [-1, 375 * 4 * 1])
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 7715, in reshape
    "Reshape", tensor=tensor, shape=shape, name=name)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py", line 788, in _apply_op_helper
    op_def=op_def)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py", line 3616, in create_op
    op_def=op_def)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py", line 2005, in __init__
    self._traceback = tf_stack.extract_stack()


In [11]:
# initialize
sess = tf.Session()

sess.run(tf.global_variables_initializer())

print('Learning Started!')

# train my model
for epoch in range(training_epochs):
    avg_cost_list = np.zeros(len(models))
    total_batch = int(mnist.train.num_examples / batch_size)
    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)

        # train each model
        for m_idx, m in enumerate(models):
            c, _ = m.train(batch_xs, batch_ys)
            avg_cost_list[m_idx] += c / total_batch

    print('Epoch:', '%04d' % (epoch + 1), 'cost =', avg_cost_list)

print('Learning Finished!')

# Test model and check accuracy
test_size = len(mnist.test.labels)
predictions = np.zeros([test_size, 10])
for m_idx, m in enumerate(models):
    print(m_idx, 'Accuracy:', m.get_accuracy(
        mnist.test.images, mnist.test.labels))
    p = m.predict(mnist.test.images)
    predictions += p

ensemble_correct_prediction = tf.equal(
    tf.argmax(predictions, 1), tf.argmax(mnist.test.labels, 1))
ensemble_accuracy = tf.reduce_mean(
    tf.cast(ensemble_correct_prediction, tf.float32))
print('Ensemble accuracy:', sess.run(ensemble_accuracy))

ValueError: Variable model0/conv2d/kernel already exists, disallowed. Did you mean to set reuse=True or reuse=tf.AUTO_REUSE in VarScope? Originally defined at:

  File "<ipython-input-8-7bb723fdf98c>", line 37, in _build_net
    activation=tf.nn.relu)
  File "<ipython-input-8-7bb723fdf98c>", line 20, in __init__
    self._build_net()
  File "<ipython-input-9-2b906355e5cd>", line 7, in <module>
    models.append(Model(sess, "model" + str(m)))
  File "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py", line 3331, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py", line 3254, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):


In [7]:
def kaeri_metric(y_true, y_pred):
    '''
    y_true: dataframe with true values of X,Y,M,V
    y_pred: dataframe with pred values of X,Y,M,V
    
    return: KAERI metric
    '''
    
    return 0.5 * E1(y_true, y_pred) + 0.5 * E2(y_true, y_pred)


### E1과 E2는 아래에 정의됨 ###

def E1(y_true, y_pred):
    '''
    y_true: dataframe with true values of X,Y,M,V
    y_pred: dataframe with pred values of X,Y,M,V
    
    return: distance error normalized with 2e+04
    '''
    
    _t, _p = np.array(y_true)[:,:2], np.array(y_pred)[:,:2]
    
    return np.mean(np.sum(np.square(_t - _p), axis = 1) / 2e+04)


def E2(y_true, y_pred):
    '''
    y_true: dataframe with true values of X,Y,M,V
    y_pred: dataframe with pred values of X,Y,M,V
    
    return: sum of mass and velocity's mean squared percentage error
    '''
    
    _t, _p = np.array(y_true)[:,2:], np.array(y_pred)[:,2:]
    
    
    return np.mean(np.sum(np.square((_t - _p) / (_t + 1e-06)), axis = 1))


## 특별상금1 점수 산출 지표
def E1_max(y_true, y_pred):
    '''
    y_true: dataframe with true values of X,Y,M,V
    y_pred: dataframe with pred values of X,Y,M,V
    
    return: maximum E1 error from 700 test samples
    '''
    
    _t, _p = np.array(y_true)[:,:2], np.array(y_pred)[:,:2]
    
    return np.max(np.sum(np.square(_t - _p), axis = 1) / 2e+04)


## 특별상금2 점수 산출 지표
def E2_max(y_true, y_pred):
    '''
    y_true: dataframe with true values of X,Y,M,V
    y_pred: dataframe with pred values of X,Y,M,V
    
    return: maximum E2 error from 700 test samples
    '''
    
    _t, _p = np.array(y_true)[:,2:], np.array(y_pred)[:,2:]
    
    return np.max(np.sum(np.square((_t - _p) / (_t + 1e-06)), axis = 1))