In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score

%matplotlib inline

In [None]:
# Show up to 100 columns when a DataFrame is rendered in the notebook.
pd.options.display.max_columns = 100

In [None]:
# Load the per-user weekly feature vectors and the training labels; the
# "Unnamed: 0" column present in both CSV files is discarded.
activity = pd.read_csv("user_vector.csv").drop(columns="Unnamed: 0")
label = pd.read_csv("data/train_label.csv").drop(columns="Unnamed: 0")

In [None]:
# Preview the first rows of the activity table.
activity.head()

In [None]:
# Order the labels by account id; the account order used below is derived
# from this sorted frame.
label = label.sort_values(by="acc_id")
label.head()

<br></br><br></br><br></br>

In [None]:
# Account ids as plain Python ints, in the same order as the sorted labels.
id_lst = list(map(int, label.acc_id.tolist()))
print(len(id_lst))

In [None]:
# One (initially empty) list of weekly records per labelled account.
activity_dic = {user: [] for user in id_lst}

In [None]:
# Sanity check: number of columns in a raw activity row, then the row itself.
first_row = activity.head().values[0]
print(len(first_row))
first_row

In [None]:
# Group the activity rows by account. Column 0 is used as the account key,
# column 1 is cast to int and kept as the first element of the stored record,
# and the remaining columns follow it unchanged.
for row in activity.values :
    account = int(row[0])
    week_no = int(row[1])
    activity_dic[account].append([week_no] + list(row[2:]))

In [None]:
# Flatten the dict into [account_id, weekly_records] pairs (insertion order).
activity_lst = [[account, records] for account, records in activity_dic.items()]
activity_lst[0]

In [None]:
# Shallow copy of the per-account list; every account is kept.
day_1_lst = list(activity_lst)
print(len(day_1_lst))

In [None]:
# (account, step) -> feature vector; filled in by the following cells.
day_1_dic = {}
# Account ids in the order the sequences will be generated.
day_1_id_lst = [entry[0] for entry in day_1_lst]

In [None]:
# Marker vectors, each 53 entries wide with a single 1:
# position 0 = start, position 1 = empty step, position 2 = end.
TOKEN_WIDTH = 53
START_TOKEN = [int(pos == 0) for pos in range(TOKEN_WIDTH)]
EMPTY_TOKEN = [int(pos == 1) for pos in range(TOKEN_WIDTH)]
END_TOKEN = [int(pos == 2) for pos in range(TOKEN_WIDTH)]

In [None]:
# Give every account a default 10-step sequence:
# step 0 = START, steps 1-8 = EMPTY, step 9 = END.
# Keys are inserted in step order so dict order stays grouped per account.
for user in day_1_id_lst :
    day_1_dic[(user, 0)] = START_TOKEN
    for step in range(1, 9) :
        day_1_dic[(user, step)] = EMPTY_TOKEN
    day_1_dic[(user, 9)] = END_TOKEN
    


In [None]:
# Overlay the observed weekly vectors on the default sequences.
for user, weekly_records in day_1_lst :
    if weekly_records :
        # Every step before the first recorded week becomes a START token.
        first_week = weekly_records[0][0]
        for early_step in range(first_week) :
            day_1_dic[(user, early_step)] = START_TOKEN

    # Each record is [week, feature...]; store the features at that week.
    for record in weekly_records :
        day_1_dic[(user, record[0])] = record[1:]

In [None]:
# Label name -> integer class id.
# NOTE(review): the name `label_dic` is rebound to different mappings in later
# cells, so this cell must be re-run before the label-encoding cell is re-run.
label_dic = {"week":0 , "month" :1, "2month":2, "retained":3}

In [None]:
# Encode the label names as integer class ids on a sorted copy of the labels.
label2 = label.sort_values(by="acc_id")
label2["label"] = [label_dic[name] for name in label2["label"]]

label2.head()

<br></br><br></br><br></br>

In [None]:
def one_hot(lst, num_class=4) :
    """Return one-hot rows for the integer class ids in ``lst``.

    lst: sequence of class ids in ``range(num_class)``.
    num_class: width of each one-hot row.
    """
    identity = np.eye(num_class)
    # Row i of the identity matrix is the one-hot encoding of class i.
    return identity[lst]

In [None]:
# Flat list of all step vectors (dict insertion order) and the containers
# used by the chunking cell below.
temp1 = [*day_1_dic.values()]
day_1_total_lst = []
last1 = 0

In [None]:
# Cut the flat vector list into consecutive chunks of 10 steps; a trailing
# partial chunk (fewer than 10 vectors) is not emitted.
last1 = 0
for chunk_end in range(10, len(temp1) + 1, 10) :
    day_1_total_lst.append(temp1[last1:chunk_end])
    last1 = chunk_end

In [None]:
# Number of 10-step sequences that were built.
print(len(day_1_total_lst))

In [None]:
# Rebind label_dic to an account id -> class id lookup.
label_dic = label2.set_index("acc_id")["label"].to_dict()
print(len(label_dic))

In [None]:
# One-hot targets, ordered like day_1_id_lst.
day_1_class_ids = [label_dic[account] for account in day_1_id_lst]
day_1_total_label = one_hot(day_1_class_ids)

In [None]:
# Number of one-hot label rows; compare with the sequence count printed above.
print(len(day_1_total_label))

In [None]:
# Positional 4/6 - 1/6 - remainder split into training, validation and test.
one_sixth = len(day_1_total_lst) // 6
idx1 = one_sixth * 4
idx2 = one_sixth * 5
split_bounds = ((None, idx1), (idx1, idx2), (idx2, None))

day_1_training_lst, day_1_valid_lst, day_1_test_lst = (
    np.array(day_1_total_lst[lo:hi]) for lo, hi in split_bounds
)

day_1_training_label, day_1_valid_label, day_1_test_label = (
    np.array(day_1_total_label[lo:hi]) for lo, hi in split_bounds
)

<br></br><br></br><br></br>

# Model

In [None]:
# Width of one step vector (last dimension of the X placeholder).
input_size = 53
# Number of target classes (width of the Y placeholder and of the logits).
input_class = 4
# LSTM units per layer in Model1 and Model3.
hidden_layer1 = 128
# LSTM units per layer in Model2 and Model4; dense units in Model1 and Model3.
hidden_layer2 = 256
# Dense units in Model2 and Model4.
hidden_layer3 = 512

In [None]:
class Model1() :
    """Two-layer LSTM classifier (TF1 graph mode) followed by a dense head.

    Each LSTM layer has ``hidden_layer1`` units and the dense layer has
    ``hidden_layer2`` units. ``input_size`` and ``input_class`` are read from
    the module-level settings when ``build`` is called.
    """

    def __init__(self, sess, name):
        """Remember the session that runs the graph and the variable-scope name."""
        self.sess = sess
        self.name = name

    def build(self, input_length) :
        """Create placeholders, network, loss, train op and accuracy op.

        input_length: number of time steps in every input sequence.
        """
        with tf.variable_scope(self.name) :

            self.X = tf.placeholder(tf.float32, [None, input_length, input_size])
            self.Y = tf.placeholder(tf.float32, [None, input_class])
            self.learning_rate =  tf.placeholder(tf.float32)
            self.training = tf.placeholder(tf.bool)

            # Dropout has to follow the `training` flag: keep 50% of the
            # activations while training and everything during
            # evaluation/prediction, so inference is deterministic.
            keep_prob = tf.cond(self.training,
                                lambda: tf.constant(0.5),
                                lambda: tf.constant(1.0))

            cell1 = tf.nn.rnn_cell.BasicLSTMCell(hidden_layer1)
            dropout1 = tf.nn.rnn_cell.DropoutWrapper(cell1, output_keep_prob=keep_prob)
            cell2 = tf.nn.rnn_cell.BasicLSTMCell(hidden_layer1)
            multi_cell = tf.nn.rnn_cell.MultiRNNCell([dropout1, cell2])

            output, state = tf.nn.dynamic_rnn(multi_cell, self.X, dtype=tf.float32)
            # Go time-major and keep only the output of the last time step.
            output = tf.transpose(output,[1,0,2])[-1]

            dense1= tf.layers.dense(inputs=output, units=hidden_layer2, activation=tf.nn.relu)
            # Without `training=` this layer is a no-op (its default is False).
            dropout = tf.layers.dropout(dense1, training=self.training)
            dense2 = tf.layers.dense(inputs=dropout, units=input_class)
            self.logits = dense2

            self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.Y))
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope=self.name)

            with tf.control_dependencies(update_ops):
                self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.cost)

            correct_prediction = tf.equal(tf.argmax(self.logits, 1), tf.argmax(self.Y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    def predict(self, X_input, training=False):
        """Return the logits for ``X_input``."""
        return self.sess.run(self.logits,feed_dict={self.X: X_input, self.training: training})

    def get_accuracy(self, X_input, Y_input, training=False):
        """Return the accuracy on ``X_input``/``Y_input`` computed in one run."""
        return self.sess.run(self.accuracy,feed_dict={self.X: X_input,self.Y: Y_input, self.training: training})

    def train(self, X_input, Y_input, learning_rate,training=True):
        """Run one optimizer step; returns ``[cost, optimizer_result]``."""
        return self.sess.run([self.cost, self.optimizer], feed_dict={self.X: X_input, self.Y: Y_input, self.learning_rate:learning_rate,self.training: training})

    def evaluate(self, X_input, Y_input, batch_size):
        """Return ``(mean_loss, mean_accuracy)`` computed batch by batch.

        Per-batch results are weighted by the batch length so a shorter last
        batch does not skew the averages.
        """
        N = X_input.shape[0]

        total_loss = 0
        total_acc = 0

        for i in range(0, N, batch_size):
            X_batch = X_input[i:i + batch_size]
            Y_batch = Y_input[i:i + batch_size]

            feed_dict = {self.X: X_batch, self.Y: Y_batch, self.training: False}

            step_loss, step_acc = self.sess.run([self.cost, self.accuracy], feed_dict=feed_dict)

            total_loss += step_loss * X_batch.shape[0]
            total_acc += step_acc * X_batch.shape[0]

        total_loss /= N
        total_acc /= N

        return total_loss, total_acc

In [None]:
class Model2() :
    """Two-layer LSTM classifier (TF1 graph mode) followed by a dense head.

    Each LSTM layer has ``hidden_layer2`` units and the dense layer has
    ``hidden_layer3`` units. ``input_size`` and ``input_class`` are read from
    the module-level settings when ``build`` is called.
    """

    def __init__(self, sess, name):
        """Remember the session that runs the graph and the variable-scope name."""
        self.sess = sess
        self.name = name

    def build(self, input_length) :
        """Create placeholders, network, loss, train op and accuracy op.

        input_length: number of time steps in every input sequence.
        """
        with tf.variable_scope(self.name) :

            self.X = tf.placeholder(tf.float32, [None, input_length, input_size])
            self.Y = tf.placeholder(tf.float32, [None, input_class])
            self.learning_rate =  tf.placeholder(tf.float32)
            self.training = tf.placeholder(tf.bool)

            # Dropout has to follow the `training` flag: keep 50% of the
            # activations while training and everything during
            # evaluation/prediction, so inference is deterministic.
            keep_prob = tf.cond(self.training,
                                lambda: tf.constant(0.5),
                                lambda: tf.constant(1.0))

            cell1 = tf.nn.rnn_cell.BasicLSTMCell(hidden_layer2)
            dropout1 = tf.nn.rnn_cell.DropoutWrapper(cell1, output_keep_prob=keep_prob)
            cell2 = tf.nn.rnn_cell.BasicLSTMCell(hidden_layer2)
            multi_cell = tf.nn.rnn_cell.MultiRNNCell([dropout1, cell2])

            output, state = tf.nn.dynamic_rnn(multi_cell, self.X, dtype=tf.float32)
            # Go time-major and keep only the output of the last time step.
            output = tf.transpose(output,[1,0,2])[-1]

            dense1= tf.layers.dense(inputs=output, units=hidden_layer3, activation=tf.nn.relu)
            # Without `training=` this layer is a no-op (its default is False).
            dropout = tf.layers.dropout(dense1, training=self.training)
            dense2 = tf.layers.dense(inputs=dropout, units=input_class)
            self.logits = dense2

            self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.Y))
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope=self.name)

            with tf.control_dependencies(update_ops):
                self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.cost)

            correct_prediction = tf.equal(tf.argmax(self.logits, 1), tf.argmax(self.Y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    def predict(self, X_input, training=False):
        """Return the logits for ``X_input``."""
        return self.sess.run(self.logits,feed_dict={self.X: X_input, self.training: training})

    def get_accuracy(self, X_input, Y_input, training=False):
        """Return the accuracy on ``X_input``/``Y_input`` computed in one run."""
        return self.sess.run(self.accuracy,feed_dict={self.X: X_input,self.Y: Y_input, self.training: training})

    def train(self, X_input, Y_input, learning_rate,training=True):
        """Run one optimizer step; returns ``[cost, optimizer_result]``."""
        return self.sess.run([self.cost, self.optimizer], feed_dict={self.X: X_input, self.Y: Y_input, self.learning_rate:learning_rate,self.training: training})

    def evaluate(self, X_input, Y_input, batch_size):
        """Return ``(mean_loss, mean_accuracy)`` computed batch by batch.

        Per-batch results are weighted by the batch length so a shorter last
        batch does not skew the averages.
        """
        N = X_input.shape[0]

        total_loss = 0
        total_acc = 0

        for i in range(0, N, batch_size):
            X_batch = X_input[i:i + batch_size]
            Y_batch = Y_input[i:i + batch_size]

            feed_dict = {self.X: X_batch, self.Y: Y_batch, self.training: False}

            step_loss, step_acc = self.sess.run([self.cost, self.accuracy], feed_dict=feed_dict)

            total_loss += step_loss * X_batch.shape[0]
            total_acc += step_acc * X_batch.shape[0]

        total_loss /= N
        total_acc /= N

        return total_loss, total_acc

In [None]:
class Model3() :
    """Three-layer LSTM classifier (TF1 graph mode) followed by a dense head.

    Each LSTM layer has ``hidden_layer1`` units and the dense layer has
    ``hidden_layer2`` units. ``input_size`` and ``input_class`` are read from
    the module-level settings when ``build`` is called.
    """

    def __init__(self, sess, name):
        """Remember the session that runs the graph and the variable-scope name."""
        self.sess = sess
        self.name = name

    def build(self, input_length) :
        """Create placeholders, network, loss, train op and accuracy op.

        input_length: number of time steps in every input sequence.
        """
        with tf.variable_scope(self.name) :

            self.X = tf.placeholder(tf.float32, [None, input_length, input_size])
            self.Y = tf.placeholder(tf.float32, [None, input_class])
            self.learning_rate =  tf.placeholder(tf.float32)
            self.training = tf.placeholder(tf.bool)

            # Dropout has to follow the `training` flag: keep 50% of the
            # activations while training and everything during
            # evaluation/prediction, so inference is deterministic.
            keep_prob = tf.cond(self.training,
                                lambda: tf.constant(0.5),
                                lambda: tf.constant(1.0))

            cell1 = tf.nn.rnn_cell.BasicLSTMCell(hidden_layer1)
            dropout1 = tf.nn.rnn_cell.DropoutWrapper(cell1, output_keep_prob=keep_prob)
            cell2 = tf.nn.rnn_cell.BasicLSTMCell(hidden_layer1)
            dropout2 = tf.nn.rnn_cell.DropoutWrapper(cell2, output_keep_prob=keep_prob)
            cell3 = tf.nn.rnn_cell.BasicLSTMCell(hidden_layer1)

            multi_cell = tf.nn.rnn_cell.MultiRNNCell([dropout1, dropout2, cell3])

            output, state = tf.nn.dynamic_rnn(multi_cell, self.X, dtype=tf.float32)
            # Go time-major and keep only the output of the last time step.
            output = tf.transpose(output,[1,0,2])[-1]

            dense1= tf.layers.dense(inputs=output, units=hidden_layer2, activation=tf.nn.relu)
            # Without `training=` this layer is a no-op (its default is False).
            dropout = tf.layers.dropout(dense1, training=self.training)
            dense2 = tf.layers.dense(inputs=dropout, units=input_class)
            self.logits = dense2

            self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.Y))
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope=self.name)

            with tf.control_dependencies(update_ops):
                self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.cost)

            correct_prediction = tf.equal(tf.argmax(self.logits, 1), tf.argmax(self.Y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    def predict(self, X_input, training=False):
        """Return the logits for ``X_input``."""
        return self.sess.run(self.logits,feed_dict={self.X: X_input, self.training: training})

    def get_accuracy(self, X_input, Y_input, training=False):
        """Return the accuracy on ``X_input``/``Y_input`` computed in one run."""
        return self.sess.run(self.accuracy,feed_dict={self.X: X_input,self.Y: Y_input, self.training: training})

    def train(self, X_input, Y_input, learning_rate,training=True):
        """Run one optimizer step; returns ``[cost, optimizer_result]``."""
        return self.sess.run([self.cost, self.optimizer], feed_dict={self.X: X_input, self.Y: Y_input, self.learning_rate:learning_rate,self.training: training})

    def evaluate(self, X_input, Y_input, batch_size):
        """Return ``(mean_loss, mean_accuracy)`` computed batch by batch.

        Per-batch results are weighted by the batch length so a shorter last
        batch does not skew the averages.
        """
        N = X_input.shape[0]

        total_loss = 0
        total_acc = 0

        for i in range(0, N, batch_size):
            X_batch = X_input[i:i + batch_size]
            Y_batch = Y_input[i:i + batch_size]

            feed_dict = {self.X: X_batch, self.Y: Y_batch, self.training: False}

            step_loss, step_acc = self.sess.run([self.cost, self.accuracy], feed_dict=feed_dict)

            total_loss += step_loss * X_batch.shape[0]
            total_acc += step_acc * X_batch.shape[0]

        total_loss /= N
        total_acc /= N

        return total_loss, total_acc

In [None]:
class Model4() :
    """Three-layer LSTM classifier (TF1 graph mode) followed by a dense head.

    Each LSTM layer has ``hidden_layer2`` units and the dense layer has
    ``hidden_layer3`` units. ``input_size`` and ``input_class`` are read from
    the module-level settings when ``build`` is called.
    """

    def __init__(self, sess, name):
        """Remember the session that runs the graph and the variable-scope name."""
        self.sess = sess
        self.name = name

    def build(self, input_length) :
        """Create placeholders, network, loss, train op and accuracy op.

        input_length: number of time steps in every input sequence.
        """
        with tf.variable_scope(self.name) :

            self.X = tf.placeholder(tf.float32, [None, input_length, input_size])
            self.Y = tf.placeholder(tf.float32, [None, input_class])
            self.learning_rate =  tf.placeholder(tf.float32)
            self.training = tf.placeholder(tf.bool)

            # Dropout has to follow the `training` flag: keep 50% of the
            # activations while training and everything during
            # evaluation/prediction, so inference is deterministic.
            keep_prob = tf.cond(self.training,
                                lambda: tf.constant(0.5),
                                lambda: tf.constant(1.0))

            cell1 = tf.nn.rnn_cell.BasicLSTMCell(hidden_layer2)
            dropout1 = tf.nn.rnn_cell.DropoutWrapper(cell1, output_keep_prob=keep_prob)
            cell2 = tf.nn.rnn_cell.BasicLSTMCell(hidden_layer2)
            dropout2 = tf.nn.rnn_cell.DropoutWrapper(cell2, output_keep_prob=keep_prob)
            cell3 = tf.nn.rnn_cell.BasicLSTMCell(hidden_layer2)

            multi_cell = tf.nn.rnn_cell.MultiRNNCell([dropout1, dropout2, cell3])

            output, state = tf.nn.dynamic_rnn(multi_cell, self.X, dtype=tf.float32)
            # Go time-major and keep only the output of the last time step.
            output = tf.transpose(output,[1,0,2])[-1]

            dense1= tf.layers.dense(inputs=output, units=hidden_layer3, activation=tf.nn.relu)
            # Without `training=` this layer is a no-op (its default is False).
            dropout = tf.layers.dropout(dense1, training=self.training)
            dense2 = tf.layers.dense(inputs=dropout, units=input_class)
            self.logits = dense2

            self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.Y))
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope=self.name)

            with tf.control_dependencies(update_ops):
                self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.cost)

            correct_prediction = tf.equal(tf.argmax(self.logits, 1), tf.argmax(self.Y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    def predict(self, X_input, training=False):
        """Return the logits for ``X_input``."""
        return self.sess.run(self.logits,feed_dict={self.X: X_input, self.training: training})

    def get_accuracy(self, X_input, Y_input, training=False):
        """Return the accuracy on ``X_input``/``Y_input`` computed in one run."""
        return self.sess.run(self.accuracy,feed_dict={self.X: X_input,self.Y: Y_input, self.training: training})

    def train(self, X_input, Y_input, learning_rate,training=True):
        """Run one optimizer step; returns ``[cost, optimizer_result]``."""
        return self.sess.run([self.cost, self.optimizer], feed_dict={self.X: X_input, self.Y: Y_input, self.learning_rate:learning_rate,self.training: training})

    def evaluate(self, X_input, Y_input, batch_size):
        """Return ``(mean_loss, mean_accuracy)`` computed batch by batch.

        Per-batch results are weighted by the batch length so a shorter last
        batch does not skew the averages.
        """
        N = X_input.shape[0]

        total_loss = 0
        total_acc = 0

        for i in range(0, N, batch_size):
            X_batch = X_input[i:i + batch_size]
            Y_batch = Y_input[i:i + batch_size]

            feed_dict = {self.X: X_batch, self.Y: Y_batch, self.training: False}

            step_loss, step_acc = self.sess.run([self.cost, self.accuracy], feed_dict=feed_dict)

            total_loss += step_loss * X_batch.shape[0]
            total_acc += step_acc * X_batch.shape[0]

        total_loss /= N
        total_acc /= N

        return total_loss, total_acc

<br></br><br></br><br></br>

In [None]:
# Learning rates consumed by the training loop: learning_rate1 is selected at
# epoch 0, learning_rate2/3/4 when the epoch index equals 50/70/90.
learning_rate1 = 0.01
learning_rate2 = 0.009
learning_rate3 = 0.008
learning_rate4 = 0.007

# NOTE(review): range(total_epoch) ends at 49, so with total_epoch = 50 the
# 50/70/90 switches never trigger and only learning_rate1 is used.
total_epoch = 50
# Mini-batch size for training and for batched evaluation.
batch_size = 500

In [None]:
# Per-epoch loss/accuracy history for model 1 ...
day_1_train_losses1, day_1_train_accs1 = [], []
day_1_valid_losses1, day_1_valid_accs1 = [], []

# ... and for model 2.
day_1_train_losses2, day_1_train_accs2 = [], []
day_1_valid_losses2, day_1_valid_accs2 = [], []

In [None]:
# Both models share one session; each is built for 10-step input sequences
# inside its own variable scope.
sess = tf.Session()

day_1_model1 = Model1(sess, "day_1_model1")
day_1_model1.build(10)

day_1_model2 = Model2(sess, "day_1_model2")
day_1_model2.build(10)

# Initialise the variables of both graphs.
sess.run(tf.global_variables_initializer())

In [None]:
# Progress marker: reaching this cell means the models above were built.
print("Ready!")

In [None]:
print('Learning Started!')
print("")

# Epoch index -> learning rate that takes effect from that epoch on.
lr_schedule = {0: learning_rate1, 50: learning_rate2, 70: learning_rate3, 90: learning_rate4}
models = (day_1_model1, day_1_model2)
train_histories = ((day_1_train_losses1, day_1_train_accs1), (day_1_train_losses2, day_1_train_accs2))
valid_histories = ((day_1_valid_losses1, day_1_valid_accs1), (day_1_valid_losses2, day_1_valid_accs2))
f1_splits = (
    ("train F1 score :", day_1_training_lst, day_1_training_label),
    ("valid F1 score :", day_1_valid_lst, day_1_valid_label),
    ("test  F1 score :", day_1_test_lst, day_1_test_label),
)

# train my model
for epoch in range(total_epoch):
    avg_cost1 = 0
    avg_cost2 = 0

    # Only complete batches are used; a trailing partial batch is skipped.
    total_batch = int(len(day_1_training_lst) / batch_size)

    if epoch in lr_schedule :
        learning_rate = lr_schedule[epoch]

    for start in range(0, total_batch * batch_size, batch_size):
        stop = start + batch_size
        batch_xs = day_1_training_lst[start:stop]
        batch_ys = day_1_training_label[start:stop]

        c1, _ = day_1_model1.train(batch_xs, batch_ys, learning_rate)
        c2, _ = day_1_model2.train(batch_xs, batch_ys, learning_rate)

        avg_cost1 += c1 / total_batch
        avg_cost2 += c2 / total_batch

    # Loss and accuracy on the training split (model 1 first, then model 2).
    train_stats = [m.evaluate(day_1_training_lst, day_1_training_label, batch_size = batch_size) for m in models]
    for (cost, acc), (losses, accs) in zip(train_stats, train_histories):
        losses.append(cost)
        accs.append(acc)

    # Loss and accuracy on the validation split.
    valid_stats = [m.evaluate(day_1_valid_lst, day_1_valid_label, batch_size = batch_size) for m in models]
    for (v_cost, v_acc), (losses, accs) in zip(valid_stats, valid_histories):
        losses.append(v_cost)
        accs.append(v_acc)

    for (cost, acc), (v_cost, v_acc) in zip(train_stats, valid_stats):
        print("epoch : ", epoch, " -- train {:.5f}({:.1f}%), valid{:.5f}({:.1f}%)".format(cost, acc*100, v_cost, v_acc*100))

    # Accuracy on the held-out test split.
    for m in models:
        print('Accuracy:', m.get_accuracy(day_1_test_lst, day_1_test_label))

    # Weighted F1 per split, one line per model.
    for caption, xs, ys in f1_splits:
        for m in models:
            print(caption, f1_score(np.argmax(ys, 1), np.argmax(m.predict(xs), 1), average="weighted"))
    print(" ")

print("")
print('Learning Finished!')

In [None]:
# One figure per model: training loss against validation loss.
loss_curves = [(day_1_train_losses1, day_1_valid_losses1),
               (day_1_train_losses2, day_1_valid_losses2)]
for idx, (train_curve, valid_curve) in enumerate(loss_curves, start=1) :
    suffix = str(idx)
    plt.plot(train_curve, label='training'+suffix)
    plt.plot(valid_curve, label='valid'+suffix)
    plt.title("model"+suffix)
    plt.grid("on")
    plt.legend()
    plt.show()

In [None]:
# One figure per model: training accuracy against validation accuracy.
acc_curves = [(day_1_train_accs1, day_1_valid_accs1),
              (day_1_train_accs2, day_1_valid_accs2)]
for idx, (train_curve, valid_curve) in enumerate(acc_curves, start=1) :
    suffix = str(idx)
    plt.plot(train_curve, label='training'+suffix)
    plt.plot(valid_curve, label='valid'+suffix)
    plt.title("model"+suffix)
    plt.grid("on")
    plt.legend()
    plt.show()

In [None]:
# Training loss of both models in one figure.
for idx, train_curve in enumerate([day_1_train_losses1, day_1_train_losses2], start=1) :
    plt.plot(train_curve, label='training'+str(idx))

plt.grid("on")
plt.legend()
plt.show()

In [None]:
# Validation loss of both models in one figure.
# The legend now says 'valid'; it used to say 'training' although the curves
# plotted here are the validation losses.
for idx, valid_curve in enumerate([day_1_valid_losses1, day_1_valid_losses2], start=1) :
    plt.plot(valid_curve, label='valid'+str(idx))

plt.grid("on")
plt.legend()
plt.show()

In [None]:
# Training accuracy of both models in one figure.
# The legend now says 'training'; it used to say 'valid' although the curves
# plotted here are the training accuracies.
for idx, train_curve in enumerate([day_1_train_accs1, day_1_train_accs2], start=1) :
    plt.plot(train_curve, label='training'+str(idx))

plt.grid("on")
plt.legend()
plt.show()

In [None]:
# Validation accuracy of both models in one figure.
for idx, valid_curve in enumerate([day_1_valid_accs1, day_1_valid_accs2], start=1) :
    plt.plot(valid_curve, label='valid'+str(idx))

plt.grid("on")
plt.legend()
plt.show()

In [None]:
#tf.reset_default_graph() 

<br></br><br></br><br></br>

In [None]:
# Write the variables of the current session to a checkpoint.
checkpoint_prefix = './model_RNN/original_user_not_separation_vector/original_user_vector_not_separation'
saver = tf.train.Saver()
saver.save(sess, checkpoint_prefix)

<br></br><br></br><br></br>

# Test

In [None]:
# Load the weekly feature vectors of the test users (rebinds `activity`).
activity = pd.read_csv("user_vector_test.csv").drop(columns="Unnamed: 0")

In [None]:
# Preview the first rows of the test activity table.
activity.head()

In [None]:
# One row per account id found in the test activity (rebinds `label`).
rows_per_account = activity.groupby("acc_id").count()
label = rows_per_account.reset_index()[["acc_id"]]
label.head()

In [None]:
# Test account ids as plain Python ints.
id_lst = list(map(int, label.acc_id.tolist()))
print(len(id_lst))

In [None]:
# One (initially empty) list of weekly records per test account.
activity_dic = {user: [] for user in id_lst}

In [None]:
# Sanity check: number of columns in a raw test row, then the row itself.
first_test_row = activity.head().values[0]
print(len(first_test_row))
first_test_row

In [None]:
# Group the test rows by account. Column 0 is used as the account key,
# column 1 is cast to int and kept as the first element of the stored record,
# and the remaining columns follow it unchanged.
for row in activity.values :
    account = int(row[0])
    week_no = int(row[1])
    activity_dic[account].append([week_no] + list(row[2:]))

In [None]:
# Flatten the dict into [account_id, weekly_records] pairs (insertion order).
activity_lst = [[account, records] for account, records in activity_dic.items()]
activity_lst[0]

In [None]:
# Shallow copy of the per-account list; every test account is kept.
day_1_lst = list(activity_lst)
print(len(day_1_lst))

In [None]:
# (account, step) -> feature vector for the test users.
day_1_dic = {}
# Test account ids in the order the sequences will be generated.
day_1_id_lst = [entry[0] for entry in day_1_lst]

In [None]:
# Marker vectors, each 53 entries wide with a single 1:
# position 0 = start, position 1 = empty step, position 2 = end.
TOKEN_WIDTH = 53
START_TOKEN = [int(pos == 0) for pos in range(TOKEN_WIDTH)]
EMPTY_TOKEN = [int(pos == 1) for pos in range(TOKEN_WIDTH)]
END_TOKEN = [int(pos == 2) for pos in range(TOKEN_WIDTH)]

In [None]:
# Give every test account a default 10-step sequence:
# step 0 = START, steps 1-8 = EMPTY, step 9 = END.
# Keys are inserted in step order so dict order stays grouped per account.
for user in day_1_id_lst :
    day_1_dic[(user, 0)] = START_TOKEN
    for step in range(1, 9) :
        day_1_dic[(user, step)] = EMPTY_TOKEN
    day_1_dic[(user, 9)] = END_TOKEN
    


In [None]:
# Overlay the observed weekly vectors of the test users on the default
# sequences, exactly as is done for the training users.
for data in day_1_lst :
    user = data[0]
    lst = data[1]

    # `idx` must be the position of the record inside this user's list.
    # The loop used to iterate without enumerate(), so `idx` was whatever value
    # an earlier cell had left behind and the START padding below never ran.
    for idx, data2 in enumerate(lst) :
        if idx == 0 :
            first_week = data2[0]

            # Every step before the first recorded week becomes a START token.
            for idx2 in range(first_week) :
                day_1_dic[(user, idx2)] = START_TOKEN

        # Each record is [week, feature...]; store the features at that week.
        week = data2[0]
        day_1_dic[(user, week)] = data2[1:]

In [None]:
# Flat list of all test step vectors (dict insertion order) and the containers
# used by the chunking cell below.
temp1 = [*day_1_dic.values()]
day_1_total_lst = []
last1 = 0

In [None]:
# Cut the flat vector list into consecutive chunks of 10 steps; a trailing
# partial chunk (fewer than 10 vectors) is not emitted.
last1 = 0
for chunk_end in range(10, len(temp1) + 1, 10) :
    day_1_total_lst.append(temp1[last1:chunk_end])
    last1 = chunk_end

In [None]:
# Number of 10-step test sequences that were built.
print(len(day_1_total_lst))

In [None]:
# Model input for prediction; this rebinds day_1_test_lst, which previously
# held the held-out slice of the training data.
day_1_test_lst = np.asarray(day_1_total_lst)

<br></br><br></br>

# Predict

In [None]:
# Predicted class id per test user, one array per model.
result_11, result_12 = (
    np.argmax(model.predict(day_1_test_lst), axis=1)
    for model in (day_1_model1, day_1_model2)
)

In [None]:
def voting(result_lst) :
    """Majority vote over per-model predictions for the classes 0-3.

    result_lst: iterable of sequences supporting ``.count`` (e.g. tuples), one
        per sample, holding the class id predicted by each model.
    Returns a ``pd.Series`` with the most frequent class per sample; on a tie
    the smallest class id wins.
    """
    # max() returns the first maximal candidate, so ties resolve to the
    # lowest class id.
    winners = [max(range(4), key=votes.count) for votes in result_lst]
    return pd.Series(winners)

In [None]:
# Positional column index -> column name; only the first three entries match
# columns that exist in the frame built here.
dic = {0 : "acc_id", 1:"model1", 2:"model2", 3:"model3", 4:"model4"}
prediction_rows = list(zip(day_1_id_lst, result_11, result_12))
result_1 = pd.DataFrame(prediction_rows).rename(columns = dic)

In [None]:
# The submitted prediction is model 2's output; voting() is not applied.
result_1["result"] = result_1["model2"]

In [None]:
# Keep only the id and the final prediction, ordered by account id.
result_lst = [result_1[["acc_id","result"]]]

combined_result = pd.concat(result_lst)
total_result = combined_result.sort_values(by="acc_id")
total_result.head()

In [None]:
# Persist the predictions while ids and classes are still numeric.
numeric_result_path = "./result/original_user_vector_not_separation/original_user_vector_not_separation.csv"
total_result.to_csv(numeric_result_path)

In [None]:
# Table used below to translate the numeric test ids back.
test_user_id_df = pd.read_csv("data/test_user_id.csv").drop(columns="Unnamed: 0")
test_user_id_df.head()

In [None]:
# Second column -> first column of the id table.
test_user_id_dic = {row[1]: row[0] for row in test_user_id_df.values}

In [None]:
# Class id -> label name (rebinds label_dic once more).
label_dic = dict(enumerate(["week", "month", "2month", "retained"]))

# Translate ids and class ids in place. The cell is not idempotent: a second
# run would look up values that have already been translated.
total_result["acc_id"] = [test_user_id_dic[key] for key in total_result["acc_id"]]
total_result["result"] = [label_dic[class_id] for class_id in total_result["result"]]
total_result.head()

In [None]:
# Submission layout: acc_id as index and a single "label" column.
total_result2 = total_result.rename(columns = {"result" : "label"}).set_index("acc_id")
total_result2.head()

In [None]:
# Write the final submission file.
submission_path = "./result/original_user_vector_not_separation/original_user_vector_not_separation_with_acc_id.csv"
total_result2.to_csv(submission_path)