In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer
from xgboost import XGBClassifier
import tensorflow as tf


%matplotlib inline

In [None]:
# Load the validation and test tables. In both, the "Unnamed: 0" column is
# renamed to "acc_id" and every missing value is replaced with 0.
# NOTE(review): "Unnamed: 0" is presumably an index column written without a
# header name -- confirm against the code that produced these CSV files.
valid = (
    pd.read_csv("final_result/valid_250_epoch.csv")
    .rename(columns={"Unnamed: 0": "acc_id"})
    .fillna(0)
)
test = (
    pd.read_csv("final_result/test_250_epoch.csv")
    .rename(columns={"Unnamed: 0": "acc_id"})
    .fillna(0)
)


In [None]:
# Column names used as model inputs, kept in their original order.
# The sub-groups follow the original line layout (week / month / 2month /
# retained).
features = [
    # week
    "week-tree",
    "total_week",
    "retained-week-tree",
    # month
    "month-tree",
    "total_month",
    "retained-month-tree",
    "retained-2month-week_month-tree",
    "week-month-tree",
    # 2month
    "2month-tree",
    "total_2month",
    "retained-2month-tree",
    "retained-week-month_2month-tree",
    "retained-month-week_2month-tree",
    "week-2month-tree",
    "week-retained-month_2month-tree",
    # retained
    "retained-tree",
    "total_retained",
    "week-retained-tree",
    "week-month-2month_retained-tree",
    "week-2month-month_retained-tree",
]

# Name of the label column.
target = "label"

In [None]:
# Scratch record of 16 "valid F1 score" values, kept as a bare string literal
# (the cell's only expression, so the notebook displays it as the cell output).
# NOTE(review): the code that produced these numbers is not in this cell, and
# nothing here says which model or run each line belongs to -- confirm before
# relying on them.
"""
valid F1 score : 0.835019533206
valid F1 score : 0.511169513798
valid F1 score : 0.624146637987
valid F1 score : 0.759031198686
valid F1 score : 0.857057237039
valid F1 score : 0.559139784946
valid F1 score : 0.721834139352
valid F1 score : 0.882810071495
valid F1 score : 0.74330571304
valid F1 score : 0.930932160033
valid F1 score : 0.604991177212
valid F1 score : 0.628211250119
valid F1 score : 0.776022020593
valid F1 score : 0.747978788142
valid F1 score : 0.829327654396
valid F1 score : 0.873835053613
"""

<br></br><br></br><br></br> 

In [None]:
def my_f1_score(solution, submission):
    """Print accuracy and return the harmonic mean of all per-class precisions and recalls.

    The score is ``1 / mean([1/precision_k ...] + [1/recall_k ...])`` taken
    over every class label that appears in either input.

    Parameters
    ----------
    solution : array-like
        True class labels (1-D, or anything that flattens to 1-D).
    submission : array-like
        Predicted class labels, same length as ``solution``.

    Returns
    -------
    float
        The score in ``[0, 1]``. It is ``0.0`` as soon as one class has no
        correct prediction, because a harmonic mean with a zero term is zero.

    Raises
    ------
    ValueError
        If the inputs are empty or differ in length.

    Notes
    -----
    Prints the accuracy and the score on every call (this function is wrapped
    by ``make_scorer`` in this notebook, so it prints once per scoring call).
    """
    y_true = np.asarray(solution).ravel()
    y_pred = np.asarray(submission).ravel()
    n_samples = y_true.shape[0]
    if n_samples == 0 or n_samples != y_pred.shape[0]:
        raise ValueError(
            "solution and submission must be non-empty and have the same length"
        )

    # Encode both arrays against ONE shared label set so the confusion matrix
    # is always square and label-aligned, even when a class is never predicted
    # (or never present in the truth). A groupby/unstack table is not, and
    # np.diag then pairs the wrong cells.
    labels, codes = np.unique(np.concatenate([y_true, y_pred]), return_inverse=True)
    codes = codes.ravel()
    n_labels = labels.shape[0]
    true_codes, pred_codes = codes[:n_samples], codes[n_samples:]

    # table[i, j] = number of samples with true label i predicted as label j.
    table = np.bincount(
        true_codes * n_labels + pred_codes, minlength=n_labels * n_labels
    ).reshape(n_labels, n_labels)

    correct = np.diag(table)
    accuracy = correct.sum() / n_samples

    if np.any(correct == 0):
        # Some class has precision or recall equal to zero (or undefined).
        f1_score = 0.0
    else:
        inv_precision = table.sum(axis=0) / correct  # predicted count / correct
        inv_recall = table.sum(axis=1) / correct     # true count / correct
        f1_score = float(1.0 / np.mean(np.concatenate([inv_precision, inv_recall])))

    print('final accuracy:%s'%(accuracy))
    print('final_f1_score:%s'%(f1_score))
    print()

    return f1_score

# Wrap my_f1_score as a scikit-learn scorer object; greater_is_better=True
# declares that larger values of the metric are better.
my_scorer = make_scorer(my_f1_score, greater_is_better = True)

In [None]:
def my_f1_score2(solution, submission):
    """Return the harmonic mean of all per-class precisions and recalls (silent).

    Same metric as ``my_f1_score`` in this notebook, but without printing:
    ``1 / mean([1/precision_k ...] + [1/recall_k ...])`` over every class
    label that appears in either input.

    Parameters
    ----------
    solution : array-like
        True class labels (1-D, or anything that flattens to 1-D).
    submission : array-like
        Predicted class labels, same length as ``solution``.

    Returns
    -------
    float
        The score in ``[0, 1]``. It is ``0.0`` as soon as one class has no
        correct prediction, because a harmonic mean with a zero term is zero.

    Raises
    ------
    ValueError
        If the inputs are empty or differ in length.
    """
    # np.asarray avoids the pandas pitfall where DataFrame(series, columns=[...])
    # selects by name and silently yields NaN for a differently named Series.
    y_true = np.asarray(solution).ravel()
    y_pred = np.asarray(submission).ravel()
    n_samples = y_true.shape[0]
    if n_samples == 0 or n_samples != y_pred.shape[0]:
        raise ValueError(
            "solution and submission must be non-empty and have the same length"
        )

    # Shared label encoding keeps the confusion matrix square and aligned even
    # if a class is missing from the predictions or from the truth.
    labels, codes = np.unique(np.concatenate([y_true, y_pred]), return_inverse=True)
    codes = codes.ravel()
    n_labels = labels.shape[0]

    # table[i, j] = number of samples with true label i predicted as label j.
    table = np.bincount(
        codes[:n_samples] * n_labels + codes[n_samples:],
        minlength=n_labels * n_labels,
    ).reshape(n_labels, n_labels)

    correct = np.diag(table)
    if np.any(correct == 0):
        # Some class has precision or recall equal to zero (or undefined).
        return 0.0

    inv_precision = table.sum(axis=0) / correct  # predicted count / correct
    inv_recall = table.sum(axis=1) / correct     # true count / correct
    return float(1.0 / np.mean(np.concatenate([inv_precision, inv_recall])))

<br></br>

In [None]:
class Weight:
    """Softmax classifier over a flat feature vector, built with the TensorFlow 1.x graph API.

    ``build`` creates either a single linear layer or a two-layer network
    (matmul -> layer norm -> ReLU -> matmul), neither with bias terms, plus
    the cross-entropy cost, an RMSProp training op and an accuracy op.
    ``train`` / ``predict`` / ``evaluate`` run those ops in the session given
    to the constructor.
    """

    def __init__(self, sess, name):
        """Store the session used to run ops and the variable-scope name; builds nothing."""
        self.sess = sess
        self.name = name

    def convolution(self, input_X, kernel_size, width, num_filter, activation=True):
        """Apply a stride-1 2-D convolution with kernel ``[kernel_size, width]``.

        When ``activation`` is true the result is passed through layer
        normalisation and ReLU; otherwise the raw convolution is returned.
        Not used by ``build`` in this class.
        """
        conv = tf.layers.conv2d(input_X, filters=num_filter, kernel_size=[kernel_size, width], strides=1)

        if activation:
            norm = tf.contrib.layers.layer_norm(conv)
            relu = tf.nn.relu(norm)

            return relu
        return conv

    def build(self, batch_size, input_dim, is_fc, fc_num_unit, output_dim):
        """Create placeholders, weights, cost, optimizer and accuracy ops under ``self.name``.

        Parameters
        ----------
        batch_size : int
            Batch size used by ``evaluate`` when iterating over its input.
        input_dim : int
            Number of columns of the input placeholder ``X``.
        is_fc : bool
            True  -> two-layer network with ``fc_num_unit`` hidden units.
            False -> single ``input_dim x output_dim`` linear layer.
        fc_num_unit : int
            Hidden width; only used when ``is_fc`` is true.
        output_dim : int
            Number of classes (columns of ``Y`` and of the logits).
        """
        with tf.variable_scope(self.name):

            ## Setting ##
            self.batch_size = batch_size
            self.input_dim = input_dim
            self.output_dim = output_dim
            self.is_fc = is_fc
            self.fc_num_unit = fc_num_unit

            self.X = tf.placeholder(tf.float32, [None, self.input_dim])
            self.Y = tf.placeholder(tf.float32, [None, self.output_dim])
            self.learning_rate = tf.placeholder(tf.float32)
            # Fed by train/predict/evaluate; no op built here consumes it.
            self.training = tf.placeholder(tf.bool)
            #############

            ## Weight ##
            if self.is_fc:
                self.fc_weight1 = tf.Variable(tf.random_normal([self.input_dim, self.fc_num_unit]))
                self.fc_weight2 = tf.Variable(tf.random_normal([self.fc_num_unit, self.output_dim]))

                self.fc_weighted1 = tf.matmul(self.X, self.fc_weight1)
                norm = tf.contrib.layers.layer_norm(self.fc_weighted1)
                relu = tf.nn.relu(norm)
                self.fc_weighted2 = tf.matmul(relu, self.fc_weight2)

                self.weighted = self.fc_weighted2

            else:
                self.weight = tf.Variable(tf.random_normal([self.input_dim, self.output_dim]))
                self.weighted = tf.matmul(self.X, self.weight)

            self.logit = self.weighted
            self.softmax = tf.nn.softmax(self.logit)
            ################

            ## Learning ##
            self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.logit, labels=self.Y))

            # Run any update ops registered under this scope before each optimizer step.
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope=self.name)
            with tf.control_dependencies(update_ops):
                self.optimizer = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate).minimize(self.cost)

            # Per-row boolean: does the arg-max of the logits match the arg-max of Y?
            self.prediction = tf.equal(tf.argmax(self.logit, 1), tf.argmax(self.Y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(self.prediction, tf.float32))
            ##############

    def train(self, X_input, Y_input, learning_rate, training=True):
        """Run one optimizer step on the given batch.

        Returns a 2-tuple: the value ``sess.run`` yields for the optimizer op,
        and the batch cost.
        """
        feed_dict = {self.X: X_input, self.Y: Y_input, self.learning_rate: learning_rate, self.training: training}
        op_result, cost = self.sess.run([self.optimizer, self.cost], feed_dict=feed_dict)

        return op_result, cost

    def predict(self, X_input, training=False):
        """Return the result of running the logits op on ``X_input``.

        ``sess.run`` is called with a one-element fetch list, so callers
        should expect the logits wrapped in a one-element list.
        """
        feed_dict = {self.X: X_input, self.training: training}
        result = self.sess.run([self.logit], feed_dict=feed_dict)

        return result

    def evaluate(self, X_input, Y_input):
        """Compute mean loss and accuracy over the whole input, batch by batch.

        The input is processed in slices of ``self.batch_size`` rows; each
        batch result is weighted by its row count so a smaller last batch is
        accounted for correctly.

        Returns
        -------
        (total_loss, total_acc) : tuple of float
            Row-weighted mean cost and accuracy.
        """
        size = X_input.shape[0]

        total_loss = 0
        total_acc = 0

        for idx in range(0, size, self.batch_size):
            # Slice with the same batch size the loop steps by. The previous
            # code used a global `batch_size`, which is a NameError when no
            # such global exists and skips or double-counts rows when it
            # differs from self.batch_size.
            X_batch = X_input[idx:idx + self.batch_size]
            Y_batch = Y_input[idx:idx + self.batch_size]
            feed_dict = {self.X: X_batch, self.Y: Y_batch, self.training: False}

            step_loss, step_acc = self.sess.run([self.cost, self.accuracy], feed_dict=feed_dict)

            total_loss += step_loss * X_batch.shape[0]
            total_acc += step_acc * X_batch.shape[0]

        total_loss /= size
        total_acc /= size

        return total_loss, total_acc

In [None]:
# Exhaustive grid search over the blend weights of two models.
# For each of the four score columns (week, month, 2month, retained) the blended
# score is  (idx * model0 + (BLEND_STEPS - idx) * model1) / BLEND_STEPS  with
# idx in 0..BLEND_STEPS; the class of a row is the arg-max over the four blended
# columns, and the combination with the highest my_f1_score2 is kept.
#
# NOTE(review): `pred_df` (with model0_* / model1_* columns) and `valid_label`
# (2-D, arg-maxed over axis=1 -- presumably one-hot labels) are not defined in
# the cells visible here; they must exist before this cell runs.
# NOTE(review): this is (BLEND_STEPS + 1) ** 4 = 923,521 metric evaluations.
BLEND_STEPS = 30

best_weight = []
best_f1 = 0

# Loop-invariant: the true class ids never change during the search.
true_class = np.argmax(valid_label, axis=1)

for idx0 in range(BLEND_STEPS + 1):
    print()
    print("-------",idx0,"-------")

    # Each blended column depends on a single loop index, so it is computed at
    # that loop level rather than in the innermost loop. The values seen by the
    # metric, and the columns left in pred_df afterwards, are unchanged.
    pred_df["week"] = (idx0*pred_df["model0_week"] + (BLEND_STEPS-idx0)*pred_df["model1_week"]) / BLEND_STEPS

    for idx1 in range(BLEND_STEPS + 1):
        pred_df["month"] = (idx1*pred_df["model0_month"] + (BLEND_STEPS-idx1)*pred_df["model1_month"]) / BLEND_STEPS

        for idx2 in range(BLEND_STEPS + 1):
            pred_df["2month"] = (idx2*pred_df["model0_2month"] + (BLEND_STEPS-idx2)*pred_df["model1_2month"]) / BLEND_STEPS

            for idx3 in range(BLEND_STEPS + 1):
                pred_df["retained"] = (idx3*pred_df["model0_retained"] + (BLEND_STEPS-idx3)*pred_df["model1_retained"]) / BLEND_STEPS

                pred_class = np.argmax(pred_df[["week", "month", "2month", "retained"]].values, axis=1)

                # my_f1_score2(solution, submission) takes the true labels
                # first. They used to be passed in reverse; the metric is
                # symmetric in precision/recall, so only readability changes.
                f1 = my_f1_score2(true_class, pred_class)
                if f1 > best_f1:
                    best_f1 = f1
                    best_weight = [idx0, idx1, idx2, idx3]
                    print(best_f1, best_weight)