In [1]:
import numpy as np 
import pandas as pd 
import os
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xgb
from sklearn.metrics import r2_score
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from cluster_target_encoder import cluster_target_encoder
import pickle
pd.options.mode.chained_assignment = None
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
# Raw competition data, the list of columns to discard, and the trained model.
df_train = pd.read_csv('train.csv')
df_test = pd.read_csv('test.csv')
feats_to_drop = np.load('feats_to_drop.npy')

# Open the model file in a context manager so the handle is closed right after
# unpickling instead of being left to the garbage collector.
# SECURITY: pickle executes arbitrary code on load; only use a model file you
# produced yourself or otherwise trust.
with open('best_model.sav', 'rb') as model_file:
    best_model = pickle.load(model_file)

# Separate the target column 'y' from the feature columns
# (pop returns the column and removes it from df_train).
labels = df_train.pop('y')


# Encoder used later to add the extra 'labels' feature derived from X0.
# NOTE(review): the encoder is fit on the full training frame, i.e. before the
# train/validation split below, so it has seen the validation targets --
# confirm this matches how best_model was trained.
encoder = cluster_target_encoder(nclusters=4,seed=0)
encoder.fit(df_train['X0'],labels)

# Hold out 20% of the rows for validation; fixed seed keeps the split reproducible.
x_train, x_valid, y_train, y_valid = train_test_split(df_train, labels, test_size=0.2, random_state=420)

def final_predict(df,actual):
    '''
    Prepare a raw feature frame and score the saved model on it.

    The preparation drops the columns listed in ``feats_to_drop``, adds three
    summed columns, adds a ``labels`` column produced by the fitted ``encoder``
    from ``X0``, and replaces every object-dtype column with the sum of the
    character code points of each value.

    All of this is done on a new frame: ``df`` itself is left unmodified, so
    the function can safely be called more than once on the same data.

    Parameters
    ----------
    df : pandas.DataFrame
        Feature frame; must contain the columns referenced below (``X0``,
        ``X29``, ``X127``, ``X276``, ``X136``, ``X314``, ``X315``, ``X179``,
        ``X261``) and the columns named in ``feats_to_drop``.
    actual : array-like
        True target values aligned with the rows of ``df``.

    Returns
    -------
    The R^2 score (``sklearn.metrics.r2_score``) of the model's predictions
    against ``actual``.
    '''
    # Drop features. Without inplace=True this returns a new frame, which is
    # what keeps the caller's DataFrame intact.
    feats = df.drop(feats_to_drop,axis=1)

    # Adding new features (column order matters: it must stay the same as in
    # the original pipeline so the frame lines up with the trained model).
    feats['X29+X127+X276'] = feats['X29'] + feats['X127'] + feats['X276']
    feats['X136+X314+X315'] = feats['X136'] + feats['X314'] + feats['X315']
    feats['X136+X179+X261'] = feats['X136'] + feats['X179'] + feats['X261']

    # Adding the label feature derived from X0 by the fitted encoder.
    feats['labels'] = encoder.transform(feats['X0'])

    # Encoding categorical features: each string becomes the sum of the code
    # points of its characters.
    def char_code_sum(text):
        return sum(ord(ch) for ch in text)

    for col in feats.columns:
        if feats[col].dtype=='object':
            feats[col] = feats[col].apply(char_code_sum)

    dmatrix = xgb.DMatrix(feats, label=actual)
    preds = best_model.predict(dmatrix)
    return r2_score(actual, preds)

# Score the model on the training partition first, then on the held-out
# validation partition (the generator is consumed in that order).
train_metric, val_metric = (
    final_predict(features, target)
    for features, target in ((x_train, y_train), (x_valid, y_valid))
)

# Report both R^2 scores.
print("Train Metric:", train_metric)
print("Validation Metric:", val_metric)

Train Metric: 0.5762407803099825
Validation Metric: 0.666801651053621
