In [1]:
import pandas as pd
import numpy as np

from matplotlib import pyplot as plt
import seaborn as sns 

from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import FunctionTransformer
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_validate
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.feature_selection import SelectKBest, f_regression

import pickle

import warnings


In [2]:
warnings.filterwarnings('ignore')

In [3]:
class FeatureEncoder(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        return self
    
    def transform(self, X):
        label_encoded_df=X.copy()
        le = preprocessing.LabelEncoder()
        for col in label_encoded_df.select_dtypes(include=['object']).columns:
            label_encoded_df[col]=le.fit_transform(label_encoded_df[col])
        return label_encoded_df
            
class FeatureSelector(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        return self
    
    def transform(self, X):
        X=X.drop(['StandardHours', 'EmployeeCount', 'Over18'],axis=1)
        return X

class shuffle(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        return self
    
    def transform(self, X):
        X=X.sample(frac=1, random_state=12345).reset_index()
        return X
    
pipe=Pipeline([
    ("encoder", FeatureEncoder()),
    ("selector", FeatureSelector()),
    ("shuffler", shuffle())
])

class data_preparing:
    def __init__(self,x, pipe):
        self.x=x
        self.pipe=pipe
    
    def encode_select(self):
        x_dummy=pipe.fit_transform(self.x)
        y=x_dummy.loc[:,'Attrition']
        x=x_dummy.drop('Attrition',axis=1)
        return x,y
    
    def scale(self, x):
        scaler=preprocessing.StandardScaler()
        x=scaler.fit_transform(x)
        return x
            
     

In [4]:
class best_model:
    def __init__(self):
        return self
    
    def cr_model():
        model=svm.SVC(kernel='linear')
        return model



In [5]:
def print_cv_scores(scores):
    print("Fit Time : ", scores['fit_time']," mean= ",(sum(scores['fit_time']))/5)
    print("Score Time : ", scores['score_time']," mean= ",(sum(scores['score_time']))/5)
    print("Test Accuracy : ",scores['test_accuracy'], " mean= ",(sum(scores['test_accuracy']))/5)
    print("F1 Score : ", scores['test_f1']," mean= ",(sum(scores['test_f1']))/5)
    print("Test Precision : ", scores['test_precision']," mean= ",(sum(scores['test_precision']))/5)
    print("Test Recall : ", scores['test_recall']," mean= ",(sum(scores['test_recall']))/5)

In [6]:
def print_fit_scores(y_true, y_pred):
    print("Test Accuracy : ", accuracy_score(y_true, y_pred))
    print("F1 Score : ", f1_score(y_true, y_pred))
    print("Test Precision : ", precision_score(y_true, y_pred))
    print("Test Recall : ", recall_score(y_true, y_pred))
    

In [7]:
with open('preprocessing.pickle', 'rb') as f:
    data=pickle.load(f)

In [8]:
with open('model.pickle', 'rb') as f:
    model=pickle.load(f)

In [9]:
#Train %80, test=%20 
train_ratio = 0.8
test_ratio = 0.2
#validation_ratio = 0.1
#test_ratio = 0.1

x_train, x_test, y_train, y_test = train_test_split(data.x, data.y, test_size=1 - train_ratio)


In [10]:
y_pred_svm=model.predict(x_test)

In [11]:
print_fit_scores(y_test,y_pred_svm)

Test Accuracy :  0.8503401360544217
F1 Score :  0.4054054054054055
Test Precision :  0.75
Test Recall :  0.2777777777777778
