In [35]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ML_utils import load_data,load_data_test_set, total_day_eve_night_grouping
df =pd.read_csv('train.csv')

### UNCOMMENT THIS PART TO GROUP FEATURES
feature_engineering = True
df=total_day_eve_night_grouping(df,grouping=feature_engineering)
exclude_list = [feature_name for feature_name in df.columns if feature_name not in['number_vmail_messages','number_customer_service_calls','total_charges','churn']]
one_hot_on = True
normalize_on = True
oversample_on=True


###################
#### LOAD DATA ####

X_train,y_train,X_val,y_val,X_test,y_test,columns = load_data(df,exclude=exclude_list,one_hot=one_hot_on,normalize_=normalize_on,oversample=oversample_on)


y_train[y_train==0]=-1


In [36]:
kernels=['rbf','poly']

C=[100]

degree=[2,3]
r=[1]
sigma=[1,4]



In [None]:
from cvxopt import matrix, solvers

In [37]:
def Kernel_func(x,z,kernel='linear',r=1,degree=1,sigma=1):
    if kernel=='linear':
        return np.matmul(x,z.T)
    elif kernel=='poly':
        return np.power(np.matmul(x,z.T)+r,degree)
    elif kernel=='rbf':
        n = x.shape[0]
        m = z.shape[0]
        xx = np.dot(np.sum(np.power(x, 2), 1).reshape(n, 1), np.ones((1, m)))
        zz = np.dot(np.sum(np.power(z, 2), 1).reshape(m, 1), np.ones((1, n)))     
        return np.exp(-(xx + zz.T - 2 * np.dot(x, z.T)) / (2 * sigma ** 2))
    elif kernel=='sigmoid':
        return np.tanh(sigma*np.matmul(x,z.T)+r)
    

def svm_train(X_train,y_train,kernel='linear',C=1,degree=1,r=1,sigma=1):
    
    X=X_train.astype(np.double)
    y=y_train.astype(np.double)
    m, n = X.shape
    K = Kernel_func(X,X,kernel=kernel,r=r,degree=degree,sigma=sigma)
    P = matrix(np.matmul(y,y.T) * K)
    q = matrix(np.ones((m, 1)) * -1)
    A = matrix(y.reshape(1, -1))
    b = matrix(np.zeros(1))          
    G = matrix(np.vstack((np.eye(m) * -1, np.eye(m))))        
    h = matrix(np.hstack((np.zeros(m), np.ones(m) * C)))
    solution = solvers.qp(P, q, G, h, A, b)
    lam = np.array(solution['x'])
    ind = (lam > 1e-6).flatten()
    sv_x = X[ind]
    sv_y = y[ind]
    lams = lam[ind]

    return sv_x,sv_y,lams


def predict(X,sv_x,sv_y,lams,kernel='linear',r=1,degree=1,sigma=1):
  b = sv_y - np.sum(Kernel_func(sv_x,sv_x,kernel=kernel,r=r,degree=degree,sigma=sigma) * lams * sv_y, axis=0)
  b = np.sum(b) / b.size
  prod = np.sum(Kernel_func(sv_x,X,kernel=kernel,r=r,degree=degree,sigma=sigma) * lams * sv_y, axis=0) + b
  predictions = np.sign(prod)
  predictions[predictions==-1]=0
  return predictions

In [39]:

def grid_search(X_train,y_train,X_val,y_val,kernels,C,degree,r,sigma):
    best_acc=0
    best_params=[]
    for k in kernels:
        for c in C:
            for d in degree:
                for rr in r:
                    for s in sigma:
                        sv_x,sv_y,lams=svm_train(X_train,y_train,kernel=k,C=c,degree=d,r=rr,sigma=s)
                        y_pred=predict(X_val,sv_x,sv_y,lams,kernel=k,r=rr,degree=d,sigma=s)
                        acc=np.mean(y_pred==y_val)
                        if acc>best_acc:
                            best_acc=acc
                            best_params={'kernel':k,'C':c,'degree':d,'r':rr,'sigma':s}
                            best_results={'sv_x':sv_x,'sv_y':sv_y,'lams':lams}
    return {'best_acc':best_acc,'best_params':best_params,'best_results':best_results}

In [40]:
results=grid_search(X_train,y_train,X_val,y_val,kernels,C,degree,r,sigma)

     pcost       dcost       gap    pres   dres
 0:  8.5738e+04 -1.3292e+07  2e+07  2e-01  1e-12
 1:  5.0091e+04 -3.3290e+06  4e+06  4e-02  7e-13
 2: -2.4779e+04 -7.7733e+05  8e+05  6e-03  7e-13
 3: -6.7783e+04 -2.5465e+05  2e+05  6e-04  8e-13
 4: -8.1651e+04 -2.0522e+05  1e+05  3e-04  8e-13
 5: -9.2977e+04 -1.5337e+05  6e+04  1e-04  8e-13
 6: -9.9455e+04 -1.2720e+05  3e+04  5e-05  8e-13
 7: -1.0156e+05 -1.2009e+05  2e+04  3e-05  8e-13
 8: -1.0333e+05 -1.1400e+05  1e+04  1e-05  8e-13
 9: -1.0439e+05 -1.1065e+05  6e+03  7e-06  8e-13
10: -1.0481e+05 -1.0939e+05  5e+03  4e-06  7e-13
11: -1.0521e+05 -1.0826e+05  3e+03  3e-06  8e-13
12: -1.0553e+05 -1.0739e+05  2e+03  1e-06  8e-13
13: -1.0572e+05 -1.0688e+05  1e+03  6e-07  8e-13
14: -1.0591e+05 -1.0646e+05  6e+02  1e-07  8e-13
15: -1.0598e+05 -1.0632e+05  3e+02  8e-08  8e-13
16: -1.0604e+05 -1.0622e+05  2e+02  3e-08  8e-13
17: -1.0608e+05 -1.0614e+05  6e+01  3e-09  1e-12
18: -1.0609e+05 -1.0612e+05  3e+01  4e-10  1e-12
19: -1.0610e+05 -1.06

In [41]:
sv_x=results['best_results']['sv_x']
sv_y=results['best_results']['sv_y']
lams=results['best_results']['lams']
kernel_best=results['best_params']['kernel']
C_best=results['best_params']['C']
degree_best=results['best_params']['degree']
r_best=results['best_params']['r']
sigma_best=results['best_params']['sigma']


In [42]:
print ('Best kernel:',kernel_best)
print ('Best C:',C_best)
print ('Best degree:',degree_best)
print ('Best r:',r_best)
print ('Best sigma:',sigma_best)
print ('Best accuracy:',results['best_acc'])


Best kernel: rbf
Best C: 100
Best degree: 2
Best r: 1
Best sigma: 4
Best accuracy: 0.7384989981984824


In [43]:
y_pred=predict(X_val,sv_x,sv_y,lams,kernel=kernel_best,r=r_best,degree=degree_best,sigma=sigma_best)

In [44]:
y_pred.shape

(637,)

In [45]:
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_val, y_pred)
accuracy

0.8948194662480377

In [46]:
### USE THE MODEL ON THE TEST SET
df_test=pd.read_csv('test.csv')

df_test=total_day_eve_night_grouping(df_test,grouping=feature_engineering)


X_test_output=load_data_test_set(df_test,exclude=exclude_list+['id'],one_hot=one_hot_on,normalize_=normalize_on)

y_pred_output=predict(X_test_output,sv_x,sv_y,lams,kernel=kernel_best,r=r_best,degree=degree_best,sigma=sigma_best)

y_pred_output=np.where(y_pred_output>0.0,'yes', 'no')
y_pred_output=y_pred_output.reshape(-1,)
id_column = np.arange(1, y_pred_output.shape[0] + 1)


# Create a DataFrame
df_output = pd.DataFrame({
    'id': id_column,
    'churn': y_pred_output
})


# Save the DataFrame as a CSV file
df_output.to_csv('output_svm_QP.csv', index=False)