In [1]:
import numpy as np

from sklearn import metrics
from sklearn import preprocessing 

from sklearn.linear_model import Perceptron
from sklearn.calibration import CalibratedClassifierCV

In [2]:
def convert_categorical_cols(data):
    """Ordinal-encode every column of ``data``.

    A brand-new ``OrdinalEncoder`` is fit on ``data`` on each call, so the
    integer codes depend on the values present in *this* input. Codes produced
    by two separate calls (e.g. one for training data and one for test data)
    are only comparable when both inputs contain the same set of values in
    every column.

    Parameters
    ----------
    data : array-like of shape (n_samples, n_features)
        Table whose columns are all encoded.

    Returns
    -------
    The encoded table as returned by ``OrdinalEncoder.fit_transform``.
    """
    # fit_transform is the one-step equivalent of fit() followed by transform().
    return preprocessing.OrdinalEncoder().fit_transform(data)

def drop_columns(data, col_indices):
    """Return ``data`` without the columns listed in ``col_indices``.

    Every index refers to a column position in the *input* array. All columns
    are removed in a single ``np.delete`` call: deleting them one at a time
    shifts the remaining columns to the left, so later indices would end up
    pointing at the wrong columns.

    Parameters
    ----------
    data : array-like with at least two dimensions
        Table to remove columns from.
    col_indices : iterable of int
        Positions (along axis 1) of the columns to remove.

    Returns
    -------
    ``data`` itself when ``col_indices`` is empty, otherwise a new array
    without the requested columns.
    """
    indices = list(col_indices)
    if not indices:
        # Nothing to drop: hand the input back unchanged.
        return data
    return np.delete(data, indices, axis=1)


def process(file):
    """Load a comma-separated table and split it into features and target.

    Every field is read as a string. The first row is discarded (treated as a
    header). Of the remaining rows, all columns but the last are returned as
    the 2-D feature array and the last column as the 1-D target array.
    """
    table = np.genfromtxt(file, delimiter=',', dtype='str')

    # Skip the first row.
    body = table[1:]
    # Unpacking the shape also insists on a two-dimensional table.
    _, width = body.shape
    last = width - 1

    return body[:, :last], body[:, last]

def process_test(file):
    """Load a comma-separated table and split it into first column and rest.

    Every field is read as a string. The first row is discarded (treated as a
    header). The first column of the remaining rows is returned as a 2-D
    array with a single column, and all other columns as a second 2-D array.
    """
    table = np.genfromtxt(file, delimiter=',', dtype='str')

    # Skip the first row.
    body = table[1:]
    # Unpacking the shape insists on a two-dimensional table.
    _, _width = body.shape

    first_column = body[:, :1]
    remaining = body[:, 1:]
    return first_column, remaining

In [3]:
# Positions of the feature columns to discard before training.
drop_out = [0]

# Load the training table: X is every column but the last, Y is the last one.
X, Y = process('./data/train_final.csv')

# Ordinal-encode all feature columns, then remove the unwanted ones.
X = drop_columns(convert_categorical_cols(X), drop_out)

# Perceptron wrapped in 10-fold isotonic calibration; the calibrated wrapper
# is what later provides class probabilities via predict_proba.
clf = CalibratedClassifierCV(
    Perceptron(tol=1e-3, random_state=0),
    cv=10,
    method='isotonic',
)
clf.fit(X, Y)

In [6]:
_ids, TEST_X = process_test('./data/test_final.csv')
drop_out = [0]

# Encode the test features with the mapping learned from the TRAINING
# features. Fitting a separate encoder on the test set would derive the
# integer codes from whatever values happen to occur in the test data, so the
# same value could receive a different code than the one the classifier saw
# during training.
# NOTE(review): assumes the test columns after the ID column are in the same
# order as the training feature columns — the classifier needs this as well.
train_features_raw, _train_labels = process('./data/train_final.csv')
encoder = preprocessing.OrdinalEncoder(
    # Values never seen in training are mapped to -1 instead of raising.
    # Requires scikit-learn >= 0.24.
    handle_unknown='use_encoded_value',
    unknown_value=-1,
)
encoder.fit(train_features_raw)

TEST_X = encoder.transform(TEST_X)
TEST_X = drop_columns(TEST_X, drop_out)

print(TEST_X.shape)

(23842, 13)


In [7]:
# Class-probability estimates for every test row from the calibrated
# classifier; per scikit-learn, the columns follow the order of clf.classes_.
results = clf.predict_proba(TEST_X)

In [8]:
import csv
import os

# Score every test row with the probability of ONE fixed class. Taking the
# minimum over the class probabilities would switch between classes from row
# to row, so the written value would not rank rows consistently.
# predict_proba columns follow clf.classes_, so column 1 is the probability of
# clf.classes_[1].
# NOTE(review): presumably clf.classes_[1] is the positive label — confirm
# against the training labels.
positive_proba = np.asarray(results)[:, 1]

# IDs were read as strings in an (n, 1) column; flatten and convert via float
# so that values such as "12" and "12.0" are both accepted.
ids = np.asarray(_ids).reshape(-1).astype(float)

# Two columns: ID, prediction score.
auc_map = np.column_stack((ids, positive_proba))

out_path = './result/result.csv'
# Make sure the output directory exists so a fresh run does not fail on open().
os.makedirs(os.path.dirname(out_path), exist_ok=True)

# newline='' is what the csv module expects for files it writes; the context
# manager closes the file even if writing a row raises.
with open(out_path, 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["ID", "Prediction"])

    for row_id, score in auc_map:
        writer.writerow([int(row_id), "{:.8f}".format(float(score))])