In [1]:
# import package
import csv
import numpy as np
import pandas as pd

In [2]:
# Number of input features (columns of the feature matrix); hard-coded for this dataset.
dim = 106

In [3]:
# load data from file.
def load_data():
    """Read the train/test feature tables and the training labels.

    The files 'X_train', 'X_test' and 'Y_train' are read as CSV from the
    current working directory. The two feature files are parsed with their
    first row as a header; 'Y_train' is parsed without a header row and
    flattened to a 1-D array.

    Returns:
        tuple: (x_train, y_train, x_test) as numpy arrays.
    """
    # Feature tables: first line of each file is consumed as column names.
    train_features = pd.read_csv('X_train').values
    test_features = pd.read_csv('X_test').values

    # Labels: every line is data; collapse the single column into a vector.
    train_labels = pd.read_csv('Y_train', header=None).values.reshape(-1)

    return train_features, train_labels, test_features


def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), clipped to [1e-6, 1 - 1e-6].

    Args:
        z: scalar or array-like of real values.

    Returns:
        Value(s) of the same shape as ``z``, each within [1e-6, 1 - 1e-6].
    """
    # Saturate the input before exponentiating: np.exp(-z) overflows for very
    # negative z and emits a RuntimeWarning. Any |z| > ~13.8 already lands on
    # the output clip bounds, so limiting z to [-30, 30] changes no result.
    z = np.clip(z, -30.0, 30.0)
    # Keep the output strictly inside (0, 1).
    return np.clip(1 / (1.0 + np.exp(-z)), 1e-6, 1 - 1e-6)


def standardize(x_train, x_test, index=(0, 1, 3, 4, 5)):
    """Z-score selected feature columns using statistics of train + test combined.

    Only the columns listed in ``index`` are shifted and scaled; every other
    column is returned unchanged (as float).

    Args:
        x_train: 2-D array of training features.
        x_test: 2-D array of test features with the same number of columns.
        index: column positions to standardize. Defaults to (0, 1, 3, 4, 5),
            presumably the continuous features of this dataset -- verify
            against the data if the column layout changes.

    Returns:
        tuple: (x_train_nor, x_test_nor), the standardized train and test rows.
    """
    # A tuple would be interpreted as a multi-dimensional index, so use a list.
    index = list(index)
    n_train = x_train.shape[0]

    # Mean and std are computed over train and test rows together.
    x_all = np.concatenate((x_train, x_test), axis=0)

    # Identity transform (mean 0, std 1) for columns that are not selected.
    mean_vec = np.zeros(x_all.shape[1])
    std_vec = np.ones(x_all.shape[1])

    selected = x_all[:, index]
    std = np.std(selected, axis=0)
    # A constant column has std == 0, which would turn it into NaN (0 / 0).
    # Dividing by 1 instead leaves such a column at exactly 0 after centering.
    std[std == 0] = 1.0

    mean_vec[index] = np.mean(selected, axis=0)
    std_vec[index] = std

    x_all_nor = (x_all - mean_vec) / std_vec

    return x_all_nor[:n_train], x_all_nor[n_train:]

In [4]:
def train(x_train, y_train, lr=0.01, epoch=1000):
    """Fit logistic-regression weights with full-batch AdaGrad.

    Args:
        x_train: 2-D array of shape (n_samples, n_features). No bias column
            is added here; include one in ``x_train`` if an intercept is needed.
        y_train: 1-D array of n_samples targets. Presumably 0/1 labels, since
            they are compared against sigmoid outputs -- confirm with caller.
        lr: base learning rate (default 0.01).
        epoch: number of full-batch update steps (default 1000).

    Returns:
        numpy.ndarray: weight vector of shape (n_features,).
    """
    # Size every per-feature buffer from the data itself so the function works
    # for any feature count instead of relying on a module-level constant.
    n_features = x_train.shape[1]

    w = np.zeros(n_features)
    # Running sum of squared gradients (AdaGrad accumulator), one per feature.
    adagrad_sum = np.zeros(n_features)

    for _ in range(epoch):
        # Difference between targets and current predicted probabilities.
        residual = y_train - sigmoid(np.dot(x_train, w))

        # Gradient of the cross-entropy loss with respect to w.
        g = -np.dot(x_train.T, residual)

        adagrad_sum += np.square(g)
        denom = np.sqrt(adagrad_sum)

        # A feature whose gradient has been exactly zero so far has denom == 0;
        # plain division would give 0 / 0 = NaN and poison every later dot
        # product. Such features get a zero step instead.
        step = np.divide(lr * g, denom, out=np.zeros_like(denom), where=denom > 0)

        w = w - step

    return w


def predict(x_test, w):
    """Return hard class predictions (0.0 or 1.0) for each row of ``x_test``.

    The linear score ``x_test . w`` is passed through ``sigmoid`` and rounded
    to the nearest integer. ``np.around`` rounds halves to the nearest even
    value, so a probability of exactly 0.5 maps to 0.
    """
    scores = np.dot(x_test, w)
    probabilities = sigmoid(scores)
    return np.around(probabilities)

In [5]:
# Read the training features, training labels and test features from the CSV files.
x_train, y_train, x_test = load_data()

In [6]:
# Standardize the train and test features together.
# NOTE: this rebinds x_train / x_test to the normalized arrays, so the raw
# values are no longer available after this cell runs.
x_train, x_test = standardize(x_train, x_test)

In [7]:
# Train the model; w holds the learned weight vector (one entry per feature).
w = train(x_train, y_train)

In [8]:
# Report accuracy on the training data (fraction of labels predicted correctly).
y = predict(x_train, w)

# Element-wise match mask between true and predicted labels.
result = (y_train == y)
n_correct = float(result.sum())
n_total = result.shape[0]
print('Train acc = %f' % (n_correct / n_total))

Train acc = 0.838979


In [9]:
# Predict labels for the test data with the trained weights.
y_pred = predict(x_test, w)

In [10]:
# Write the test predictions to 'logistic.csv' as (id, label) rows; ids start at 1.
with open('logistic.csv', 'w', newline='') as csvf:
    # Create the CSV writer.
    writer = csv.writer(csvf)
    writer.writerow(['id', 'label'])
    # Labels are floats after rounding; cast to int for the output file.
    writer.writerows(
        [row_id, int(label)] for row_id, label in enumerate(y_pred, start=1)
    )