# In [None]:
from autograd import numpy as np
import pandas as pd
from sklearn import preprocessing
from autograd import grad
import matplotlib.pyplot as plt


import gc, sys
import time
gc.enable()

INPUT_DIR = "../input/"

def gradient_descent(g, epochs, alpha, bealta, w, batch_size):
    """Run mini-batch, component-wise normalized gradient descent on ``g``.

    The number of samples is read from the module-level array ``x``; the
    batches are consecutive index ranges over its rows (the last batch may
    be shorter).

    Args:
        g: Cost function called as ``g(w, indices)``; differentiated with
            autograd with respect to its first argument.
        epochs: Number of full passes over the data.
        alpha: Step length applied to the normalized gradient.
        bealta: Unused; kept so existing callers keep working.
        w: Initial weights.
        batch_size: Number of rows per mini-batch.

    Returns:
        ``(weight_history, cost_history)``: the initial weights/cost followed
        by one entry per epoch. Costs are evaluated on all rows of ``x``.
    """
    gradient = grad(g)

    n_samples = x.shape[0]
    steps = int(np.ceil(n_samples / batch_size))
    all_rows = range(0, n_samples)
    eps = 10 ** (-8)  # keeps the normalization finite for zero components

    weight_history = [w]
    cost_history = [g(w, all_rows)]

    for epoch in range(epochs):
        print('epoch:' + str(epoch + 1) + '  cost:' + str(cost_history[-1]))

        for step in range(steps):
            start = step * batch_size
            stop = min((step + 1) * batch_size, n_samples)
            batch = np.arange(start, stop)

            raw_grad = gradient(w, batch)
            # Divide every component by its own magnitude, so each weight
            # moves by (almost exactly) alpha in the descent direction.
            direction = raw_grad / (np.abs(raw_grad) + eps)
            w = w - alpha * direction

        # History is recorded once per epoch, not once per mini-batch.
        weight_history.append(w)
        cost_history.append(g(w, all_rows))

    return weight_history, cost_history

def model(x_p, w):
    """Evaluate the linear model ``w[0] + x_p . w[1:]`` and return it transposed.

    Args:
        x_p: Input rows; its last axis must match ``len(w) - 1``.
        w: Weights, with the bias in ``w[0]`` and feature weights in ``w[1:]``.

    Returns:
        The transposed result of the affine combination.
    """
    bias = w[0]
    feature_weights = w[1:]
    linear_part = np.dot(x_p, feature_weights)
    return (bias + linear_part).T
	
def cost_function(w,iter):
    """Mean perceptron cost of weights ``w`` on the selected rows.

    Reads the module-level arrays ``x`` (features) and ``y`` (labels).

    Args:
        w: Weights passed through to ``model``.
        iter: Row indices selecting the samples to evaluate.

    Returns:
        ``sum(max(-y * model(x, w), 0))`` over the selected rows, divided by
        the number of selected labels.
    """
    batch_x = x[iter, :]
    batch_y = y[iter]
    # Perceptron cost: only samples with -y * prediction > 0 contribute.
    penalties = np.maximum(-batch_y * model(batch_x, w), 0)
    return np.sum(penalties) / float(np.size(batch_y))


def feature_engineering(is_train=True):
    """Load a competition CSV and build group-mean feature matrices.

    Reads ``train_V2.csv`` or ``test_V2.csv`` from ``INPUT_DIR``, adds a
    ``totalDistance`` column (ride + walk + swim distance) and averages every
    remaining feature over each ``(matchId, groupId)`` group.

    Args:
        is_train: When true, load the training file, drop rows whose
            ``maxPlace`` is not greater than 1, and also return the target.

    Returns:
        ``(X, y, feature_names)`` where

        * ``X`` is a float64 array of group-mean features. For training it
          has one row per group; for test it has one row per input row (the
          group means are left-merged back onto the original rows).
        * ``y`` is the float64 per-group mean of ``winPlacePerc`` for
          training, or ``None`` for test.
        * ``feature_names`` lists the columns of ``X`` in order.
    """
    group_keys = ['matchId', 'groupId']
    target = 'winPlacePerc'

    if is_train:
        print("processing train.csv")
        df = pd.read_csv(INPUT_DIR + 'train_V2.csv')
        df = df[df['maxPlace'] > 1]
    else:
        print("processing test.csv")
        df = pd.read_csv(INPUT_DIR + 'test_V2.csv')

    df['totalDistance'] = df['rideDistance'] + df["walkDistance"] + df["swimDistance"]

    print("remove some columns")
    features = list(df.columns)
    # Identifiers and the categorical match type are not model inputs.
    for dropped in ("Id", "matchId", "groupId", "matchType"):
        features.remove(dropped)

    grouped = df.groupby(group_keys)

    print("get target")
    y = None
    if is_train:
        y = np.array(grouped[target].agg('mean'), dtype=np.float64)
        features.remove(target)

    print("get group mean feature")
    agg = grouped[features].agg('mean')
    agg_flat = agg.reset_index()

    # Training keeps one row per group; test keeps one row per original row
    # so predictions line up with the rows of the test file.
    keys_frame = agg_flat[group_keys] if is_train else df[group_keys]
    df_out = keys_frame.merge(agg_flat, suffixes=["", ""], how='left', on=group_keys)
    df_out = df_out.drop(columns=group_keys)

    X = np.array(df_out, dtype=np.float64)
    feature_names = list(df_out.columns)

    # Release the large intermediates before handing back the arrays.
    del df, df_out, agg, agg_flat, keys_frame, grouped
    gc.collect()

    return X, y, feature_names


# --- Training ---------------------------------------------------------------
# Build the per-group training matrix and targets, then draw small random
# initial weights (one bias plus one weight per feature).
data_x, y, feature_names = feature_engineering(True)
w0 = 0.1* np.random.rand(data_x.shape[1]+ 1, 1)

# Standardize every feature column; the small constant avoids division by
# zero for constant columns. x_means / x_stds are kept at module level.
x_means = np.mean(data_x,axis = 0)[np.newaxis,:]
x_stds = np.std(data_x,axis = 0)[np.newaxis,:]
x = (data_x - x_means)/(x_stds+0.0000001)
print(x.shape)

# Binarize the target for the perceptron: values below 0.5 become -1 and
# everything else becomes +1. Vectorized instead of a Python-level loop over
# every label, which produces the same values far faster.
y = np.where(y < 0.5, -1.0, 1.0)

w_history, cost_history = gradient_descent(cost_function, 200, 0.01, 0.8 , w0, 10240)

# Keep the weights from the final epoch and report the final full-data cost.
w1 = w_history[-1]
print (cost_history[-1])

plt.plot(cost_history)
plt.show()


# --- Inference and submission -------------------------------------------------
x_test, _, _ = feature_engineering(False)
# Standardize the test features with the statistics computed on the training
# data (x_means / x_stds defined during training). The weights were fitted on
# features scaled that way, so re-estimating the statistics on the test set
# would feed the model inputs on a different scale than it was trained on.
x_test = (x_test - x_means)/(x_stds+0.0000001)
print(x_test.shape)

# Raw linear scores, rescaled to [0, 1] so they can be read as winPlacePerc.
y_predict =  model(x_test, w1)
y_predict = y_predict.reshape(-1, 1)
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
print(y_predict.shape)
scaler.fit(y_predict)
y_predict = scaler.transform(y_predict)

df_test = pd.read_csv(INPUT_DIR + 'test_V2.csv')

# Snap every prediction onto the grid of attainable placements for its match:
#   maxPlace == 0 -> 0.0, maxPlace == 1 -> 1.0,
#   otherwise round to the nearest multiple of 1 / (maxPlace - 1).
# Done on whole arrays; a per-row df_test.iloc[i] lookup is prohibitively slow
# on a file of this size.
max_place = df_test['maxPlace'].to_numpy().astype(int)
scores = y_predict[:, 0]
snapped = scores.copy()

no_places = max_place == 0
single_place = max_place == 1
ranked = ~(no_places | single_place)

gap = 1.0 / (max_place[ranked] - 1)
snapped[ranked] = np.round(scores[ranked] / gap) * gap
snapped[no_places] = 0.0
snapped[single_place] = 1.0

# Guard against rounding pushing a value outside the valid range.
snapped = np.clip(snapped, 0.0, 1.0)
y_predict[:, 0] = snapped

df_test['winPlacePerc'] = y_predict[:, 0]
submission = df_test[['Id', 'winPlacePerc']]
submission.to_csv('PerceptronNormalized.csv', index=False)