# Importing necessary libraries

In [1]:
import numpy as np
from sklearn import preprocessing
from sklearn import model_selection

# Loading Datasets

In [2]:
# Read the labelled training matrix and the unlabelled test matrix from CSV.
train = np.genfromtxt('./training_ccpp_x_y_train.csv', delimiter=",")
test = np.genfromtxt('./test_ccpp_x_test.csv', delimiter=",")
# Report both array shapes, one per line.
print(train.shape, test.shape, sep="\n")

(7176, 5)
(2392, 4)


# Preprocessing and scaling

In [3]:
# Split the training matrix: every column except the last goes to x,
# the last column goes to y.
x, y = train[:, :-1], train[:, -1]

# Fit the standardizer on x, then replace x with its scaled version.
# The fitted scaler is kept so the same transform can be applied later.
scaler = preprocessing.StandardScaler().fit(x)
x = scaler.transform(x)
x

array([[-1.47827466e+00, -1.24764165e+00,  1.30221067e+00,
         7.58624590e-01],
       [ 2.89012041e-01,  3.06797549e-01,  6.61749044e-01,
        -4.46921842e-01],
       [-3.99975582e-01, -4.21012529e-01, -2.87207194e-01,
         3.75010552e-01],
       ...,
       [ 1.36062192e+00,  1.18048335e+00, -6.54382840e-01,
        -5.83682640e-01],
       [-4.36097263e-01,  9.29089763e-04,  7.84140927e-01,
        -6.59584883e-01],
       [ 1.40209496e+00,  6.07960340e-01, -4.31394890e-01,
        -1.73110573e+00]])

# Implementing gradient descent

In [4]:
def cost(x, y, m):
    """Return the mean squared error of the linear model against the targets.

    The prediction for row ``i`` is ``sum(m * x[i])``; the result is the mean
    of ``(y[i] - prediction_i) ** 2`` over all rows.

    Parameters
    ----------
    x : array-like, 2-D
        One row per sample, one column per weight in ``m``.
    y : array-like, 1-D
        One target per row of ``x``.
    m : array-like, 1-D
        One weight per column of ``x``.

    Returns
    -------
    The mean squared error, or ``0`` when ``x`` has no rows.
    """
    x = np.asarray(x, dtype=float)
    # No samples means nothing to accumulate; also avoids a mean over an empty array.
    if len(x) == 0:
        return 0
    # All residuals in one matrix-vector product instead of a per-row Python loop.
    residuals = np.asarray(y, dtype=float) - x.dot(np.asarray(m, dtype=float))
    return np.mean(residuals ** 2)
        
def step_gradient(x, y, m, learning_rate):
    """Perform one gradient-descent update of the weights for the MSE cost.

    The gradient component for weight ``j`` is
    ``(-2 / M) * sum_i (y[i] - sum(m * x[i])) * x[i][j]`` where ``M`` is the
    number of rows of ``x``.

    Parameters
    ----------
    x : array-like, 2-D
        One row per sample, one column per weight in ``m``.
    y : array-like, 1-D
        One target per row of ``x``.
    m : array-like, 1-D
        Current weights; not modified.
    learning_rate : float
        Step size applied to the gradient.

    Returns
    -------
    numpy.ndarray
        The updated weights ``m - learning_rate * gradient``.

    Raises
    ------
    ValueError
        If ``x`` has no rows (the gradient would divide by zero).
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    m = np.asarray(m, dtype=float)
    M = len(x)
    if M == 0:
        raise ValueError("step_gradient requires at least one sample")
    # The residual of each sample does not depend on the feature index, so it
    # is computed once for all samples rather than once per (sample, feature).
    residuals = y - x.dot(m)
    # x.T.dot(residuals)[j] == sum_i residuals[i] * x[i][j]
    m_slope = (-2 / M) * x.T.dot(residuals)
    return m - learning_rate * m_slope

def gd(x, y, learning_rate, number_of_iter, verbose=True):
    """Run gradient descent from all-zero weights and return the final weights.

    Parameters
    ----------
    x : 2-D array-like
        One row per sample; the number of weights equals ``len(x[0])``.
    y : 1-D array-like
        One target per row of ``x``.
    learning_rate : float
        Step size passed to ``step_gradient`` on every iteration.
    number_of_iter : int
        Number of update steps to perform.
    verbose : bool, optional
        When true (the default), print the iteration index and the current
        cost after every step. Computing the cost is an extra pass over the
        data, so pass ``False`` to skip both the pass and the output.

    Returns
    -------
    The weights after ``number_of_iter`` updates.
    """
    m = np.zeros(len(x[0]))
    for i in range(number_of_iter):
        m = step_gradient(x, y, m, learning_rate)
        if verbose:
            print("iteration", i, " ", cost(x, y, m))
    return m

def gradient_descent(x, y, learning_rate=0.35, number_of_iter=100):
    """Fit linear-model weights, including an intercept, by gradient descent.

    A column of ones is appended to ``x`` so that the last returned weight
    acts as the intercept term.

    Parameters
    ----------
    x : 2-D numpy array
        One row per sample, one column per feature (without a bias column).
    y : 1-D array-like
        One target per row of ``x``.
    learning_rate : float, optional
        Step size for each update; defaults to 0.35.
    number_of_iter : int, optional
        Number of update steps; defaults to 100.

    Returns
    -------
    The fitted weights: one per column of ``x`` followed by the intercept.
    """
    # Append the bias column on a copy; the caller's x is left untouched.
    x = np.append(x, np.ones(len(x)).reshape(-1, 1), axis=1)
    return gd(x, y, learning_rate, number_of_iter)

# Training the model (running gradient descent)

In [5]:
# Fit the weights on the scaled training features. gradient_descent appends a
# column of ones to x, so the last entry of m acts as the intercept.
m = gradient_descent(x, y)

iteration 0   18746.02651010732
iteration 1   1766.8066063688912
iteration 2   211.14167087414796
iteration 3   56.95576326493632
iteration 4   35.63911811652189
iteration 5   29.70538304469621
iteration 6   26.915995522679363
iteration 7   25.32814618695703
iteration 8   24.339144656950022
iteration 9   23.675325116258644
iteration 10   23.198090719847613
iteration 11   22.83462055634631
iteration 12   22.545462539383276
iteration 13   22.30836449727373
iteration 14   22.110082796897142
iteration 15   21.942202452085393
iteration 16   21.798986770874226
iteration 17   21.676257458867614
iteration 18   21.5708001193456
iteration 19   21.480039329682675
iteration 20   21.401853421210365
iteration 21   21.33446281423798
iteration 22   21.276358045579634
iteration 23   21.226250012096965
iteration 24   21.18303329198881
iteration 25   21.14575766593341
iteration 26   21.11360515144266
iteration 27   21.08587100300165
iteration 28   21.061947733096268
iteration 29   21.04131153649423
itera

# Making prediction

In [6]:
# Scale the test features with the scaler fitted on the training data, then
# append the column of ones that pairs with the intercept weight at the end of m.
# NOTE(review): this rebinds x, replacing the training features; re-running the
# training cell after this one will not train on the original data.
x = scaler.transform(test)
x = np.append(x, np.ones(len(x)).reshape(-1, 1), axis=1)
# One matrix-vector product gives sum(m * row) for every row at once,
# replacing the per-row Python loop.
y_pred = np.dot(x, m)
y_pred

array([469.95815235, 471.76676052, 433.85480447, ..., 439.13979258,
       450.66436422, 447.27468095])

# Saving the file

In [7]:
# Write the predictions to predictions.csv, formatted with five decimal places.
np.savetxt('predictions.csv', y_pred, delimiter=',', fmt="%.5f")