# Combined Cycle Power Plant

In [1]:
import numpy as np
import pandas as pd
from sklearn import preprocessing

### 1) Loading Training and Testing Data

In [2]:
# Read both CSV files (comma separated) into NumPy arrays and show their shapes.
training_data = np.genfromtxt("training_data.csv", delimiter=",")
testing_data = np.loadtxt("testing_data.csv", delimiter=",")
(training_data.shape, testing_data.shape)

((7176, 5), (2392, 4))

In [3]:
# The last column of the training array is the target; every other column is
# a feature. The test array is used whole as the feature matrix.
x_train, y_train = training_data[:, :-1], training_data[:, -1]
x_test = testing_data[:, :]
(x_train.shape, y_train.shape, x_test.shape)

((7176, 4), (7176,), (2392, 4))

### 2) Feature Scaling

In [4]:
# Inspect the feature matrix before it is standardised in the next cell.
x_train

array([[   8.58,   38.38, 1021.03,   84.37],
       [  21.79,   58.2 , 1017.21,   66.74],
       [  16.64,   48.92, 1011.55,   78.76],
       ...,
       [  29.8 ,   69.34, 1009.36,   64.74],
       [  16.37,   54.3 , 1017.94,   63.63],
       [  30.11,   62.04, 1010.69,   47.96]])

In [5]:
## Scale training datasets
# Fit on the feature columns of training_data rather than on x_train itself:
# this cell overwrites x_train, so fitting on x_train would re-fit the scaler
# on already-standardised values whenever the cell is executed a second time
# (and the test set would then be transformed with the wrong mean/std).
scaler = preprocessing.StandardScaler()
x_train = scaler.fit_transform(training_data[ : , 0 : -1])
x_train

array([[-1.47827466e+00, -1.24764165e+00,  1.30221067e+00,
         7.58624590e-01],
       [ 2.89012041e-01,  3.06797549e-01,  6.61749044e-01,
        -4.46921842e-01],
       [-3.99975582e-01, -4.21012529e-01, -2.87207194e-01,
         3.75010552e-01],
       ...,
       [ 1.36062192e+00,  1.18048335e+00, -6.54382840e-01,
        -5.83682640e-01],
       [-4.36097263e-01,  9.29089763e-04,  7.84140927e-01,
        -6.59584883e-01],
       [ 1.40209496e+00,  6.07960340e-01, -4.31394890e-01,
        -1.73110573e+00]])

### 3) Implement Generic Gradient Descent

In [6]:
def score(Y_pred, Y_actual):
    """Return the coefficient of determination (R^2) of the predictions.

    Computed as ``1 - u / v`` where ``u`` is the residual sum of squares and
    ``v`` is the total sum of squares of ``Y_actual`` about its mean.

    Parameters
    ----------
    Y_pred : array-like
        Predicted target values. A plain list, such as the one returned by
        ``predict``, is accepted.
    Y_actual : array-like
        Observed target values, same length as ``Y_pred``.

    Returns
    -------
    float
        1.0 for a perfect fit. If ``Y_actual`` is constant, ``v`` is zero and
        the result is not finite.
    """
    # Coerce both inputs so plain Python lists work for either argument
    # (list - list is a TypeError and lists have no .mean()).
    y_pred = np.asarray(Y_pred, dtype=float)
    y_actual = np.asarray(Y_actual, dtype=float)
    u = ((y_actual - y_pred) ** 2).sum()
    v = ((y_actual - y_actual.mean()) ** 2).sum()
    return 1 - u / v

def cost(X, Y, m):
    """Return the mean squared error of the linear model ``m`` on ``(X, Y)``.

    Parameters
    ----------
    X : array-like, shape (N, k)
        Feature matrix WITHOUT the constant column; a column of ones is
        appended here so that the last entry of ``m`` acts as the intercept.
    Y : array-like, shape (N,)
        Target values.
    m : array-like, shape (k + 1,)
        Coefficients: ``k`` feature weights followed by the intercept.

    Returns
    -------
    float
        ``(1 / N) * sum((Y - X_aug @ m) ** 2)`` where ``X_aug`` is ``X`` with
        the column of ones appended.

    Raises
    ------
    ValueError
        If ``X`` contains no rows.
    """
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float)
    m = np.asarray(m, dtype=float)
    N = len(X)
    if N == 0:
        raise ValueError("cost() requires at least one sample")
    # Append the constant column that multiplies the intercept term.
    X = np.append(X, np.ones((N, 1)), axis=1)
    # One matrix-vector product gives every row's prediction at once.
    residuals = Y - X @ m
    return (residuals ** 2).sum() / N

def predict(X, m):
    """Evaluate the linear model ``m`` on every row of ``X``.

    ``X`` holds the features only; a column of ones is appended so that the
    last entry of ``m`` is used as the intercept. Returns a list with one
    prediction per row of ``X``.
    """
    n_rows = len(X)
    bias_column = np.array([[1]] * n_rows)
    augmented = np.append(X, bias_column, axis=1)
    # Each prediction is the dot product of one augmented row with m.
    return [(row * m).sum() for row in augmented]

def step_gradient(X, Y, learning_rate, m):
    """Perform one batch gradient-descent update of the coefficients.

    The objective is the mean squared error ``(1/N) * sum((Y - X @ m) ** 2)``,
    whose gradient with respect to ``m`` is ``(-2/N) * X.T @ (Y - X @ m)``.

    Parameters
    ----------
    X : array-like, shape (N, M)
        Design matrix, one row per sample. Any constant column must already
        be present (``fit`` appends it before calling ``gd``).
    Y : array-like, shape (N,)
        Target values.
    learning_rate : float
        Step size applied to the gradient.
    m : array-like, shape (M,)
        Current coefficients.

    Returns
    -------
    list
        Updated coefficients ``m - learning_rate * gradient``, length ``M``.

    Raises
    ------
    ValueError
        If ``X`` contains no rows.
    """
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float)
    m = np.asarray(m, dtype=float)
    N = len(X)
    if N == 0:
        raise ValueError("step_gradient() requires at least one sample")
    # Residuals are computed once for the whole batch instead of once per
    # (sample, coefficient) pair.
    residuals = Y - X @ m
    gradient = (-2 / N) * (X.T @ residuals)
    # Callers treat the coefficients as a list, so keep that return type.
    return list(m - learning_rate * gradient)
    
    
def gd(X, Y, learning_rate, num_iteration):
    """Run ``num_iteration`` gradient-descent steps from all-zero coefficients.

    One coefficient is created per column of ``X``; each iteration replaces
    the coefficients with the result of ``step_gradient``. Returns the final
    coefficient list (all zeros when ``num_iteration`` is 0).
    """
    n_coeffs = len(X[0])
    coeffs = [0] * n_coeffs
    for _ in range(num_iteration):
        coeffs = step_gradient(X, Y, learning_rate, coeffs)
    return coeffs


def fit(X, Y, learning_rate=0.1, num_iteration=100):
    """Fit a linear model to ``(X, Y)`` with batch gradient descent.

    A column of ones is appended to ``X`` so that the last returned
    coefficient is the constant (intercept) term. Returns whatever ``gd``
    returns for the augmented matrix.
    """
    # Extra column for the constant term c.
    ones_column = np.array([[1]] * len(X))
    X_with_bias = np.append(X, ones_column, axis=1)
    return gd(X_with_bias, Y, learning_rate, num_iteration)

### 4) Score on training and testing data

In [7]:
# Train with fit's default learning rate and iteration count. The last entry
# of m is the intercept, because fit appends the constant column last.
m = fit(x_train, y_train)
m

[-13.883055046306245,
 -3.648929378779191,
 0.5395473906772362,
 -2.0098218324370363,
 454.4312931069847]

In [8]:
# Predictions of the fitted model on the training features.
y_train_pred = predict(x_train, m)

In [9]:
# Coefficient of determination (1 - u/v) of the model on the training set.
score(y_train_pred, y_train)

0.9281545459956255

In [10]:
# Apply the scaler fitted on the training features to the test features.
# Reading from testing_data (rather than from x_test, which this cell
# overwrites) keeps the cell idempotent: re-running it cannot standardise
# the test data twice.
x_test = scaler.transform(testing_data)
y_test_pred = predict(x_test, m)

In [11]:
# Write one prediction per line, with no index column and no header row.
df = pd.DataFrame(y_test_pred)
df.to_csv("Prediction.csv", index = False, header = False)
## Scored 0.92839 on Coding Ninjas (presumably an R^2-style score expressed
## as a fraction, not a percentage -- TODO confirm against the grader)