# Gradient descent project- Combined Cycle Power Plant

Combined Cycle Power Plant dataset contains 9568 data points collected from a Combined Cycle Power Plant over 6 years (2006-2011), when the power plant was set to work with full load. Features consist of hourly average ambient variables Temperature (T), Ambient Pressure (AP), Relative Humidity (RH) and Exhaust Vacuum (V) to predict the net hourly electrical energy output (EP) of the plant.

1. Code Gradient Descent for N features and come with predictions.
2. Try and test with various combinations of learning rates and number of iterations.
3. Using Feature Scaling, 

In [10]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
train_data=np.genfromtxt('ccpp_x_y_train.csv',delimiter=',')
test_data=np.genfromtxt('ccpp_x_test.csv',delimiter=',')

In [11]:
train_data.shape

(7176, 5)

In [12]:
test_data.shape

(2392, 4)

In [15]:
x_train=train_data[:,0:4]
x_train=preprocessing.scale(x_train)
y_train=train_data[:,4]

In [17]:
x_train=pd.DataFrame(x_train)
x_train[4]=1
x_train=np.array(x_train)
x_train.shape

(7176, 5)

In [19]:
test_data=preprocessing.scale(test_data)
test_data=pd.DataFrame(test_data)
test_data[4]=1
test_data=np.array(test_data)
test_data.shape

(2392, 5)

In [66]:
def single_gradient(x_train,y_train, learning_rate,m):  
    m_slope = [0 for i in range(x_train.shape[1])]
    M = x_train.shape[0]
    N = x_train.shape[1]
    for j in range(N):
        for i in range(M):
            x = x_train[i]
            y = y_train[i]
            a = np.dot(m,x)
            m_slope[j] += (-2/M)* (y - a)*x[j]
    m_slope=np.array(m_slope)
    new_m=m-m_slope*learning_rate   
    return new_m
            
def gd(x_train,y_train, learning_rate, num_iterations):
    m = [0 for i in range(x_train.shape[1])]
    for i in range(num_iterations):
        m = single_gradient(x_train,y_train,learning_rate,m)
        #print(i, " Cost: ", cost(x_train,y_train,m))
    return m

def cost(x_train,y_train, m):
    total_cost = 0
    M = x_train.shape[0]
    for i in range(M):
        x = x_train[i]
        y = y_train[i]
        a = np.dot(m,x)
        total_cost += (1/M)*((y - a)**2)
    return total_cost

def run():
    learning_rate = 0.4
    num_iterations = 125
    m = gd(x_train,y_train, learning_rate, num_iterations)
    return m

In [67]:
run()

array([-1.49253583e+01, -2.91522033e+00,  3.64818800e-01, -2.32802951e+00,
        4.54431293e+02])

In [68]:
def predict(x_test):
    m=run()
    y_predict=[]
    for i in range(len(x_test)):
        a=sum(x_test[i]*m)
        y_predict.append(a)
    return y_predict

In [69]:
def score(y_true,y_predict):
    u=sum((y_true-y_predict)**2)
    v=sum((y_true-y_true.mean())**2)
    return 1-u/v

In [70]:
y_predict=predict(x_train)
y_predict

[478.8411929804532,
 450.5051681999032,
 460.65060214877195,
 428.8707406535999,
 475.69464743137587,
 440.2271388420766,
 477.51949564420295,
 476.713616893481,
 429.3159790427993,
 454.2465180077911,
 458.3839793948944,
 467.24402889517216,
 469.7810633284716,
 487.2852760198615,
 466.6975180813703,
 431.17202051173143,
 461.8122798023015,
 444.3218509690722,
 453.36355727205006,
 437.3339045956542,
 439.0449174597304,
 466.26722950095524,
 473.38496457147073,
 440.00341238773103,
 463.508771024802,
 446.27209046594686,
 432.2720207787327,
 442.75182992496275,
 480.9661089125511,
 473.06565953109475,
 439.4787778799652,
 439.69622343198705,
 447.1317423370078,
 477.24798977831944,
 442.23460434758016,
 476.9694941693071,
 428.51461377997464,
 448.93772615846035,
 452.3631516332362,
 460.25905515662953,
 473.7185695953103,
 443.8500399199295,
 461.5510646020667,
 443.31980778498536,
 467.25948625545294,
 483.8990257650675,
 441.4855933227399,
 460.07906423229116,
 430.50753498672776,


In [72]:
y_true=y_train
score(y_true,y_predict)

0.928751691014062

In [47]:
y_predict=predict(test_data)
y_predict

[470.4555257844069,
 472.2628391253808,
 433.7392184659247,
 457.46465590489237,
 464.9801408254781,
 448.27721583892264,
 478.9067957561493,
 446.79552956940455,
 484.5517331908224,
 439.9202465498948,
 434.1605623695192,
 431.58523266931,
 472.9160209852187,
 463.43440773989977,
 444.1110300074199,
 456.82204643386535,
 488.8573891574698,
 447.71774573520264,
 426.42780482732127,
 438.16112806425633,
 439.40299846487807,
 483.7665352193267,
 460.0609276573436,
 475.9669493240498,
 431.28224524831813,
 433.96761404710526,
 468.0204680748222,
 470.5849768563003,
 432.2502600305408,
 477.085575949149,
 443.2206489665983,
 431.0013951150406,
 450.1727622378339,
 471.0933970136406,
 469.34461867674605,
 472.8673296271763,
 446.6557857103796,
 455.69581375324157,
 445.70916752943737,
 481.77552345787217,
 466.2353160872653,
 434.20435667999243,
 473.7984824438292,
 467.45087116275715,
 462.37505360408943,
 485.700285275011,
 436.3035338062862,
 430.5702351080311,
 440.3102404025437,
 476.0

In [48]:
 np.savetxt('predictions4.csv',y_predict) 