# Gradient Descent - Combined Cycle Power Plant

In [3]:
import numpy as np

In [16]:
def get_scores(params, x_train, x_test, y_train, y_test):
    """Return the ``(train_score, test_score)`` pair for a fitted linear model.

    ``params`` stores the feature coefficients followed by the intercept as
    its last element. Each split is predicted with ``pred`` and the
    predictions are evaluated with ``score``.
    """
    # The final entry is the intercept; everything before it is the slope vector.
    slopes, intercept = params[:-1], params[-1]

    train_score = score(y_train, pred(x_train, slopes, intercept))
    test_score = score(y_test, pred(x_test, slopes, intercept))

    return train_score, test_score

In [41]:
def score(y_truth, y_pred):
    """Return the coefficient of determination (R^2) of ``y_pred``.

    Computed as ``1 - u / v`` where ``u`` is the sum of squared residuals and
    ``v`` is the sum of squared deviations of ``y_truth`` from its mean.
    """
    residuals = y_truth - y_pred
    deviations = y_truth - y_truth.mean()

    residual_sum_sq = (residuals ** 2).sum()
    total_sum_sq = (deviations ** 2).sum()

    return 1 - residual_sum_sq / total_sum_sq

In [76]:
def pred(x_test, m, c):
    """Predict targets for ``x_test`` with the linear model ``x . m + c``.

    Parameters
    ----------
    x_test : 2-D array-like, one row per sample and one column per feature.
    m : 1-D array-like of feature coefficients (one per column of ``x_test``).
    c : scalar intercept added to every prediction.

    Returns
    -------
    numpy.ndarray
        1-D float array holding one prediction per row of ``x_test``.
    """
    # Keep the empty-input result explicit: no rows means no predictions.
    if len(x_test) == 0:
        return np.zeros(0)

    features = np.asarray(x_test, dtype=float)
    weights = np.asarray(m, dtype=float)

    # One matrix-vector product replaces the per-row Python loop.
    return features @ weights + c
    

In [43]:
def cost(x, y, params):
    """Return the mean squared error of the linear model ``x . params``.

    Parameters
    ----------
    x : 2-D array-like, one row per sample. There is no separate intercept
        term here: every entry of ``params`` multiplies a column of ``x``
        (``gd`` appends a column of ones so the last parameter acts as the
        intercept).
    y : 1-D array-like of targets, one per row of ``x``.
    params : 1-D array-like with one coefficient per column of ``x``.

    Returns
    -------
    float
        The average of the squared residuals ``(y - x . params) ** 2``.
    """
    features = np.asarray(x, dtype=float)
    targets = np.asarray(y, dtype=float)
    weights = np.asarray(params, dtype=float)

    # Residuals for all samples at once instead of a per-sample Python loop.
    residuals = targets - features @ weights
    return np.mean(residuals ** 2)

In [64]:
def step_gradient(x, y, learning_rate, params):
    """Perform one gradient-descent update for mean-squared-error regression.

    Parameters
    ----------
    x : 2-D array-like, one row per sample and one column per parameter.
    y : 1-D array-like of targets, one per row of ``x``.
    learning_rate : step size multiplied into the gradient.
    params : 1-D array-like of current coefficients (one per column of ``x``).

    Returns
    -------
    numpy.ndarray
        The updated parameters ``params - learning_rate * gradient``, where
        ``gradient[j] = (-2 / M) * sum_i (y[i] - x[i] . params) * x[i][j]``
        and ``M`` is the number of samples.
    """
    features = np.asarray(x, dtype=float)
    targets = np.asarray(y, dtype=float)
    weights = np.asarray(params, dtype=float)

    M = len(features)

    # The residual of a sample does not depend on the parameter index, so it
    # is computed once for all samples instead of once per (sample, parameter).
    residuals = targets - features @ weights

    # Gradient of the mean squared error with respect to every parameter.
    params_slope = (-2 / M) * (features.T @ residuals)

    return weights - learning_rate * params_slope


In [61]:
def gd(x, y, learning_rate, num_iterations):
    """Fit linear-regression parameters with batch gradient descent.

    A column of ones is appended to ``x`` so that the last parameter acts as
    the intercept. Parameters start at zero, are updated ``num_iterations``
    times via ``step_gradient``, and the cost after each update is printed.
    The final parameters (coefficients followed by the intercept) are returned.
    """
    # Append the bias column so the intercept is learned like any other weight.
    bias_column = np.ones((len(x), 1))
    x = np.concatenate((x, bias_column), axis=1)

    n_params = len(x[0])  # feature columns plus the bias column
    params = [0] * n_params

    for iteration in range(num_iterations):
        params = step_gradient(x, y, learning_rate, params)
        print(iteration," Cost : ",cost(x,y,params))

    return params

# Split the data in train.csv into (train, test) sets to test the algorithm and find the best parameters

In [82]:
# Load the training CSV: every column except the last is a feature,
# and the last column is the target.
data = np.loadtxt('train.csv', delimiter=',')
X, Y = data[:, :-1], data[:, -1]

from sklearn import model_selection

# Hold out part of the rows (test_size=0.3) to evaluate the fitted parameters.
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    X, Y, test_size=0.3
)

learning_rate, num_iterations = 0.2, 100

# Fit on the raw (unscaled) features.
params = gd(X_train, Y_train, learning_rate, num_iterations)
print(params)

train_score, test_score = get_scores(params, X_train, X_test, Y_train, Y_test)
print("train score : ", train_score," ********* test_score : ", test_score)

0  Cost :  3.5435310414344996e+16
1  Cost :  6.079432649392559e+27
2  Cost :  1.043013336375921e+39
3  Cost :  1.7894380653542143e+50
4  Cost :  3.07003609451889e+61
5  Cost :  5.2670845692461315e+72
6  Cost :  9.036434427960106e+83
7  Cost :  1.5503291450379998e+95
8  Cost :  2.659810655536246e+106
9  Cost :  4.563284348970087e+117
10  Cost :  7.828964819812336e+128
11  Cost :  1.3431705206731881e+140
12  Cost :  2.3044005039337188e+151
13  Cost :  3.953527568389843e+162
14  Cost :  6.78284013882853e+173
15  Cost :  1.163692918616498e+185
16  Cost :  1.9964810921698555e+196
17  Cost :  3.425247922046779e+207
18  Cost :  5.876501096604196e+218
19  Cost :  1.0081975356036408e+230
20  Cost :  1.7297065959617283e+241
21  Cost :  2.9675582437544505e+252
22  Cost :  5.091269207526214e+263
23  Cost :  8.734798111564734e+274
24  Cost :  1.4985791349828529e+286
25  Cost :  2.5710261360621878e+297


  total_cost += (1/M)*((y[i]-(params*x[i]).sum())**2)


26  Cost :  inf
27  Cost :  inf
28  Cost :  inf
29  Cost :  inf
30  Cost :  inf
31  Cost :  inf
32  Cost :  inf
33  Cost :  inf
34  Cost :  inf
35  Cost :  inf
36  Cost :  inf
37  Cost :  inf
38  Cost :  inf
39  Cost :  inf
40  Cost :  inf
41  Cost :  inf
42  Cost :  inf
43  Cost :  inf
44  Cost :  inf
45  Cost :  inf
46  Cost :  inf
47  Cost :  inf
48  Cost :  inf
49  Cost :  inf
50  Cost :  inf
51  Cost :  inf
52  Cost :  inf
53  Cost :  inf
54  Cost :  inf


  params_slope[j] += (-2/M)*(y[i]-(params*x[i]).sum()) * x[i][j]
  total_cost += (1/M)*((y[i]-(params*x[i]).sum())**2)
  params_slope[j] += (-2/M)*(y[i]-(params*x[i]).sum()) * x[i][j]
  new_params = params-learning_rate*params_slope


55  Cost :  nan
56  Cost :  nan
57  Cost :  nan
58  Cost :  nan
59  Cost :  nan
60  Cost :  nan
61  Cost :  nan
62  Cost :  nan
63  Cost :  nan
64  Cost :  nan
65  Cost :  nan
66  Cost :  nan
67  Cost :  nan
68  Cost :  nan
69  Cost :  nan
70  Cost :  nan
71  Cost :  nan
72  Cost :  nan
73  Cost :  nan
74  Cost :  nan
75  Cost :  nan
76  Cost :  nan
77  Cost :  nan
78  Cost :  nan
79  Cost :  nan
80  Cost :  nan
81  Cost :  nan
82  Cost :  nan
83  Cost :  nan
84  Cost :  nan
85  Cost :  nan
86  Cost :  nan
87  Cost :  nan
88  Cost :  nan
89  Cost :  nan
90  Cost :  nan
91  Cost :  nan
92  Cost :  nan
93  Cost :  nan
94  Cost :  nan
95  Cost :  nan
96  Cost :  nan
97  Cost :  nan
98  Cost :  nan
99  Cost :  nan
[nan nan nan nan nan]
train score :  nan  ********* test_score :  nan


## We can see that the cost diverges (it grows to inf and then nan): because the features have very different ranges of values, some features dominate the result.
## So we need to normalize the input points so that every feature has values in the same range.

# Now Applying Feature Scaling to normalize input

In [22]:
from sklearn import preprocessing

In [86]:
# Fit the scaler on the training split only, then apply the same
# transformation to both splits.
scaler = preprocessing.StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

learning_rate, num_iterations = 0.2, 100

# Same hyperparameters as the unscaled run, now on the scaled features.
params = gd(X_train_scaled, Y_train, learning_rate, num_iterations)
print(params)

train_score, test_score = get_scores(
    params, X_train_scaled, X_test_scaled, Y_train, Y_test
)
print("train score : ", train_score," ********* test_score : ", test_score)

0  Cost :  74382.58439355818
1  Cost :  26798.66027331134
2  Cost :  9667.20350018575
3  Cost :  3498.957586976741
4  Cost :  1277.7025509423288
5  Cost :  477.5160254783642
6  Cost :  189.01529031450835
7  Cost :  84.79131630840199
8  Cost :  46.957294461380414
9  Cost :  33.06150615811374
10  Cost :  27.813197068903854
11  Cost :  25.702227222840918
12  Cost :  24.741136411597495
13  Cost :  24.21167407339138
14  Cost :  23.853175098949926
15  Cost :  23.570144848501123
16  Cost :  23.32684738147846
17  Cost :  23.109268366271483
18  Cost :  22.911365553032173
19  Cost :  22.73008278806598
20  Cost :  22.563531224076957
21  Cost :  22.410318354311173
22  Cost :  22.269295037552883
23  Cost :  22.139456026171114
24  Cost :  22.019897709579393
25  Cost :  21.90979774897149
26  Cost :  21.808403476788417
27  Cost :  21.715024078551888
28  Cost :  21.62902460022176
29  Cost :  21.54982097089089
30  Cost :  21.476875678865657
31  Cost :  21.409693922568135
32  Cost :  21.347820137533454
3

## We get far better results after applying feature scaling with learning rate = 0.2 (coefficient of determination: approximately 0.92)

# Now use these settings (learning rate and number of iterations) with all data points in "train.csv" to train the model, then predict for "test.csv"

In [95]:
# Reload the full training file: all columns but the last are features,
# and the last column is the target.
training_data = np.loadtxt('train.csv', delimiter=',')
X, Y = training_data[:, :-1], training_data[:, -1]

# Fit the scaler on every training row this time (no hold-out split).
scaler = preprocessing.StandardScaler().fit(X)
X_scaled = scaler.transform(X)

learning_rate, num_iterations = 0.2, 100

params = gd(X_scaled, Y, learning_rate, num_iterations)


(7176, 5)
0  Cost :  74383.45917420094
1  Cost :  26799.242503792175
2  Cost :  9667.643213351957
3  Cost :  3499.3247676562505
4  Cost :  1278.0283196819853
5  Cost :  477.81602697123043
6  Cost :  189.29840013368792
7  Cost :  85.06306229647089
8  Cost :  47.221331673237046
9  Cost :  33.32033677022838
10  Cost :  28.06856937973347
11  Cost :  25.955379768322803
12  Cost :  24.992956681736707
13  Cost :  24.462804989575606
14  Cost :  24.104087477563194
15  Cost :  23.821187041728106
16  Cost :  23.578279429707155
17  Cost :  23.36128561319899
18  Cost :  23.16411509657257
19  Cost :  22.983675064232603
20  Cost :  22.81804828204394
21  Cost :  22.665819874714725
22  Cost :  22.525822801627825
23  Cost :  22.397037310565374
24  Cost :  22.278547925441636
25  Cost :  22.169522539596677
26  Cost :  22.069200418735647
27  Cost :  21.976884085133452
28  Cost :  21.891933095748453
29  Cost :  21.813758883785773
30  Cost :  21.741820288241144
31  Cost :  21.675619583764714
32  Cost :  21.6

In [96]:
# Separate the fitted parameters: coefficients first, intercept last.
m, c = params[:-1], params[-1]

In [101]:
# Predict on the scaled training inputs using the coefficients m and intercept c.
y_pred = pred(X_scaled,m,c)
print(y_pred)


[478.74300909 450.51945481 460.62926916 ... 431.89780503 462.68362802
 435.71895084]


In [105]:
# Print each prediction next to its true target for a side-by-side comparison.
for idx, actual in enumerate(Y):
    print(y_pred[idx], actual)

478.7430090871902 482.26
450.5194548062123 446.94
460.62926916271823 452.56
428.9782994730165 433.44
475.60863254737046 480.38
440.2447290490285 436.36
477.41957395118044 485.36
476.6080223588321 481.59
429.4308205185724 432.56
454.21020632413405 449.48
458.44454579177057 460.49
467.231065523739 470.17
469.75461992386334 473.41
487.13481536143263 488.5
466.6985172755831 467.33
431.2313935825009 430.67
461.83747026458263 458.57
444.3490613588464 438.33
453.41376727332073 451.38
437.40642174971197 440.23
439.16100114957135 438.83
466.23145689295114 472.04
473.29813690522656 465.37
440.09418253244013 441.32
463.5202854472594 470.41
446.36079302869405 451.84
432.3218915582375 434.54
442.6978312125008 444.0
480.8212254229511 489.95
473.0059120360944 472.86
439.51185846204584 435.84
439.81883337192903 440.99
447.1635103771931 443.89
477.17798146133293 473.33
442.31219697005605 442.0
476.86916916807854 481.45
428.6386299316877 436.89
448.95502753555866 450.62
452.44501242524933 447.88
460.274

In [109]:
# Load test.csv (every column is used as a feature) and scale it with the
# existing `scaler` object rather than fitting a new one.
X_test = np.loadtxt("test.csv",delimiter=",")
X_test_scaled = scaler.transform(X_test)

# Predict with the coefficients m and intercept c; the trailing bare
# expression makes the notebook display the resulting array.
y_pred = pred(X_test_scaled,m,c)
y_pred

array([469.98354088, 471.77732332, 433.81419249, ..., 439.16594869,
       450.78767742, 447.32008123])