In [1]:
import numpy as np
from sklearn import preprocessing

In [2]:
def step_gradient(points, learning_rate, m):
    m_slope = np.zeros(points.shape[1])
    
    M = len(points)
    for i in range(M):
        x = points[i,:(points.shape[1]-1)]
        x = np.append(x,1)
        y = points[i,points.shape[1]-1]
        
        for j in range(points.shape[1]):
            m_slope[j] += (-2/M)* (y-(m*x).sum())*x[j]
    
    new_m = m - learning_rate*m_slope
    
    return new_m

In [3]:
def gd(points, learning_rate, num_iterations):
    m = np.zeros(points.shape[1])
    for i in range(num_iterations):
        m = step_gradient(points, learning_rate, m)
        print((i+1), " Cost: ", cost(points, m))
    return m

In [4]:
def cost(points, m):
    total_cost = 0
    M = len(points)
    for i in range(M):
        x = points[i][:(points.shape[1]-1)]
        x = np.append(x,1)
        y = points[i][points.shape[1]-1]
        total_cost += (1/M)*(((y-(m*x)).sum())**2)
    return total_cost

In [5]:
def pred(data, m):
    ms = m[:-1]
    c = m[-1]
    ans = []
    
    for d in data:
        ans.append((ms*d).sum() + c)
        
    return ans

In [6]:
def run(data, learning_rate, num_iterations):
    #learning_rate = 0.15
    #num_iterations = 100
    m = gd(data, learning_rate, num_iterations)
    print(m)
    return m

In [7]:
train_data = np.loadtxt('train_power_plant.csv', delimiter=',')
test_data = np.loadtxt('test_power_plant.csv', delimiter=',')

In [8]:
scaler = preprocessing.StandardScaler()
scaler.fit(train_data[:,:-1])

fs = scaler.transform(train_data[:,:-1])

train = np.insert(fs, train_data.shape[1]-1, train_data[:,-1], axis=1)
test = scaler.transform(test_data)

m1 = run(train, 0.2, 200)

1  Cost:  4374836.5812505875
2  Cost:  3930688.5615794524
3  Cost:  3675638.0678989305
4  Cost:  3526714.910480043
5  Cost:  3438837.3733827863
6  Cost:  3386640.6074158354
7  Cost:  3355511.774732079
8  Cost:  3336901.3068856043
9  Cost:  3325757.953714549
10  Cost:  3319079.1925762803
11  Cost:  3315073.6487587383
12  Cost:  3312670.1292775935
13  Cost:  3311227.211432695
14  Cost:  3310360.495688122
15  Cost:  3309839.496664083
16  Cost:  3309525.971367481
17  Cost:  3309336.985450745
18  Cost:  3309222.7776210634
19  Cost:  3309153.486925878
20  Cost:  3309111.192051281
21  Cost:  3309085.1358113163
22  Cost:  3309068.86005695
23  Cost:  3309058.486611273
24  Cost:  3309051.6857574983
25  Cost:  3309047.057251631
26  Cost:  3309043.758858043
27  Cost:  3309041.2834114847
28  Cost:  3309039.3250060566
29  Cost:  3309037.6986701363
30  Cost:  3309036.2921407525
31  Cost:  3309035.0369036635
32  Cost:  3309033.890797968
33  Cost:  3309032.827563086
34  Cost:  3309031.8305545
35  Cost:

In [9]:
ans1  = pred(test, m1)
np.savetxt("predictions_power_plant.csv", ans1, delimiter=",")

In [10]:
def score(y_truth, y_pred):
    u = ((y_truth - y_pred)**2).sum()
    v = ((y_truth - y_truth.mean())**2).sum()
    return 1 - u/v

In [11]:
s1 = pred(train[:,:-1], m1)
print('Coefficient of determination based on Training Data (With Feature Scaling) is:',score(train[:,-1], s1))

Coefficient of determination based on Training Data (With Feature Scaling) is: 0.9287631977982681
