In [23]:
import numpy as np
import pandas as pd
import warnings
from sklearn import preprocessing
warnings.filterwarnings("ignore")
#https://numpy.org/doc/stable/user/basics.broadcasting.html

In [24]:
def step_gradient(learning_rate,X,Y,m):
    '''
    The cost is calculated for a machine learning algorithm over the entire training dataset for each iteration 
    of the gradient descent algorithm. In Gradient Descent, one iteration of the algorithm is called one batch, 
    which denotes the total number of samples from a dataset that is used for calculating the gradient for each 
    iteration.
    '''
    M = X.shape[0]
    N = X.shape[1]
    m_slope = np.zeros(N)
    for i in range(M):
        x = X[i]
        y_pred = (m*x).sum()
        for j in range(N):
            m_slope[j] += (-2/M)*(Y[i] - y_pred)*x[j]         
    m = m - learning_rate*(m_slope)
    return m

In [25]:
def cost(X, Y, m):
    return ((Y - np.sum(m*X, axis = 1))**2).mean()

In [26]:
def gradient_descent(learning_rate,no_of_iterations,x_train,y_train):
    '''
    This function is used to calculate the values of m,c at of each iteration.
    '''
    m = np.zeros(x_train.shape[1])
    for i in range(no_of_iterations):
        m = step_gradient(learning_rate,x_train,y_train,m)
        print(i+1, " Cost: ", cost(x_train,y_train, m))
    return m

In [27]:
def predict(X, m):
    return np.sum(m*X, axis = 1)

```With feature scaling```


In [28]:
#Preprocessing
def run():
    boston_data_train = np.genfromtxt("../../datasets/0000000000002419_training_ccpp_x_y_train.csv",delimiter=",")
    boston_data_test  = np.genfromtxt("../../datasets/0000000000002419_test_ccpp_x_test.csv",delimiter=",")
    no_of_iterations  = 500
    learning_rate     = 0.01
    x_train,y_train   = boston_data_train[:,:-1],boston_data_train[:,-1]
    
    #reshaping in order to avoid brodcasting error
    x_train = np.insert(x_train, x_train.shape[1], 1, axis = 1)
    x_test  = np.insert(boston_data_test, boston_data_test.shape[1], 1, axis = 1)
    
    #feature scaling
    #std_scaler = preprocessing.StandardScaler()
    #x_train = std_scaler.fit_transform(x_train)
    #x_test  = std_scaler.transform(x_test)
    
    #normalization
    #min_max_scaler = preprocessing.MinMaxScaler(feature_range =(0, 1)) 
    #x_train = min_max_scaler.fit_transform(x_train)
    #x_test  = min_max_scaler.fit_transform(x_test) 
    
    
    m = gradient_descent(learning_rate,no_of_iterations,x_train,y_train)
    
    #print(m)
    y_pred = predict(x_test,m)
    y_pred = y_pred.reshape(x_test.shape[0],1)
    #print(y_pred)
    np.savetxt("output.csv",y_pred)
    print(y_pred.shape)
    
    
run()

1  Cost:  8857597445871488.0
2  Cost:  3.798570373357952e+26
3  Cost:  1.6290124912999657e+37
4  Cost:  6.9860011424915866e+47
5  Cost:  2.995938473372167e+58
6  Cost:  1.2848047335174454e+69
7  Cost:  5.5098701723698053e+79
8  Cost:  2.3629014218570383e+90
9  Cost:  1.0133275294602155e+101
10  Cost:  4.3456433368893526e+111
11  Cost:  1.863624096101514e+122
12  Cost:  7.992130283881702e+132
13  Cost:  3.4274157867005606e+143
14  Cost:  1.4698432780325927e+154
15  Cost:  6.303405820679176e+164
16  Cost:  2.70320826267651e+175
17  Cost:  1.1592677227650105e+186
18  Cost:  4.971506160291727e+196
19  Cost:  2.1320246407678796e+207
20  Cost:  9.14316290131011e+217
21  Cost:  3.921034787373988e+228
22  Cost:  1.6815312129672287e+239
23  Cost:  7.21122707018041e+249
24  Cost:  3.092526350785973e+260
25  Cost:  1.3262263325271047e+271
26  Cost:  5.6875062184717713e+281
27  Cost:  2.439080433843946e+292
28  Cost:  1.0459968102433075e+303
29  Cost:  inf
30  Cost:  inf
31  Cost:  inf
32  Cost:  

487  Cost:  nan
488  Cost:  nan
489  Cost:  nan
490  Cost:  nan
491  Cost:  nan
492  Cost:  nan
493  Cost:  nan
494  Cost:  nan
495  Cost:  nan
496  Cost:  nan
497  Cost:  nan
498  Cost:  nan
499  Cost:  nan
500  Cost:  nan
(2392, 1)


In [31]:
# Python program to print the first non-repeating character 
NO_OF_CHARS = 256

# Returns an array of size 256 containg count 
# of characters in the passed char array 
def getCharCountArray(string): 
    count = [0] * NO_OF_CHARS 
    for i in string: 
        count[ord(i)]+= 1
    return count 

# The function returns index of first non-repeating 
# character in a string. If all characters are repeating 
# then returns -1 
def firstNonRepeating(string): 
    count = getCharCountArray(string) 
    index = -1
    k = 0

    for i in string: 
        if count[ord(i)] == 1: 
            index = k 
            break
        k += 1

    return index 

# Driver program to test above function 
string = "tatkakaakakll"
index = firstNonRepeating(string) 
if index == 1: 
    print ("Either all characters are repeating or string is empty")
else: 
    print ("First non-repeating character is " + string[index])

# This code is contributed by Bhavya Jain 


First non-repeating character is l
