In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import datasets
from sklearn import model_selection
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
min_max_scaler = MinMaxScaler()
std_scaler = StandardScaler()

In [2]:
def step_gradient(X, Y, learning_rate, m):
    M = X.shape[0]
    N = X.shape[1]
    m_slope = np.zeros(N)
    for i in range(M):
        x = X[i]
        y_pred = (m*x).sum()
        for j in range(N):
            m_slope[j] += (-2/M)*(Y[i] - y_pred)*x[j]
    m = m - learning_rate*(m_slope)
    return m  

def gd(X, Y, learning_rate, num_iterations):
    m = np.zeros(X.shape[1])
    for i in range(num_iterations):
        m = step_gradient(X, Y, learning_rate, m)
    print(i+1, 'Cost: ', cost(X, Y, m))
    return m

def cost(X, Y, m):
    return ((Y - np.sum(m*X, axis = 1))**2).mean()

def predict(X, m):
    return np.sum(m*X, axis = 1)

def score(y_truth, y_pred):
    u = ((y_truth - y_pred)**2).sum()
    v = ((y_truth - y_truth.mean())**2).sum()
    return 1-u/v
    

### with feature scaling

In [5]:
def run():
    #df  = pd.read_csv('0000000000002417_training_boston_x_y_train.csv')
    #print(df.head(5))
    data = np.genfromtxt('trainlast.csv', delimiter = ',')
    print(data.shape)
    print(data[0])
    target = np.genfromtxt('testlast.csv', delimiter = ',')
    print(target.shape)
    x_train, y_train = data[:,:-1], data[:, -1]
    print(x_train.shape, y_train.shape)
    x_test = target
    reshape_value = x_test.shape[0]
    
    #x_train = std_scaler.fit_transform(x_train)
    #x_test = std_scaler.transform(x_test)
    x_test = std_scaler.fit_transform(x_test)
    x_train = std_scaler.transform(x_train)
    
    print(x_train[0])
    
    x_train = np.insert(x_train, x_train.shape[1], 1, axis = 1)
    x_test = np.insert(x_test, x_test.shape[1], 1, axis = 1)
    
    learning_rate = 0.1
    num_iterations = 500
    m = gd(x_train, y_train, learning_rate, num_iterations)
    Y_pred = predict(x_test, m)
    #print(Y_pred.shape)
    
    Y_pred = Y_pred.reshape(reshape_value, 1)
    np.savetxt('y_predictions_boston.csv', Y_pred, delimiter = ',')
    
run() 

(7176, 5)
[   8.58   38.38 1021.03   84.37  482.26]
(2392, 4)
(7176, 4) (7176,)
[-1.5082328  -1.2705618   1.32815576  0.75466221]
500 Cost:  20.911748993218566


### without feature scaling

In [12]:
def run():
    data = np.loadtxt('trainlast.csv', delimiter = ',')
    print(data.shape)
    print(data[0])
    target = np.loadtxt('testlast.csv', delimiter = ',')
    print(target.shape)
    x_train, y_train = data[:,:-1], data[:, -1]
    print(x_train.shape, y_train.shape)
    x_train = np.insert(x_train, x_train.shape[1], 1, axis = 1)
    
    x_test = target
    reshape_value = x_test.shape[0]
    x_test = np.insert(x_test, x_test.shape[1], 1, axis = 1)
    
    learning_rate = 0.000001
    num_iterations = 500
    m = gd(x_train, y_train, learning_rate, num_iterations)
    Y_pred = predict(x_test, m)
    #print(Y_pred.shape)
    Y_pred = Y_pred.reshape(reshape_value, 1)
    np.savetxt('bostonalways.csv', Y_pred, delimiter = ',')
    
run()   

(7176, 5)
[   8.58   38.38 1021.03   84.37  482.26]
(2392, 4)
(7176, 4) (7176,)
500 Cost:  1.1335256453783181e+35
