### Import necessary Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

### Initializing weights and bias

In [None]:
def initialize(lenw):
    """Create the starting parameters for the linear model.

    Args:
        lenw: Number of weights to allocate.

    Returns:
        A tuple ``(w, b)``: ``w`` is a zero-filled array of shape
        ``(1, lenw)`` and ``b`` is the integer ``0``.
    """
    weights = np.zeros(shape=(1, lenw))
    bias = 0
    return weights, bias

### Hypothesis function

In [None]:
def hypothesis(X, w, b):
    """Evaluate the linear model ``dot(w, X) + b``.

    Args:
        X: Input matrix; it is the right-hand operand of the dot product.
        w: Weights; the left-hand operand of the dot product.
        b: Bias added to every element of the product.

    Returns:
        The model output ``np.dot(w, X) + b``.
    """
    weighted_sum = np.dot(w, X)
    return weighted_sum + b

### Cost function

In [None]:
def cost_function(z, y):
    """Return half the mean squared error between predictions and targets.

    Args:
        z: Model predictions.
        y: Targets; a 2-D array whose second axis length is used as the
            number of samples ``m``.

    Returns:
        ``(1 / (2 * m)) * sum((z - y) ** 2)``.
    """
    n_samples = y.shape[1]
    residuals = z - y
    scale = 1 / (2 * n_samples)
    return scale * np.sum(np.square(residuals))

### Gradient Descent

In [None]:
def gradient_descent(X, y, z):
    """Compute the gradients of the cost with respect to weights and bias.

    Args:
        X: Input matrix; its transpose is multiplied by the residuals.
        y: Targets; a 2-D array whose second axis length is the number of
            samples ``m``.
        z: Model predictions for ``X``.

    Returns:
        A tuple ``(dw, db)`` where ``dw = (1/m) * dot(z - y, X.T)`` and
        ``db = (1/m) * sum(z - y)``.
    """
    n_samples = y.shape[1]
    residuals = z - y
    inv_m = 1 / n_samples
    grad_w = inv_m * np.dot(residuals, X.T)
    grad_b = inv_m * np.sum(residuals)
    return grad_w, grad_b

In [None]:
def gradient_descent_update(w, b, dw, db, learning_rate):
    """Take one gradient-descent step on the weights and the bias.

    Args:
        w: Current weights.
        b: Current bias.
        dw: Gradient with respect to the weights.
        db: Gradient with respect to the bias.
        learning_rate: Step size applied to both gradients.

    Returns:
        A tuple ``(w, b)`` holding the updated parameters.
    """
    updated_w = w - learning_rate * dw
    updated_b = b - learning_rate * db
    return updated_w, updated_b

# Linear Regression model

In [None]:
def linear_regression_model(X_train, y_train, X_val, y_val, learning_rate, epochs):
    """Fit a linear model with batch gradient descent and plot the training cost.

    Args:
        X_train: Training inputs; the length of its first axis sets the
            number of weights.
        y_train: Training targets.
        X_val: Validation inputs, used only for progress reporting.
        y_val: Validation targets, used only for progress reporting.
        learning_rate: Step size for each parameter update.
        epochs: Number of full-batch updates to perform.

    Returns:
        A tuple ``(w, b)`` with the fitted weights and bias.

    Side effects:
        Prints the training and validation cost every 20 epochs and shows a
        matplotlib figure of the training cost sampled every 10 epochs.
    """
    lenw = X_train.shape[0]
    costs_train = []
    cost_epochs = []
    w, b = initialize(lenw)
    for i in range(1, epochs + 1):
        # The training cost is measured with the parameters *before* this
        # epoch's update.
        z_train = hypothesis(X_train, w, b)
        cost_train = cost_function(z_train, y_train)
        dw, db = gradient_descent(X_train, y_train, z_train)
        w, b = gradient_descent_update(w, b, dw, db, learning_rate)
        if i % 10 == 0:
            costs_train.append(cost_train)
            cost_epochs.append(i)

        if i % 20 == 0:
            # The validation pass is only needed for reporting, so it is
            # evaluated on reporting epochs only (with the updated parameters).
            z_val = hypothesis(X_val, w, b)
            cost_val = cost_function(z_val, y_val)
            print("Epochs "+ str(i)+ '/' + str(epochs)+ ": ")
            print("Training cost " + str(cost_train)+'|'+'Validation cost ' + str(cost_val))

    # Plot against the real epoch numbers so the "Iterations" axis is accurate
    # even though the cost is sampled only every 10th epoch.
    plt.plot(cost_epochs, costs_train)
    plt.xlabel("Iterations")
    plt.ylabel("Training cost")
    plt.title("Learning rate = " + str(learning_rate))
    plt.show()
    return w, b

## Loading the datasets

In [None]:
# Read the training and test splits from the input directory.
df = pd.read_csv("../input/linear-reg-woc/Linear_train.csv")
df_test = pd.read_csv("../input/linear-reg-woc/Linear_test.csv")

# Set the targets aside before the frames are trimmed down to features.
y, y_tests = df["label"], df_test["label"]
print(y.shape)

# Remove the columns at positions 0 and 21 from both frames.
# NOTE(review): presumably an index column and the "label" column - confirm
# against the CSV layout.
df = df.drop(columns=df.columns[[0, 21]])
df_test = df_test.drop(columns=df_test.columns[[0, 21]])


## Data preprocessing

In [None]:
# Standardize the features with statistics computed on the TRAINING set only.
# The test set must go through the same transformation as the training set;
# scaling it with its own mean/std would put the two sets on different scales
# and bias the evaluation.
# NOTE(review): assumes df and df_test share the same column labels, since
# pandas aligns the subtraction/division on them - confirm.
train_mean = df.mean()
# A constant column has std 0 and would turn into NaN after division; dividing
# by 1 instead leaves such a column at 0 after centering.
train_std = df.std().replace(0, 1)

x_train_scaled = (df - train_mean) / train_std
x_test_scaled = (df_test - train_mean) / train_std

# Transpose so that each column of the matrix is one row of the frame.
x_train = x_train_scaled.values.T
print(x_train.shape)

# Wrapping the series in a list yields a 2-D array with a single row.
y_train = np.array([y])
print(y_train.shape)

x_test = x_test_scaled.values.T
y_test = np.array([y_tests])
print(y_test.shape)

## Fitting with training samples

In [None]:
# Train on the training split; the test split is passed as the validation data
# for progress reporting.
w, b = linear_regression_model(
    x_train,
    y_train,
    x_test,
    y_test,
    learning_rate=0.01,
    epochs=1000,
)

In [None]:
# Predict on the test inputs with the fitted parameters and keep the matching
# ground-truth values for the error computation.
predictions1 = hypothesis(x_test, w, b)
targets = y_test

## Root mean squared error

In [None]:
def rmse(predictions1, targets):
    """Return the root mean squared error between predictions and targets.

    Args:
        predictions1: Predicted values.
        targets: Ground-truth values, broadcast-compatible with
            ``predictions1``.

    Returns:
        The square root of the mean of the squared differences.
    """
    squared_errors = np.square(predictions1 - targets)
    return np.sqrt(squared_errors.mean())
# Error of the hand-written gradient-descent model on the test split.
rms_error = rmse(predictions1=predictions1, targets=targets)
print("rmse_error = ", rms_error)

## Root mean squared error from the scikit-learn model

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Reference fit with scikit-learn. The arrays are transposed back because the
# estimator takes X as (n_samples, n_features).
model = LinearRegression().fit(x_train.T, y_train.T)
predictions = model.predict(x_test.T)

# Same metric as the hand-written model: square root of the mean squared error.
lin_mse = mean_squared_error(y_test.T, predictions)
lin_rmse = np.sqrt(lin_mse)
print("rmse_error using sckit-learn model: ", lin_rmse)