In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import time

# Number of synthetic samples to generate (the train/test split happens in a later cell).
size=1000
# Seed NumPy's global RNG so every run produces the same data set.
np.random.seed(0)
# Single feature column: `size` values drawn uniformly from [0, 1), shape (size, 1).
x = np.random.rand(size, 1)

# Target: the line y = 2 + 3x plus uniform noise in [0, 4). The noise is not
# zero-mean (its expected value is 2), so a least-squares fit is expected to
# give an intercept near 4 rather than 2, with a slope near 3.
y = 2 + 3 * x + 4* np.random.rand(size, 1)

# Scatter plot of the raw data (marker size 10).
plt.scatter(x,y,s=10)
plt.xlabel('x')
plt.ylabel('y')
plt.show()



In [None]:
class LinearRegressionUsingGD:
    """Linear regression fitted with full-batch gradient descent.

    The model minimises the half mean squared error
    ``sum((x @ w - y) ** 2) / (2 * m)`` where ``m`` is the number of samples.
    No intercept is added automatically: include a column of ones in ``x``
    if a bias term is wanted.

    Parameters
    ----------
    eta : float, default 0.05
        Learning rate (step size of each gradient update).
    n_iterations : int, default 100
        Number of gradient-descent steps performed by ``fit``.

    Attributes
    ----------
    w_ : numpy.ndarray of shape (n_features, 1)
        Weights after fitting. Only available after ``fit`` has been called.
    cost_ : list of float
        Cost at the start of each iteration, i.e. evaluated with the weights
        *before* that iteration's update.
    """

    def __init__(self, eta=0.05, n_iterations=100):
        self.eta = eta
        self.n_iterations = n_iterations

    def fit(self, x, y):
        """Fit the weights to the training data.

        Parameters
        ----------
        x : array-like of shape (n_samples, n_features)
            Design matrix.
        y : array-like of shape (n_samples, 1) or (n_samples,)
            Target values.

        Returns
        -------
        self : LinearRegressionUsingGD
            The fitted estimator, so calls can be chained.
        """
        x = np.asarray(x)
        y = np.asarray(y)
        # A flat target would broadcast (m, 1) - (m,) into an (m, m) matrix and
        # make the in-place weight update fail; treat it as a single column.
        if y.ndim == 1:
            y = y.reshape(-1, 1)

        self.cost_ = []
        self.w_ = np.zeros((x.shape[1], 1))
        m = x.shape[0]

        for _ in range(self.n_iterations):
            residuals = np.dot(x, self.w_) - y
            # Record the cost for the current weights before stepping.
            self.cost_.append(np.sum(residuals ** 2) / (2 * m))
            gradient_vector = np.dot(x.T, residuals)
            self.w_ -= (self.eta / m) * gradient_vector
        return self

    def predict(self, x):
        """Return ``x @ w_`` for a design matrix laid out like the one used in ``fit``."""
        return np.dot(x, self.w_)

In [None]:
def scatter_plot(x, y, size=10, x_label='x', y_label='y', color='b'):
    """Scatter-plot ``y`` against ``x``, then label the axes and show the figure.

    ``size`` is the marker size and ``color`` the marker colour passed to
    ``plt.scatter``; the axis labels are applied through ``set_labels``, which
    also calls ``plt.show()``.
    """
    marker_options = {'s': size, 'color': color}
    plt.scatter(x, y, **marker_options)
    set_labels(x_label=x_label, y_label=y_label)


def plot(x, y, x_label='x', y_label='y', color='r'):
    """Draw ``y`` against ``x`` as a line, then label the axes and show the figure.

    ``color`` is forwarded to ``plt.plot``; labelling and ``plt.show()`` are
    delegated to ``set_labels``.
    """
    line_options = {'color': color}
    plt.plot(x, y, **line_options)
    set_labels(x_label=x_label, y_label=y_label)


def ploty(y, x_label='x', y_label='y'):
    """Plot the sequence ``y`` against its index, then label the axes and show the figure.

    Only ``y`` is handed to ``plt.plot``, so the horizontal axis is the
    position of each value in ``y``.
    """
    plt.plot(y)
    set_labels(x_label=x_label, y_label=y_label)


def set_labels(x_label, y_label):
    """Label the x and y axes of the current pyplot axes and show the figure.

    Despite the name, this also calls ``plt.show()``, so it should be the last
    step when drawing a figure.
    """
    for apply_label, text in ((plt.xlabel, x_label), (plt.ylabel, y_label)):
        apply_label(text)
    plt.show()

In [None]:
# Build the design matrix: prepend a column of ones to x so that the first
# weight learned by the model plays the role of the intercept.
m = len(x)
xx = np.hstack((np.ones((m, 1)), x))

# Show the shapes before and after adding the bias column.
print(x.shape, xx.shape, sep='\n')

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    xx,
    y,
    test_size=0.2,
    random_state=5,
)

In [None]:
from datetime import datetime

# Time model fitting and prediction only. The timer used to be restarted just
# before plotting, so the reported duration measured figure rendering instead
# of the training it was meant to measure.
start_time = datetime.now()

# initializing the model (eta is the learning rate)
linear_regression_model = LinearRegressionUsingGD(eta=0.05, n_iterations=40)

# fit/train the model
linear_regression_model.fit(X_train, Y_train)

# predict values
y_train_predict = linear_regression_model.predict(X_train)

end_time = datetime.now()

# model parameters: w_ has one row per column of X_train, bias column first
intercept, coeffs = linear_regression_model.w_

# cost_function: one value per gradient-descent iteration
cost_function = linear_regression_model.cost_

print("Learning curve")

ploty(cost_function, 'no of iterations', 'cost function')
print("----------------------------------")

print("Model performance for training set")

# data points: column 1 of X_train is the feature (column 0 is the column of
# ones); keep it two-dimensional with shape (n_samples, 1)
XX_train = X_train[:, [1]]

plt.scatter(XX_train, Y_train, s=10)
plt.xlabel('x_train')
plt.ylabel('y_train')

# predicted values
plt.plot(XX_train, y_train_predict, color='r')
plt.show()

print('Duration: {}'.format(end_time - start_time))

## async

In [None]:
import multiprocessing as mp
import timeit
# %timeit
start_time = datetime.now()

linear_regression_model = LinearRegressionUsingGD(eta=0.05, n_iterations=40)
linear_regression_model.fit(X_train, Y_train)

# The context manager guarantees the worker processes are cleaned up even if
# the prediction raises.
with mp.Pool(mp.cpu_count()) as pool:
    print(pool)
    # args must be a tuple: `(X_train)` is just X_train, which made the worker
    # call predict(*X_train) with one positional argument per row.
    async_result = pool.apply_async(linear_regression_model.predict, args=(X_train,))
    # Wait for the worker and fetch the prediction; without get() the timing
    # below measures nothing and any worker-side exception is silently lost.
    # NOTE(review): the bound method is pickled to reach the worker; this
    # presumably relies on a fork-based start method when the class lives in
    # the notebook's __main__ -- confirm on spawn-based platforms.
    y_train_predict = async_result.get()

end_time = datetime.now()
print('Duration: {}'.format(end_time - start_time))

## sync

In [None]:
import multiprocessing as mp
start_time = datetime.now()

linear_regression_model = LinearRegressionUsingGD(eta=0.05, n_iterations=40)
linear_regression_model.fit(X_train, Y_train)

# Pool.map iterates over the first axis of X_train, so predict() runs once per
# sample row and each call returns an array of shape (1,). The context manager
# cleans the workers up even if a task raises.
with mp.Pool(8) as pool:
    # Stack the per-row results into one (n_samples, 1) array so the value has
    # the same layout as the sequential predict() output instead of being a
    # plain Python list of tiny arrays.
    y_train_predict = np.vstack(pool.map(linear_regression_model.predict, X_train))

end_time = datetime.now()
print('Duration: {}'.format(end_time - start_time))

## data parallelism

In [None]:
import pandas as pd
# Start the timer for this cell; the measured span therefore also covers the
# DataFrame construction and the function definition that follow.
start_time = datetime.now()

# Wrap the training design matrix in a DataFrame so it can be split row-wise.
df = pd.DataFrame(X_train)
def parallelize_dataframe(df, func, n_cores=4):
    """Apply ``func`` to row-wise chunks of ``df`` in parallel and join the results.

    Parameters
    ----------
    df : pandas.DataFrame, pandas.Series or array-like
        Data to split along its first axis into ``n_cores`` nearly equal chunks.
    func : callable
        Called once per chunk in a worker process. It must be picklable and
        return something ``np.concatenate`` can join along axis 0.
    n_cores : int, default 4
        Number of chunks and of worker processes.

    Returns
    -------
    numpy.ndarray
        The per-chunk results concatenated along axis 0, in chunk order.
    """
    if hasattr(df, "iloc"):
        # Split row positions instead of the frame itself: np.array_split on a
        # DataFrame goes through the deprecated DataFrame.swapaxes.
        row_chunks = np.array_split(np.arange(len(df)), n_cores)
        df_split = [df.iloc[rows] for rows in row_chunks]
    else:
        df_split = np.array_split(df, n_cores)

    # The context manager releases the workers even if func raises; map() has
    # already returned every result by the time the block exits.
    with mp.Pool(n_cores) as pool:
        parts = pool.map(func, df_split)

    # Join however many chunks were produced (this used to be hard-coded to
    # exactly four, which failed or dropped data for any other n_cores).
    return np.concatenate(parts, axis=0)

# Run predict() of the model fitted in an earlier cell on 4 row-chunks of df,
# one chunk per worker process. The returned array is discarded here; only the
# elapsed time is reported.
parallelize_dataframe(df,linear_regression_model.predict,4)

# Elapsed wall-clock time since start_time was set at the top of this cell.
end_time = datetime.now()
print('Duration: {}'.format(end_time - start_time))