In [1]:
import numba
from numba import jit, njit, vectorize
import numpy as np
import math
import random
import pandas as pd

In [2]:
import time

In [126]:
def logistic_regression(X,Y, w, iterations=1000):
    """Fit weights with fixed-step batch gradient descent on the logistic loss.

    Every iteration subtracts ``dot((1 / (1 + exp(-Y * (X @ w))) - 1) * Y, X)``
    from the weights, which is the gradient of
    ``sum(log(1 + exp(-Y * (X @ w))))`` taken with a step size of 1.
    Because each sample's term is multiplied by its label, samples whose
    label is 0 contribute nothing to the update.

    Parameters
    ----------
    X : 2-D array with one row per sample and one column per weight.
    Y : 1-D array of labels, one per row of ``X``.
    w : 1-D array-like of initial weights. It is NOT modified; training runs
        on a private float copy so the caller can reuse the same starting
        point for another run.
    iterations : number of gradient steps to perform.

    Returns
    -------
    numpy.ndarray
        A new float array holding the weights after ``iterations`` steps.
    """
    # Copy (and promote to float) so the in-place update below can neither
    # leak into the caller's array nor fail on an integer-typed ``w``.
    weights = np.array(w, dtype=float)

    for _ in range(iterations):
        margins = Y * np.dot(X, weights)
        weights -= np.dot((1.0 / (1.0 + np.exp(-margins)) - 1.0) * Y, X)
    return weights

In [141]:
# Only request Numba's automatic parallelisation when more than one thread
# is configured.
run_parallel = numba.config.NUMBA_NUM_THREADS > 1

@njit(parallel=run_parallel, fastmath=True)
def parallel_logistic_regression(X,Y,w, iterations=1000):
    """Numba-compiled counterpart of the gradient-descent logistic regression.

    Every iteration subtracts ``dot((1 / (1 + exp(-Y * (X @ w))) - 1) * Y, X)``
    from the weights (step size 1). The function is compiled with
    ``fastmath=True``, so results may differ from a plain NumPy evaluation in
    the last floating-point digits.

    Parameters
    ----------
    X : 2-D array with one row per sample and one column per weight.
    Y : 1-D array of labels, one per row of ``X``.
    w : 1-D array of initial weights. It is NOT modified; the update runs on
        a copy so the caller's buffer keeps its starting values.
    iterations : number of gradient steps to perform.

    Returns
    -------
    A new array holding the weights after ``iterations`` steps.
    """
    # A separate local name avoids re-binding the argument, and the copy keeps
    # the in-place update from writing through to the caller's array.
    weights = w.copy()

    for i in range(iterations):
        weights -= np.dot(((1.0 / (1.0 + np.exp(-Y * np.dot(X, weights))) - 1.0) * Y), X)
    return weights

In [91]:
def generate_data():
    """Create the synthetic training file ``LogReg.csv`` unless it is readable.

    If ``pd.read_csv("LogReg.csv")`` succeeds nothing else happens. If it
    raises, 10000 rows are generated with the ``random`` module: the first
    half gets ``points`` in [1.8, 2.4], ``relation`` in [1.5, 4.5] and label
    0; the second half gets ``points`` in [2.3, 2.9], ``relation`` in
    [4.2, 6.2] and label 1. The first ten values of each column are printed
    and the table is written to ``LogReg.csv`` without the index.

    Returns ``None`` in both cases.
    """
    try:
        pd.read_csv("LogReg.csv")
    except Exception:
        total = 10000
        half = total // 2
        rest = total - half

        # All ``points`` draws happen before any ``relation`` draw, and the
        # low-range rows come before the high-range rows within each column.
        point_values = (
            [random.uniform(1.8, 2.4) for _ in range(half)]
            + [random.uniform(2.3, 2.9) for _ in range(rest)]
        )
        relation = (
            [random.uniform(1.5, 4.5) for _ in range(half)]
            + [random.uniform(4.2, 6.2) for _ in range(rest)]
        )
        points = np.array(point_values)
        labels = np.array([0] * half + [1] * rest)

        print("Points : ", points[:10])
        print("Relation : ", relation[:10])
        print("Labels : ", labels[:10])

        frame = pd.DataFrame(
            {
                'points': points,
                'relation': relation,
                'y': labels,
            }
        )
        frame.to_csv("LogReg.csv", index=False)


In [92]:
# Write LogReg.csv if it cannot be read yet; when the file is already readable
# this call produces no output and changes nothing.
generate_data()

Points :  [2.13812739 1.83594296 2.39481262 2.01804365 2.22318889 2.05347926
 2.12447765 2.30259667 2.07008297 2.03928762]
Relation :  [3.0328152120221947, 1.6369077733451685, 2.7197463432569684, 3.2403123857193687, 3.1107413425692623, 2.834948471313567, 3.1442585737612263, 4.249846036398113, 2.57430511540217, 4.0314415662144105]
Labels :  [0 0 0 0 0 0 0 0 0 0]


In [103]:
def generate_test_data():
    """Create the synthetic test file ``Testing.csv`` unless it is readable.

    If ``pd.read_csv("Testing.csv")`` succeeds nothing else happens. If it
    raises, 1000 rows are drawn with the ``random`` module: ``points`` in
    [1.9, 3.1] and ``relation`` in [1.4, 6.5]. A row is labelled 0 when its
    ``points`` value is below 2.4 and 1 otherwise. The first ten values of
    each column are printed and the table is written to ``Testing.csv``
    without the index.

    Returns ``None`` in both cases.
    """
    try:
        pd.read_csv("Testing.csv")
    except Exception:
        row_count = 1000

        # Draw every ``points`` value before any ``relation`` value.
        points = [random.uniform(1.9, 3.1) for _ in range(row_count)]
        relation = [random.uniform(1.4, 6.5) for _ in range(row_count)]
        labels = [0 if value < 2.4 else 1 for value in points]

        test = pd.DataFrame(
            {
                'points': points,
                'relation': relation,
                'y': labels,
            }
        )

        for caption, column in (
            ("Points : ", points),
            ("Relation : ", relation),
            ("Labels : ", labels),
        ):
            print(caption, column[:10])

        test.to_csv("Testing.csv", index=False)

In [104]:
# Write Testing.csv if it cannot be read yet; when the file is already readable
# this call produces no output and changes nothing.
generate_test_data()

Points :  [2.962948507804674, 2.125315016670857, 2.499591896697716, 3.089571837085578, 2.163159857321397, 2.5131247222776842, 2.8978844396508157, 2.2037906780038083, 2.5046643683382324, 2.7322073843502217]
Relation :  [5.986958765390986, 3.2988725138524613, 3.935138737866737, 3.440338394448234, 3.1742664527653375, 3.274767200817225, 5.686109665195595, 2.2445746361324757, 4.583552382244672, 3.707247775556089]
Labels :  [1, 0, 1, 1, 0, 1, 1, 0, 1, 1]


In [139]:
def compare_time(iterations=1000):
    """Time the NumPy and Numba trainers on ``LogReg.csv`` and compare them.

    The first two CSV columns are used as features and the third as labels.
    Both trainers start from the same initial weights (0.7 for each of the
    two features), and each receives its own copy so neither run can affect
    the other's starting point or result.

    Parameters
    ----------
    iterations : number of gradient steps each trainer performs. It is
        passed explicitly to both trainers so that the warm-up call and the
        timed call of the Numba function use the same compiled
        specialization.

    Prints the two wall-clock durations, whether the resulting weights agree
    within floating-point tolerance, and which variant was faster.
    Returns ``None``.
    """
    w = 2.0 * np.ones(2) - 1.3
    df = pd.read_csv("LogReg.csv")
    x,y = df.iloc[:,[0,1]].values, df.iloc[:,2].values

    # Trigger JIT compilation before timing; otherwise the first call's
    # compile time would be counted as parallel execution time.
    parallel_logistic_regression(x, y, w.copy(), 1)

    # perf_counter is the monotonic, high-resolution clock meant for
    # measuring short intervals.
    start_s = time.perf_counter()
    serial_mod = logistic_regression(x, y, w.copy(), iterations)
    end_s = time.perf_counter()
    stime = end_s - start_s
    print("Serial execution time = {} seconds".format(stime))

    start_p = time.perf_counter()
    parallel_mod = parallel_logistic_regression(x, y, w.copy(), iterations)
    end_p = time.perf_counter()
    ptime = end_p - start_p
    print("Parallel execution time = {} seconds".format(ptime))

    # Compare element-wise with a tolerance: the compiled version uses
    # fastmath, so bit-identical sums are not guaranteed.
    if np.allclose(serial_mod, parallel_mod):
        print("Succesful Execution!")
    else:
        print("Results differ between serial and parallel runs!")
    print("\nComparison ...\n")
    if ptime<stime:
        st="Parallel faster by : {} seconds".format(stime-ptime ) 
    else:
        st = "Serial faster by : {}".format(ptime-stime)
    print(st)

In [143]:
# Run the serial vs. Numba timing comparison; reads LogReg.csv, so that file
# must already exist when this cell runs.
compare_time()

Serial execution time = 0.5779614448547363 seconds
Parallel execution time = 0.17570137977600098 seconds
Succesful Execution!

Comparison ...

Parallel faster by : 0.40226006507873535 seconds
