In [1]:

# Objective here was check the variation of the run-time / execution time for an ML training / testing process, 
# as the number of cores was modified / increased


In [6]:
print(__doc__)

import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score

import time


Automatically created module for IPython interactive environment


In [20]:

const_default_num_jobs = 1
const_num_executions_to_evaluate = 1000


In [26]:
# The implementation of the function below uses code from here :
# # From - http://scikit-learn.org/stable/auto_examples/linear_model/plot_ols.html#sphx-glr-auto-examples-linear-model-plot-ols-py
# Code source: Jaques Grobler
# License: BSD 3 clause

# Define a convenience function to perform a Linear Fit on the Diabetes Database, using parameterized number of cores

def perform_linear_fit(param_n_jobs = const_default_num_jobs):

    # Load the diabetes dataset
    diabetes = datasets.load_diabetes()

    # Use only one feature
    diabetes_X = diabetes.data[:, np.newaxis, 2]

    # Split the data into training/testing sets
    diabetes_X_train = diabetes_X[:-20]
    diabetes_X_test = diabetes_X[-20:]

    # Split the targets into training/testing sets
    diabetes_y_train = diabetes.target[:-20]
    diabetes_y_test = diabetes.target[-20:]

    # Create linear regression object
    regr = linear_model.LinearRegression(n_jobs = param_n_jobs)

    # Train the model using the training sets
    start_time = time.time()
    regr.fit(diabetes_X_train, diabetes_y_train)
    end_time = time.time()
    time_delta = end_time - start_time

    # Make predictions using the testing set
    diabetes_y_pred = regr.predict(diabetes_X_test)

    return time_delta


In [29]:

def evaluate_linear_fit(param_n_jobs = const_default_num_jobs, param_n_evaluations = const_num_executions_to_evaluate):
    time_delta_list = list()
    for evaluation in range(param_n_evaluations):
        time_delta = perform_linear_fit(param_n_jobs)
        time_delta_list.append(time_delta)
    average_time_delta = np.mean(time_delta_list)
    print("Average Fitting time with : " + str(param_n_jobs) + " cores and with " + str(param_n_evaluations) + " iterations is: " + str(time_delta) + " seconds." )
    return average_time_delta


In [31]:

# Run with one core

evaluate_linear_fit(1, const_num_executions_to_evaluate)


Average Fitting time with : 1 cores and with 1000 iterations is: 0.0006468296051025391 seconds.


0.00040389299392700194

In [32]:

# Run with one core

evaluate_linear_fit(2, const_num_executions_to_evaluate)


Average Fitting time with : 2 cores and with 1000 iterations is: 0.0004019737243652344 seconds.


0.00042050838470458985

In [33]:
# Run with one core

evaluate_linear_fit(3, const_num_executions_to_evaluate)


Average Fitting time with : 3 cores and with 1000 iterations is: 0.000347137451171875 seconds.


0.00040815544128417971

In [34]:

# Run with one core

evaluate_linear_fit(4, const_num_executions_to_evaluate)


Average Fitting time with : 4 cores and with 1000 iterations is: 0.0003681182861328125 seconds.


0.00039053034782409666

In [35]:

# Run with one core

evaluate_linear_fit(-1, const_num_executions_to_evaluate)


Average Fitting time with : -1 cores and with 1000 iterations is: 0.00034689903259277344 seconds.


0.00035011863708496091