## Model Training Using Multiple CPU Cores

In [1]:
import pandas as pd

In [2]:
# Read the semicolon-delimited wine-quality CSV and preview its first rows.
df = pd.read_csv("Dataset/winequality-white.csv", delimiter=";")
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


In [3]:
# Distinct labels present in the "quality" column.
quality_column = df["quality"]
quality_column.unique()

array([6, 5, 7, 8, 4, 3, 9])

In [5]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(4898, 12)

In [6]:
# Independent features: every column of the frame except the last one.
feature_columns = df.columns[:-1]
X = df[feature_columns]
X.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9


In [7]:
# Dependent feature (target): the last column of the frame.
target_column = df.columns[-1]
Y = df[target_column]
Y.head()

0    6
1    6
2    6
3    6
4    6
Name: quality, dtype: int64


## 1 CPU Core
- Machine learning algorithm: Random Forest
- Cross-validation splitter: `RepeatedStratifiedKFold`
- Scoring helper: `cross_val_score`

In [8]:
from time import perf_counter, time

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import cross_val_score

In [9]:
# Baseline: n_jobs controls how many workers cross_val_score may use; here 1.

# Fixed seed so re-running this cell fits the same forests and repeats the
# same amount of work.
model = RandomForestClassifier(n_estimators=100, random_state=4)
# Evaluation procedure: 5 stratified folds repeated 3 times.
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=4)
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time() can jump when the system clock is adjusted.
start_time = perf_counter()
n_scores = cross_val_score(model, X, Y, scoring="accuracy", cv=cv, n_jobs=1)
end_time = perf_counter()
# Elapsed wall-clock seconds for the full cross-validation.
print(end_time - start_time)



27.772057056427002



## 2 CPU Cores

In [10]:

# Same cross-validation as the baseline, spread over 2 workers (n_jobs=2).

# Fixed seed so re-running this cell fits the same forests and repeats the
# same amount of work.
model = RandomForestClassifier(n_estimators=100, random_state=4)
# Evaluation procedure: 5 stratified folds repeated 3 times.
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=4)
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time() can jump when the system clock is adjusted.
start_time = perf_counter()
n_scores = cross_val_score(model, X, Y, scoring="accuracy", cv=cv, n_jobs=2)
end_time = perf_counter()
# Elapsed wall-clock seconds for the full cross-validation.
print(end_time - start_time)

20.218788862228394


## 4 CPU Cores

In [11]:
# Same cross-validation as the baseline, spread over 4 workers (n_jobs=4).

# Fixed seed so re-running this cell fits the same forests and repeats the
# same amount of work.
model = RandomForestClassifier(n_estimators=100, random_state=4)
# Evaluation procedure: 5 stratified folds repeated 3 times.
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=4)
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time() can jump when the system clock is adjusted.
start_time = perf_counter()
n_scores = cross_val_score(model, X, Y, scoring="accuracy", cv=cv, n_jobs=4)
end_time = perf_counter()
# Elapsed wall-clock seconds for the full cross-validation.
print(end_time - start_time)

13.282578706741333


## 8 CPU Cores

In [12]:
# Same cross-validation as the baseline, spread over 8 workers (n_jobs=8).

# Fixed seed so re-running this cell fits the same forests and repeats the
# same amount of work.
model = RandomForestClassifier(n_estimators=100, random_state=4)
# Evaluation procedure: 5 stratified folds repeated 3 times.
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=4)
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time() can jump when the system clock is adjusted.
start_time = perf_counter()
n_scores = cross_val_score(model, X, Y, scoring="accuracy", cv=cv, n_jobs=8)
end_time = perf_counter()
# Elapsed wall-clock seconds for the full cross-validation.
print(end_time - start_time)

10.455442190170288


## Let's utilize all the cores

In [13]:

# Same cross-validation, with n_jobs=-1 so scikit-learn uses all processors.

# Fixed seed so re-running this cell fits the same forests and repeats the
# same amount of work.
model = RandomForestClassifier(n_estimators=100, random_state=4)
# Evaluation procedure: 5 stratified folds repeated 3 times.
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=4)
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time() can jump when the system clock is adjusted.
start_time = perf_counter()
# X / Y are the feature and target frames split off from df earlier in the
# notebook; reuse them instead of re-slicing df here.
n_scores = cross_val_score(model, X, Y, scoring="accuracy", cv=cv, n_jobs=-1)
end_time = perf_counter()
# Elapsed wall-clock seconds for the full cross-validation.
print(end_time - start_time)

8.129827976226807


In [14]:
# Sweep the number of parallel workers and report the wall-clock time of each
# cross-validation run.
# NOTE(review): n_jobs values above the machine's logical CPU count (see
# os.cpu_count()) oversubscribe the CPU and are not expected to help; confirm
# the list against the hardware this runs on.
cores = [1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 16, 18, 19, 32]

# Loop-invariant setup, built once. cross_val_score clones the estimator for
# every fit, and the integer random_state makes the splitter produce the same
# folds on every run, so each iteration does the same work.
model = RandomForestClassifier(n_estimators=100, random_state=4)
# Repeated stratified K-fold: 5 folds repeated 3 times.
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=4)

for core in cores:
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring elapsed intervals; time() can jump with system clock changes.
    start_time = perf_counter()
    # X / Y are the feature and target frames split off from df earlier in the
    # notebook; reuse them instead of re-slicing df on every iteration.
    n_scores = cross_val_score(model, X, Y, scoring="accuracy", cv=cv, n_jobs=core)
    end_time = perf_counter()
    total_time = end_time - start_time
    print("For Cores {} Training Time was {}secs".format(core, total_time))

For Cores 1 Training Time was 26.532588005065918secs
For Cores 2 Training Time was 20.65456247329712secs
For Cores 3 Training Time was 14.761335134506226secs
For Cores 4 Training Time was 13.22062635421753secs
For Cores 5 Training Time was 11.497109413146973secs
For Cores 6 Training Time was 6.633298873901367secs
For Cores 7 Training Time was 11.966639518737793secs
For Cores 8 Training Time was 6.960367441177368secs
For Cores 9 Training Time was 6.854358196258545secs
For Cores 12 Training Time was 7.520061016082764secs
For Cores 16 Training Time was 5.266190767288208secs
For Cores 18 Training Time was 8.515698194503784secs
For Cores 19 Training Time was 7.910419464111328secs
For Cores 32 Training Time was 15.38734769821167secs
