### **Model Training Using Multiple CPU Cores**

In [1]:
# Mount Google Drive at /gdrive (Colab only); the dataset CSV is read from this mount below.
from google.colab import drive
drive.mount('/gdrive')

Mounted at /gdrive


In [2]:
import pandas as pd

In [3]:
# Dataset: https://archive.ics.uci.edu/ml/datasets/wine+quality
# The white-wine file is parsed as semicolon-delimited text from the mounted Drive.
WINE_CSV_PATH = "/gdrive/My Drive/winequality-white.csv"
df = pd.read_csv(WINE_CSV_PATH, sep=";")
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


In [4]:
# List the distinct values that appear in the `quality` column.
df['quality'].unique()

array([6, 5, 7, 8, 4, 3, 9])

In [5]:
# (rows, columns) of the loaded DataFrame.
df.shape

(4898, 12)

In [6]:
# Preview the first rows again before separating the features from the target.
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


In [7]:
### Get the independent features (every column except the last one)
X=df.iloc[:,:-1]
X.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9


In [8]:
# Target (dependent variable): the last column of df.
y=df.iloc[:,-1]
y.head()

0    6
1    6
2    6
3    6
4    6
Name: quality, dtype: int64

#### **1 CPU Core**
1. A machine learning algorithm such as Random Forest

2. RepeatedStratifiedKFold

3. `cross_val_score`

In [9]:
from time import perf_counter
from time import time

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import cross_val_score

In [10]:
# n_jobs tells cross_val_score how many workers it may use; the parallelism is
# across the cross-validation fits (5 splits x 3 repeats = 15), not inside a fit.
# Baseline: n_jobs=1 runs every fit sequentially.
# random_state pins the forest so repeated runs of this cell fit the same trees.
model = RandomForestClassifier(n_estimators=100, random_state=4)
# define the evaluation procedure
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=4)
# perf_counter() is a monotonic, high-resolution clock meant for measuring intervals.
start_time = perf_counter()
n_scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=1)
end_time = perf_counter()
print(end_time - start_time)

13.957066535949707


#### **2 CPU Cores**

In [11]:
# Same 15-fit cross-validation, with the fits spread over 2 workers (n_jobs=2).
# random_state pins the forest so repeated runs of this cell fit the same trees.
model = RandomForestClassifier(n_estimators=100, random_state=4)
# define the evaluation procedure
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=4)
# perf_counter() is a monotonic, high-resolution clock meant for measuring intervals.
start_time = perf_counter()
n_scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=2)
end_time = perf_counter()
print(end_time - start_time)

11.975823163986206


#### **4 CPU Cores**

In [12]:
# Same 15-fit cross-validation, with the fits spread over 4 workers (n_jobs=4).
# random_state pins the forest so repeated runs of this cell fit the same trees.
model = RandomForestClassifier(n_estimators=100, random_state=4)
# define the evaluation procedure
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=4)
# perf_counter() is a monotonic, high-resolution clock meant for measuring intervals.
start_time = perf_counter()
n_scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=4)
end_time = perf_counter()
print(end_time - start_time)

11.841022729873657


#### **Let's utilize all the cores**

In [13]:
# Same 15-fit cross-validation; n_jobs=-1 asks scikit-learn to use all available processors.
# random_state pins the forest so repeated runs of this cell fit the same trees.
model = RandomForestClassifier(n_estimators=100, random_state=4)
# define the evaluation procedure
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=4)
# perf_counter() is a monotonic, high-resolution clock meant for measuring intervals.
start_time = perf_counter()
n_scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
end_time = perf_counter()
print(end_time - start_time)

10.772165775299072


In [14]:
# Time the same 15-fit cross-validation for n_jobs = 1..9.
# Asking for more workers than the machine has cores is not expected to help,
# so the timings should level off once the available cores are saturated.
cores = list(range(1, 10))
# The estimator and the splitter do not change between iterations, so build them once.
# cross_val_score clones the estimator for every fit, and the fixed random_state
# values make every iteration train the same forest on the same splits, which
# keeps the timings comparable across n_jobs values.
model = RandomForestClassifier(n_estimators=100, random_state=4)
# stratified K Fold
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=4)
for core in cores:
    # perf_counter() is a monotonic, high-resolution clock meant for measuring intervals.
    start_time = perf_counter()
    n_scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=core)
    end_time = perf_counter()
    total_time = end_time - start_time
    print("For Cores {} Training Time was {}secs".format(core, total_time))

For Cores 1 Training Time was 13.949216365814209secs
For Cores 2 Training Time was 10.55215573310852secs
For Cores 3 Training Time was 11.158464908599854secs
For Cores 4 Training Time was 11.071621656417847secs
For Cores 5 Training Time was 11.252355813980103secs
For Cores 6 Training Time was 11.109924554824829secs
For Cores 7 Training Time was 11.10416054725647secs
For Cores 8 Training Time was 11.442140102386475secs
For Cores 9 Training Time was 11.326849937438965secs
