In [1]:
import pandas as pd, numpy as np
from matplotlib import pyplot as plt
import seaborn as sns

In [5]:
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = pd.read_csv(url, names=names)

In [6]:
dataframe.head()

Unnamed: 0,preg,plas,pres,skin,test,mass,pedi,age,class
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [8]:
x = dataframe.drop(columns='class')
y = dataframe['class']

In [16]:
y.value_counts()

0    500
1    268
Name: class, dtype: int64

In [9]:
from modelhelper import data_split_standardise, model_performance_classification

In [11]:
x_train_std,x_test_std,y_train,y_test=data_split_standardise(x,y)

In [12]:
from sklearn.ensemble import RandomForestClassifier

In [24]:
rfModel=RandomForestClassifier(n_estimators=100,min_samples_split=10,min_samples_leaf=12,
                               class_weight={0:0.5,1:1.25},oob_score=True)

rfModel.fit(x_train_std,y_train)

model_performance_classification(rfModel,(x_train_std,y_train),(x_test_std,y_test))

############### Classification Report: Train Data #############################
              precision    recall  f1-score   support

           0       0.92      0.74      0.82       370
           1       0.65      0.88      0.75       206

    accuracy                           0.79       576
   macro avg       0.78      0.81      0.78       576
weighted avg       0.82      0.79      0.79       576

                                                                               
############### Classification Report: Test Data #############################
              precision    recall  f1-score   support

           0       0.90      0.75      0.82       130
           1       0.61      0.82      0.70        62

    accuracy                           0.77       192
   macro avg       0.75      0.78      0.76       192
weighted avg       0.80      0.77      0.78       192



In [25]:
rfModel.feature_importances_

array([0.05546154, 0.29728991, 0.03809301, 0.05565141, 0.05197423,
       0.22229586, 0.09077485, 0.1884592 ])

In [26]:
x.columns

Index(['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age'], dtype='object')

In [27]:
pd.DataFrame({'Feature Name':x.columns,'Importance':rfModel.feature_importances_})

Unnamed: 0,Feature Name,Importance
0,preg,0.055462
1,plas,0.29729
2,pres,0.038093
3,skin,0.055651
4,test,0.051974
5,mass,0.222296
6,pedi,0.090775
7,age,0.188459


In [30]:
rfModel.oob_score_

0.7256944444444444

In [40]:
rfModel=RandomForestClassifier(n_estimators=1000,min_samples_split=10,min_samples_leaf=12,
                               class_weight={0:0.5,1:1.25},oob_score=True,n_jobs=-1,verbose=True,max_features=6)

rfModel.fit(x_train_std,y_train)

model_performance_classification(rfModel,(x_train_std,y_train),(x_test_std,y_test))

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 196 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 446 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 796 tasks      | elapsed:    1.1s
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:    1.4s finished
[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.1s
[Parallel(n_jobs=2)]: Done 446 tasks      | elapsed:    0.1s
[Parallel(n_jobs=2)]: Done 796 tasks      | elapsed:    0.2s
[Parallel(n_jobs=2)]: Done 1000 out of 1000 | elapsed:    0.3s finished
[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 196 tasks      | elapsed:    0.0s


############### Classification Report: Train Data #############################
              precision    recall  f1-score   support

           0       0.92      0.75      0.83       370
           1       0.66      0.89      0.76       206

    accuracy                           0.80       576
   macro avg       0.79      0.82      0.79       576
weighted avg       0.83      0.80      0.80       576

                                                                               
############### Classification Report: Test Data #############################
              precision    recall  f1-score   support

           0       0.90      0.77      0.83       130
           1       0.63      0.82      0.71        62

    accuracy                           0.79       192
   macro avg       0.77      0.80      0.77       192
weighted avg       0.81      0.79      0.79       192



[Parallel(n_jobs=2)]: Done 796 tasks      | elapsed:    0.2s
[Parallel(n_jobs=2)]: Done 1000 out of 1000 | elapsed:    0.2s finished


In [41]:
x.columns

Index(['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age'], dtype='object')

In [46]:
rfModel.n_features_



8

In [47]:
for i in rfModel.estimators_:
    model=i
    print(model.n_features_in_)

8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
