In [29]:
# Model evaluation
from sklearn import datasets
from sklearn import metrics 
from sklearn.model_selection import KFold, cross_val_score 
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.datasets import load_diabetes, load_iris, make_classification
from sklearn.dummy import DummyRegressor, DummyClassifier
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score


In [2]:
# 11.1 Cross-validating models
# Load the handwritten-digits dataset and separate the inputs from the labels.
digits = datasets.load_digits()
features, target = digits.data, digits.target

# Chain the scaler and the classifier into one estimator so that both steps
# are fitted together inside each cross-validation split.
standardizer = StandardScaler()
logit = LogisticRegression()
pipeline = make_pipeline(standardizer, logit)

# Ten shuffled folds; the fixed seed keeps the fold assignment repeatable.
kf = KFold(n_splits=10, shuffle=True, random_state=1)

# Accuracy of the pipeline on every fold (n_jobs=-1 runs folds in parallel
# on all available processors).
cv_results = cross_val_score(
    pipeline, features, target, cv=kf, scoring="accuracy", n_jobs=-1
)

# Mean accuracy over the ten folds.
cv_results.mean()

# k-fold cross-validation
# We split the data into k parts called folds.
# The model is then trained using k - 1 folds - combined into one training set - and
# the last fold is used as the test set.
# The performance of the model for each of the k iterations is then averaged to produce an
# overall measurement.




np.float64(0.9699472377405337)

In [3]:
# Display the individual per-fold accuracy scores returned by cross_val_score.
cv_results

array([0.97777778, 0.98888889, 0.96111111, 0.94444444, 0.97777778,
       0.98888889, 0.95555556, 0.98882682, 0.97765363, 0.93854749])

In [None]:
# Important points:
# 1. KFCV assumes that each observation was created independently of the others (IID).

# 2. When we use KFCV to evaluate a classifier, it is often beneficial to have folds 
# containing roughly the same percentage of observations from each of the different 
# target classes (stratified cross-validation).

# 3. When using validation sets or cross-validation, it is important to preprocess 
# data based on the training set only and then apply those transformations to both 
# the training and test sets. 


In [5]:
# Hold out 10% of the observations as a test set (seeded so the split is repeatable).
(
    features_train,
    features_test,
    target_train,
    target_test,
) = train_test_split(features, target, test_size=0.1, random_state=1)

# Fit the scaler on the training rows only, then apply that same fitted
# transformation to both the training and the test partition.
features_train_std = standardizer.fit(features_train).transform(features_train)
features_test_std = standardizer.transform(features_test)


In [6]:
# Rebuild the scaler + classifier pipeline and cross-validate it on the full
# feature matrix with the `kf` splitter, scoring each fold by accuracy.
pipeline = make_pipeline(standardizer, logit)
cv_results = cross_val_score(
    pipeline, features, target, scoring="accuracy", cv=kf, n_jobs=-1
)


In [7]:
# Display the per-fold accuracy scores from the cross-validation run above.
cv_results

array([0.97777778, 0.98888889, 0.96111111, 0.94444444, 0.97777778,
       0.98888889, 0.95555556, 0.98882682, 0.97765363, 0.93854749])

In [12]:
# 11.2 creating a baseline regression model 
diabetes = load_diabetes()
features, target = diabetes.data, diabetes.target 
features_train, features_test, target_train, target_test = train_test_split(
    features, target, random_state = 1)

dummy = DummyRegressor(strategy="mean")
dummy.fit(features_train, target_train)
dummy.score(features_test, target_test)

-0.007035918242200845

In [15]:
# For comparison, train an actual model on the same split and evaluate its
# score on the same held-out data as the baseline.
ols = LinearRegression()
ols.fit(
    features_train,
    target_train,
)

ols.score(
    features_test,
    target_test,
)

0.4439690125828355

In [16]:
# Second baseline: a dummy regressor that predicts the constant 20 for every
# observation, scored on the same held-out data.
clf = DummyRegressor(
    strategy="constant",
    constant=20,
)
clf.fit(
    features_train,
    target_train,
)
clf.score(
    features_test,
    target_test,
)




-3.1181539467984978

<img src="./assets/r_square.png">

In [18]:
# 11.3 Creating a baseline classification model
iris = load_iris()
features, target = iris.data, iris.target

# Seeded train/test split using the default test fraction.
features_train, features_test, target_train, target_test = train_test_split(
    features, target, random_state=0
)

# Baseline: guess a class uniformly at random. The guesses are random draws,
# so the generator is seeded; without a seed the score changes on every run.
clf = DummyClassifier(strategy="uniform", random_state=1)
clf.fit(features_train, target_train)

# Accuracy of the random-guess baseline on the held-out data.
clf.score(features_test, target_test)


0.34210526315789475

In [22]:
# Train a real classifier to compare against the dummy baseline.
# A random forest is a stochastic estimator, so fix its seed to make the
# reported score repeatable across runs.
classifier = RandomForestClassifier(random_state=0)
classifier.fit(features_train, target_train)

# Accuracy on the held-out data.
classifier.score(features_test, target_test)

0.9736842105263158

In [24]:
# 11.4 Evaluating binary classifier predictions
# Synthetic two-class problem: 100,000 observations and three features,
# all of them informative (seeded for repeatability).
x, y = make_classification(
    n_samples=100000,
    n_features=3,
    n_informative=3,
    n_redundant=0,
    n_classes=2,
    random_state=1,
)
logit = LogisticRegression()

# Cross-validated accuracy using cross_val_score's default splitting.
cross_val_score(logit, x, y, scoring="accuracy")


array([0.9322 , 0.933  , 0.9359 , 0.93375, 0.93705])

<img src="./assets/accuracy.png">

In [25]:
# Same synthetic two-class setup, but with 10,000 observations.
x, y = make_classification(
    n_samples=10000,
    n_features=3,
    n_informative=3,
    n_redundant=0,
    n_classes=2,
    random_state=1,
)
logit = LogisticRegression()

# Cross-validated accuracy using cross_val_score's default splitting.
cross_val_score(logit, x, y, scoring="accuracy")

array([0.9555, 0.95  , 0.9585, 0.9555, 0.956 ])

In [26]:
# Cross-validated precision for the same model and data.
cross_val_score(
    logit,
    x,
    y,
    scoring="precision",
)

array([0.95963673, 0.94820717, 0.9635996 , 0.96149949, 0.96060606])

<img src="./assets/precision.png">

In [27]:
# Cross-validated recall for the same model and data.
cross_val_score(
    logit,
    x,
    y,
    scoring="recall",
)


array([0.951, 0.952, 0.953, 0.949, 0.951])

<img src="./assets/recall_1.png">
<img src="./assets/recall_2.png">

In [28]:
# Cross-validated F1 score for the same model and data.
cross_val_score(
    logit,
    x,
    y,
    scoring="f1",
)

array([0.95529884, 0.9500998 , 0.95827049, 0.95520886, 0.95577889])

<img src="./assets/f1.png">

In [None]:
# Split the synthetic data into training and test partitions (default test
# fraction). The shuffle is seeded, like the other splits in this notebook,
# so the partition is identical on every run.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1)