## 1 ROC and AUC

In [3]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
import warnings
warnings.filterwarnings('ignore')

In [4]:
filename = 'pima_data.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
data = pd.read_csv(filename, names=names)
array = data.values
x = array[:, :8]
y = array[:, 8]
model = LogisticRegression()
kfold = KFold(n_splits=10, random_state=4)
scoring = 'roc_auc'
score = cross_val_score(model, x, y, cv=kfold, scoring=scoring)
print ("roc_auc result: %.3f(%.3f)" % (score.mean(), score.std()))

roc_auc result: 0.826(0.043)


## 2 classification report

In [6]:
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

In [7]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33,\
                                                   random_state=4)
model = LogisticRegression().fit(x_train, y_train)
y_predicted = model.predict(x_test)
report = classification_report(y_test, y_predicted)
print (report)

              precision    recall  f1-score   support

         0.0       0.84      0.88      0.86       171
         1.0       0.72      0.66      0.69        83

    accuracy                           0.81       254
   macro avg       0.78      0.77      0.78       254
weighted avg       0.80      0.81      0.80       254



## 3 confusion matrix

In [9]:
from sklearn.metrics import confusion_matrix

In [12]:
x_train, x_test, y_train, t_test = train_test_split(x, y, test_size=0.33, random_state=4)
model = LogisticRegression().fit(x_train, y_train)
y_predicted = model.predict(x_test)
confusion = confusion_matrix(y_test, y_predicted)
classes = ['0', '1']
dataframe = pd.DataFrame(data = confusion,
                       index = classes,
                       columns = classes)
print (dataframe)

     0   1
0  150  21
1   28  55


## 4 log loss

In [13]:
model = LogisticRegression()
scoring = 'neg_log_loss'
kfold = KFold(n_splits=10, random_state=4)
result = cross_val_score(model, x, y, cv=kfold, scoring=scoring)
print ("log loss:%.3f(%.3f)" % (result.mean(), result.std()))

log loss:-0.486(0.064)


## 5 mean absolute error

In [15]:
from sklearn.linear_model import LinearRegression

In [17]:
filename = 'housing.csv'
names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS',
         'RAD', 'TAX', 'PRTATIO', 'B', 'LSTAT', 'MEDV']
data = pd.read_csv(filename, names=names, delim_whitespace=True)

In [18]:
model = LinearRegression()
kfold = KFold(n_splits=10, random_state=4)
scoring = 'neg_mean_absolute_error'
result = cross_val_score(model, x, y, cv=kfold, scoring=scoring)
print ("MAE result: {}({})".format(result.mean(), result.std()))

MAE result: -0.3368537167214989(0.022362553209089305)


## 6 mean squared error

In [19]:
model = LinearRegression()
kfold = KFold(n_splits=10, random_state=4)
scoring = 'neg_mean_squared_error'
result = cross_val_score(model, x, y, cv=kfold, scoring=scoring)
print ("MSE result: {}({})".format(result.mean(), result.std()))

MSE result: -0.16281250654356544(0.022168476633581857)
