In [77]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_absolute_error as mae

In [62]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
pd.set_option('display.float_format', lambda x: '%.3f' % x)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [63]:
df = pd.read_csv('iris.csv')
scale_vars = list(df.select_dtypes([np.number]).columns)

In [68]:
encoder = LabelEncoder()
scaler = MinMaxScaler()

df['species'] = encoder.fit_transform(df['species'])
df[scale_vars] = scaler.fit_transform(df[scale_vars])
X, y = train_test_split(df)

In [89]:
model = LogisticRegression(
    solver='lbfgs',
    multi_class='auto'
)
model.fit(X.drop('species', axis=1), X['species'])

train_predict = model.predict(X.drop('species', axis=1))
test_predict = model.predict(y.drop('species', axis=1))

# Train Error + Score

train_error = mae(
    train_predict, 
    X['species']
)

train_score = model.score(
    X.drop('species', axis=1), 
    X['species'].values.reshape(-1, 1)
)

# Test Error + Score

test_error = mae(
    test_predict, 
    y['species']
)

test_score = model.score(
    y.drop('species', axis=1), 
    y['species'].values.reshape(-1, 1)
)


print('Train Error: {}'.format(train_error))
print('Train Score: {}'.format(train_score))
print('Test Error: {}'.format(test_error))
print('Test Score: {}'.format(test_score))

Train Error: 0.07142857142857142
Train Score: 0.9285714285714286
Test Error: 0.13157894736842105
Test Score: 0.868421052631579


In [90]:
train_error + train_score

1.0