In [1]:
# evaluate a model using all input features
from pandas import read_csv
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [2]:
# load the dataset
def load_dataset(filename):
    """Read a CSV file and separate it into input features and a target.

    The first row of the file is read as the header. Every column except the
    last is returned as the input matrix, converted to strings; the last
    column is returned as the target without any conversion.

    Args:
        filename: Location of the CSV file, passed straight to ``read_csv``.

    Returns:
        A tuple ``(X, y)`` where ``X`` is a 2-D string array made of all
        columns but the last, and ``y`` is a 1-D array holding the last
        column.
    """
    # header=0: the first line of the file supplies column names, not data
    table = read_csv(filename, header=0).values
    # all columns but the final one are inputs; the final column is the target
    features, target = table[:, :-1], table[:, -1]
    # only the inputs are cast to str; the target is returned as read
    return features.astype(str), target

In [3]:
# prepare input data
def prepare_inputs(X_train, X_test):
    """Ordinal-encode the input features of the train and test splits.

    The encoder is fit on ``X_train`` only and is then used to transform both
    ``X_train`` and ``X_test``.

    Args:
        X_train: Training inputs; used to fit the encoder.
        X_test: Test inputs; transformed with the encoder fit on ``X_train``.

    Returns:
        A tuple ``(X_train_enc, X_test_enc)`` holding the encoded splits.
    """
    # NOTE(review): the encoder is built with default settings, so a category
    # that occurs only in X_test presumably makes transform() raise — confirm
    # against the installed scikit-learn version.
    encoder = OrdinalEncoder().fit(X_train)
    return encoder.transform(X_train), encoder.transform(X_test)

In [4]:
# prepare target
def prepare_targets(y_train, y_test):
    """Label-encode the target values of the train and test splits.

    The encoder is fit on ``y_train`` only and is then used to transform both
    ``y_train`` and ``y_test``.

    Args:
        y_train: Training labels; used to fit the encoder.
        y_test: Test labels; transformed with the encoder fit on ``y_train``.

    Returns:
        A tuple ``(y_train_enc, y_test_enc)`` holding the encoded labels.
    """
    # NOTE(review): a label that occurs only in y_test presumably makes
    # transform() raise — confirm against the installed scikit-learn version.
    encoder = LabelEncoder().fit(y_train)
    return encoder.transform(y_train), encoder.transform(y_test)

In [5]:
# load the dataset: X holds every column but the last (as strings), y holds the last column
X, y = load_dataset('breast-cancer.csv')

In [6]:
# split into train and test sets: test_size=0.33 holds out about a third of the
# rows for testing, and random_state=1 fixes the shuffle so reruns give the same split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)

In [7]:
# prepare input data: ordinal-encode the features, fitting the encoder on the
# training split only and applying it to both splits
X_train_enc, X_test_enc = prepare_inputs(X_train, X_test)

In [8]:
# prepare output data: label-encode the target, fitting the encoder on the
# training labels only and applying it to both splits
y_train_enc, y_test_enc = prepare_targets(y_train, y_test)

In [9]:
# fit the model: logistic regression using the 'lbfgs' solver, trained on the
# encoded training inputs and labels
model = LogisticRegression(solver='lbfgs')
model.fit(X_train_enc, y_train_enc)

In [10]:
# predict class labels for the encoded test inputs
yhat = model.predict(X_test_enc)

In [11]:
# score the predictions: fraction of test labels predicted correctly,
# reported as a percentage with two decimals
accuracy = accuracy_score(y_true=y_test_enc, y_pred=yhat)
print('ACCURACY: %.2f' % (100 * accuracy))

ACCURACY: 74.44
