In [1]:
# scaling the dataset
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [2]:
# prepare dataset
X, y = make_blobs(n_samples=100, centers=2, n_features=2, random_state=1)

In [3]:
# split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)

In [4]:
# define scaler
scaler = MinMaxScaler()

In [5]:
# fit scaler on the training dataset
scaler.fit(X_train)

In [6]:
# transform both datasets
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [7]:
# summarize the scale of each input variable
for i in range(X_test.shape[1]):
    print('>%d, train: min=%.3f, max=%.3f, test: min=%.3f, max=%.3f' %
          (i, X_train_scaled[:, i].min(), X_train_scaled[:, i].max(),
           X_test_scaled[:, i].min(), X_test_scaled[:, i].max()))

>0, train: min=0.000, max=1.000, test: min=0.047, max=0.964
>1, train: min=0.000, max=1.000, test: min=0.063, max=0.955


In [8]:
# fitting a model on the scaled dataset
from sklearn.linear_model import LogisticRegression
from pickle import dump

In [9]:
# split data into train and test sets
X_train, _, y_train, _ = train_test_split(X, y, test_size=0.33, random_state=1)

In [10]:
# define scaler
scaler = MinMaxScaler()

In [11]:
# fit scaler on the training dataset
scaler.fit(X_train)

In [12]:
# transform the training dataset
X_train_scaled = scaler.transform(X_train)

In [13]:
# DEFINE MODEL
model = LogisticRegression(solver='lbfgs')
model.fit(X_train_scaled, y_train)

In [14]:
# save the model
dump(model, open('model.pkl', 'wb'))

In [15]:
# save the scaler
dump(scaler, open('scaler.pkl', 'wb'))

In [16]:
# THE MODEL OBJECT: model.pkl
# THE SCALER OBJECT: scaler.pkl
# load model and scaler and make predictions on new data
from sklearn.metrics import accuracy_score
from pickle import load

In [17]:
# split data into train and test sets
_, X_test, _, y_test = train_test_split(X, y, test_size=0.33, random_state=1)

In [18]:
# load the model
model = load(open('model.pkl', 'rb'))

In [19]:
# load the scaler
scaler = load(open('scaler.pkl', 'rb'))

In [20]:
# check scale of the set before scaling
print('RAW test set range')
for i in range(X_test.shape[1]):
    print('>%d, min=%.3f, max=%.3f' % (i, X_test[:, i].min(), X_test[:, i].max()))

RAW test set range
>0, min=-11.270, max=0.085
>1, min=-5.581, max=5.926


In [21]:
# transform the test dataset
X_test_scaled = scaler.transform(X_test)
print('SCALED test set range')
for i in range(X_test_scaled.shape[1]):
    print('>%d, min=%.3f, max=%.3f' % (i, X_test_scaled[:, 1].min(), X_test_scaled[:, i].max()))

SCALED test set range
>0, min=0.063, max=0.964
>1, min=0.063, max=0.955


In [22]:
# make prediction on the test set
yhat = model.predict(X_test_scaled)

In [23]:
# evaluate accuracy
acc = accuracy_score(y_test, yhat)
print('TEST ACCURACY:', acc)

TEST ACCURACY: 1.0
