In [91]:
from sklearn import datasets
import matplotlib.pyplot as plt
import copy

### Loading premade datasets

In [28]:
# Load digits dataset from sklearn that contains image recognition data for handwritten digits
digits = datasets.load_digits()


In [29]:
# Create features matrix,in this case an array of 64 length arrays corresponding to 8x8 grid of pixels?
features = digits.data
features

array([[ 0.,  0.,  5., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ..., 10.,  0.,  0.],
       [ 0.,  0.,  0., ..., 16.,  9.,  0.],
       ...,
       [ 0.,  0.,  1., ...,  6.,  0.,  0.],
       [ 0.,  0.,  2., ..., 12.,  0.,  0.],
       [ 0.,  0., 10., ..., 12.,  1.,  0.]])

In [30]:
# shows which digit each array in features actually corresponds to
target = digits.target
target

array([0, 1, 2, ..., 8, 9, 8])

### Making sample data 

In [80]:
# Generate features matrix, target vector, and the true coefficients
features, target, coefficients = datasets.make_regression(n_samples = 100,
 n_features = 3,
 n_informative = 3,
 n_targets = 1,
 noise = 0.0,
 coef = True,
 random_state = 1)

In [81]:
print("features \n", features[:5])

features 
 [[ 1.29322588 -0.61736206 -0.11044703]
 [-2.793085    0.36633201  1.93752881]
 [ 0.80186103 -0.18656977  0.0465673 ]
 [ 0.12910158  0.50274088  1.6169496 ]
 [-0.69166075 -0.6871727  -0.39675353]]


In [82]:
print("dependent variables \n", target[:5])

dependent variables 
 [ -10.37865986   25.5124503    19.67705609  149.50205427 -121.65210879]


In [83]:
print(coefficients)

[44.19042807 98.97517077 58.15774073]


### Preprocessio

In [84]:
# Scales values to be between some range
# Normalization is the special case of rescaling when values are rescaled to [0..1]

In [85]:
from sklearn import preprocessing

In [86]:
features_c = features.copy()

In [87]:
# Create scaler object
scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))

In [88]:
# finds the min and max vals of the features array, adds them as attributes
# of the scaler object
scaler.fit(features_c)
print('min data: ', scaler.data_min_, '\n'
     'max data: ', scaler.data_max_)

min data:  [-2.793085   -2.3015387  -2.43483776] 
max data:  [2.44936865 2.18557541 2.52832571]


In [89]:
print("original features", features_c[:5])
features = scaler.transform(features_c)

original features [[ 1.29322588 -0.61736206 -0.11044703]
 [-2.793085    0.36633201  1.93752881]
 [ 0.80186103 -0.18656977  0.0465673 ]
 [ 0.12910158  0.50274088  1.6169496 ]
 [-0.69166075 -0.6871727  -0.39675353]]


In [90]:
print("transformed features", features_c[:5])

transformed features [[ 1.29322588 -0.61736206 -0.11044703]
 [-2.793085    0.36633201  1.93752881]
 [ 0.80186103 -0.18656977  0.0465673 ]
 [ 0.12910158  0.50274088  1.6169496 ]
 [-0.69166075 -0.6871727  -0.39675353]]


In [None]:
# can also do it in one step with fit_transform()


In [50]:
dir(scaler)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_check_n_features',
 '_get_param_names',
 '_get_tags',
 '_more_tags',
 '_repr_html_',
 '_repr_html_inner',
 '_repr_mimebundle_',
 '_reset',
 '_validate_data',
 'copy',
 'data_max_',
 'data_min_',
 'data_range_',
 'feature_range',
 'fit',
 'fit_transform',
 'get_params',
 'inverse_transform',
 'min_',
 'n_features_in_',
 'n_samples_seen_',
 'partial_fit',
 'scale_',
 'set_params',
 'transform']

In [55]:
scaler.data_min_

AttributeError: 'MinMaxScaler' object has no attribute 'data_min_'