In [1]:
# We will start with the breast cancer study again ...

from sklearn.datasets import load_breast_cancer
dataset = load_breast_cancer()

In [2]:
# A little more information on the dataset - it is stored as a dictionary.
# Here are the keys
dataset.keys()

dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names'])

In [3]:
# We can get the full description of the dataset by running print(cancer['DESCR'])
# The shape command shows we have 569 data points with 30 features
dataset['data'].shape

(569, 30)

In [4]:
# Let's set up out predictors and target data
X = dataset['data']
y = dataset['target']

In [5]:
# We'll use our familar test-train split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = .25)

In [6]:
# Neural networks tend to like the input data to be scaled ... makes the algorithm converge
# faster. We use a built in function to do this.

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

# This command 'tunes' the scaler to the train predictors ...
scaler.fit(X_train)

StandardScaler(copy=True, with_mean=True, with_std=True)

In [7]:
# Now use the scaler to scale both test and train predictors
# Now apply the transformations to the data:
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [8]:
# Now we build the neural network ...
from sklearn.neural_network import MLPClassifier

In [9]:
# Here we configure the architecure. These are hidden layers only
# The function will automatically create input nodes (one for each variable) and 
# one output node (for the target value)
mlp = MLPClassifier(hidden_layer_sizes=(30,30,30))

In [10]:
# Fit the model ... learn the weights
mlp.fit(X_train,y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(30, 30, 30), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [11]:
# The activation method is called 'relu' which stands for "rectified linear unit' - it is 
# a line model. Let's now use the model to predict the target:

predictions = mlp.predict(X_test)

In [12]:
# Check the confusion matrix ...
from sklearn.metrics import classification_report,confusion_matrix
print(confusion_matrix(y_test,predictions))

[[47  5]
 [ 0 91]]


In [13]:
# Pretty good. Let's see how it does with the banking data. We'll load the dataset and clean as before.
import pandas as pd
import numpy as np
from sklearn import preprocessing

In [15]:
# The dataset provides the bank customers’ information. It includes 41,188 records and 21 fields.
data = pd.read_csv('PythonData/banking.csv', header=0)

# Here drop the NAs
data = data.dropna()
data.drop(data.columns[[0, 3, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19]], axis=1, inplace=True)
data2 = pd.get_dummies(data, columns =['job', 'marital', 'default', 'housing', 'loan', 'poutcome'])
data2.drop(data2.columns[[12, 16, 18, 21, 24]], axis=1, inplace=True)
X = data2.iloc[:,1:]
y = data2.iloc[:,0]
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [16]:
# Again we scale the data ... although this is probably not needed (data is binary)
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [17]:
X_train.shape

(30891, 23)

In [18]:
# We'll build the network next. Let's do the same architecure as before, three
# hidden layers of the same size as the input data ...

mlp = MLPClassifier(hidden_layer_sizes=(23,23,23))

In [20]:
mlp.fit(X_train,y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(23, 23, 23), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [21]:
predictions = mlp.predict(X_test)

In [22]:
print(confusion_matrix(y_test,predictions))

[[9051  105]
 [ 920  221]]


In [23]:
# Not much better than logistic Regression. We may need to use other data variables
# or change the structure of the network.

In [37]:
# Let's try the Abalone dataset. 

data = pd.read_csv('PythonData/abalone.data', header=0)

In [38]:
print(data.describe())

            Length     Diameter       Height        WWGHT        SWGHT  \
count  4177.000000  4177.000000  4177.000000  4177.000000  4177.000000   
mean      0.523992     0.407881     0.139516     0.828742     0.359367   
std       0.120093     0.099240     0.041827     0.490389     0.221963   
min       0.075000     0.055000     0.000000     0.002000     0.001000   
25%       0.450000     0.350000     0.115000     0.441500     0.186000   
50%       0.545000     0.425000     0.140000     0.799500     0.336000   
75%       0.615000     0.480000     0.165000     1.153000     0.502000   
max       0.815000     0.650000     1.130000     2.825500     1.488000   

             VWGHT       SHWGHT        RINGS  
count  4177.000000  4177.000000  4177.000000  
mean      0.180594     0.238831     9.933684  
std       0.109614     0.139203     3.224169  
min       0.000500     0.001500     1.000000  
25%       0.093500     0.130000     8.000000  
50%       0.171000     0.234000     9.000000  
75% 

In [39]:
data_2 = data

In [49]:
print(data_2[1:10]['Sex'])

1    M
2    F
3    M
4    I
5    I
6    F
7    F
8    M
9    F
Name: Sex, dtype: object


In [63]:
# Here we convert the categories to numbers ...We will remove the 'I's first ...
data_3 = data_2[data_2['Sex'] != 'I']

In [64]:
data_3.shape

(2835, 9)

In [76]:
print(data_3[1:10]['Sex'])

1     1
2     F
3     1
6     F
7     F
8     1
9     F
10    F
11    1
Name: Sex, dtype: object


In [75]:
data_3['Sex'] = data_3['Sex'].replace(['M'], 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [77]:
data_3['Sex'] = data_3['Sex'].replace(['F'], 2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [78]:
print(data_3[0:10])

    Sex  Length  Diameter  Height   WWGHT   SWGHT   VWGHT  SHWGHT  RINGS
0     1   0.455     0.365   0.095  0.5140  0.2245  0.1010   0.150     15
1     1   0.350     0.265   0.090  0.2255  0.0995  0.0485   0.070      7
2     2   0.530     0.420   0.135  0.6770  0.2565  0.1415   0.210      9
3     1   0.440     0.365   0.125  0.5160  0.2155  0.1140   0.155     10
6     2   0.530     0.415   0.150  0.7775  0.2370  0.1415   0.330     20
7     2   0.545     0.425   0.125  0.7680  0.2940  0.1495   0.260     16
8     1   0.475     0.370   0.125  0.5095  0.2165  0.1125   0.165      9
9     2   0.550     0.440   0.150  0.8945  0.3145  0.1510   0.320     19
10    2   0.525     0.380   0.140  0.6065  0.1940  0.1475   0.210     14
11    1   0.430     0.350   0.110  0.4060  0.1675  0.0810   0.135     10


In [90]:
# Ok, we can ow split the data set and try to fit a neural network ..
X = data_3.iloc[:,1:]
y = data_3.iloc[:,0]
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [91]:
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [92]:
X_train.shape

(2126, 8)

In [93]:
mlp = MLPClassifier(hidden_layer_sizes=(8,10,8))

In [94]:
mlp.fit(X_train,y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(8, 10, 8), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [95]:
predictions = mlp.predict(X_test)

In [96]:
print(confusion_matrix(y_test,predictions))

[[245 127]
 [202 135]]


In [97]:
# Not great. Interestingly, we can do this on the original data set and predict all three:

X = data_2.iloc[:,1:]
y = data_2.iloc[:,0]
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)


In [98]:
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [99]:
mlp = MLPClassifier(hidden_layer_sizes=(8,10,8))

In [100]:
mlp.fit(X_train,y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(8, 10, 8), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [101]:
predictions = mlp.predict(X_test)

In [102]:
print(confusion_matrix(y_test,predictions))

[[ 10  45 261]
 [  6 281  72]
 [ 16  86 268]]


In [None]:
# Did better ... 