## Generalization of XOR: parity function problem

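Generalize the previous analysis to functions taking 3 to 10 input variables.  
We train on a dataset labelled by the parity function:
the function returns 1 when a record contains an even number of 1-valued attributes, and 0 otherwise.
Only attributes equal to 1 are counted, so for example (1, 1, 0) is labelled 1 while (1, 1, 1) is labelled 0.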

In [1]:
%run utils/helper_functions.ipynb

In [2]:
%run utils/preparation.ipynb

In [3]:
%run utils/exploration.ipynb

In [4]:
%run utils/MLP_utils.ipynb

#### Load attribute dictionary

In [5]:
# Load encoded attribute values from the Breast Cancer dataset.
# NOTE(review): load_json is not defined in this cell; presumably it is provided
# by one of the utils notebooks executed with %run above — confirm.
# NOTE(review): the JSON file under /tmp is presumably produced by an earlier
# notebook/step; this cell cannot run until that file exists — confirm.
fpath = "/tmp/DM2_attr_val_encoded.json"
# Mapping of attribute name -> list of encoded values for that attribute (see
# the preview output below). It is later passed as attr_dict to
# generate_XOR_dataset.
attr_val_breast_dataset_encoded = load_json(fpath)
    
# Preview: print one line per attribute (its name followed by its encoded values)
for k, v in attr_val_breast_dataset_encoded.items():
    print(k, v)

age [2, 3, 4, 1, 5, 0]
menopause [2, 0, 1]
tumor_size [2, 6, 5, 4, 7, 1, 0, 3, 8, 10, 9]
inv_nodes [0, 4, 2, 5, 6, 3, 1]
node_caps [2, 1, 0]
deg_malig [2, 0, 1]
breast [1, 0]
breast_quad [3, 1, 2, 5, 4, 0]
irradiat [0, 1]
Class [1, 0]


We then randomly generate sample records and label them using the parity function.

### Run MLP model training pipeline for XOR datasets (3 to 10 features) 

In [6]:
def XOR_generate_train(n_attributes, n_examples=800, n_iter=200, max_neurons=15):
    """Generate a parity (generalized XOR) dataset and search for the best MLP on it.

    Pipeline: generate the dataset, convert it to network inputs, print a
    preview of the inputs/labels, run the model search, and print the best
    model found. Nothing is returned; results are reported through the
    helpers' printed output.

    The helpers used here (generate_XOR_dataset, get_nn_inputs, pprint_X_y,
    find_best_model, pprint_best_model) and the global
    attr_val_breast_dataset_encoded are not defined in this function; they
    must already exist in the notebook namespace (presumably via the %run'd
    utils notebooks and the attribute-loading cell — verify).

    Parameters
    ----------
    n_attributes : int
        Number of input attributes of the generated dataset. Also bounds the
        search depth: max_layers is set to n_attributes - 1.
    n_examples : int, default 800
        Number of records requested from generate_XOR_dataset.
    n_iter : int, default 200
        Forwarded to find_best_model as n_iter (presumably the number of
        candidate configurations tried — confirm against find_best_model).
    max_neurons : int, default 15
        Forwarded to find_best_model as max_neurons.

    The defaults reproduce the previously hard-coded values; lower them to
    reduce the cost of the search for larger n_attributes.
    """
    print("{} attributes:".format(n_attributes))

    # Duplicates are explicitly allowed in the generated records.
    XOR_df = generate_XOR_dataset(n_examples=n_examples,
                                  attr_dict=attr_val_breast_dataset_encoded,
                                  n_attributes=n_attributes,
                                  no_duplicate=False)

    X, y = get_nn_inputs(XOR_df)
    pprint_X_y(X, y)

    # The maximum depth of the searched networks grows with the input size.
    best_model = find_best_model(X, y, n_iter=n_iter,
                                 max_layers=n_attributes - 1,
                                 max_neurons=max_neurons)

    pprint_best_model(best_model)

In [7]:
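# NOTE: superseded by XOR_generate_train() defined above; this original loop is
# kept for reference only (it used n_iter=300, whereas the function uses 200).
# for n_attributes in range(3, 11):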
    
#     print("{} attributes:".format(n_attributes))
    
#     #print("Training for {n_attributes}-attributes XOR dataset:".format(n_attributes))
    
#     XOR_df = generate_XOR_dataset(n_examples=800,
#                          attr_dict=attr_val_breast_dataset_encoded,
#                          n_attributes=n_attributes,
#                          no_duplicate=False)
    
#     X, y = get_nn_inputs(XOR_df)
    
#     pprint_X_y(X, y)
    
#     best_model = find_best_model(X, y, n_iter=300,
#                                  max_layers=n_attributes-1,
#                                  max_neurons=15)
    
#     pprint_best_model(best_model)
#     print('\t---------------------------\n')

In [None]:
XOR_generate_train(n_attributes=3)

3 attributes:
Dataset size: 800
Sample records:
(0, 1, 2)
(0, 2, 2)
(1, 1, 0)
(1, 6, 1)
(0, 4, 2)
(0, 2, 1)
(1, 1, 1)
(0, 6, 2)
(0, 1, 0)
(0, 2, 1)

Sample labels
[0, 1, 1, 1, 1, 0, 0, 1, 0, 0]
best params [score=1.0]:
{
  "activation": "logistic",
  "alpha": 0.0073495918367346945,
  "hidden_layer_sizes": [
    12,
    13
  ],
  "learning_rate": "constant",
  "learning_rate_init": 0.009393877551020408,
  "max_iter": 1000,
  "momentum": 0.9,
  "solver": "lbfgs"
}


In [9]:
XOR_generate_train(n_attributes=4)

4 attributes:
Dataset size: 800
Sample records:
(1, 1, 1, 1)
(5, 1, 0, 1)
(4, 5, 0, 5)
(5, 5, 1, 10)
(1, 4, 0, 1)
(5, 4, 0, 1)
(1, 1, 1, 1)
(1, 1, 1, 1)
(3, 0, 0, 0)
(2, 0, 1, 4)

Sample labels
[1, 1, 1, 0, 1, 0, 1, 1, 1, 0]
best params [score=0.9675]:
{
  "activation": "tanh",
  "alpha": 0.00673795918367347,
  "hidden_layer_sizes": [
    11,
    11,
    8
  ],
  "learning_rate": "invscaling",
  "learning_rate_init": 0.008383673469387756,
  "max_iter": 1000,
  "momentum": 0.1,
  "solver": "adam"
}


In [10]:
XOR_generate_train(n_attributes=5)

5 attributes:
Dataset size: 800
Sample records:
(1, 1, 1, 1, 1)
(0, 2, 1, 0, 1)
(0, 0, 0, 2, 8)
(1, 2, 0, 1, 1)
(0, 1, 0, 0, 1)
(1, 1, 6, 1, 1)
(0, 1, 6, 2, 4)
(0, 2, 5, 2, 10)
(1, 1, 1, 1, 1)
(0, 1, 4, 0, 2)

Sample labels
[0, 1, 1, 0, 1, 1, 0, 1, 0, 0]
best params [score=0.86375]:
{
  "activation": "tanh",
  "alpha": 0.01,
  "hidden_layer_sizes": [
    15,
    15,
    7,
    4
  ],
  "learning_rate": "invscaling",
  "learning_rate_init": 0.002322448979591837,
  "max_iter": 1200,
  "momentum": 0.7000000000000001,
  "solver": "adam"
}


In [11]:
XOR_generate_train(n_attributes=6)

6 attributes:
Dataset size: 800
Sample records:
(1, 1, 1, 3, 1, 2)
(1, 0, 2, 6, 0, 2)
(0, 1, 1, 1, 1, 1)
(1, 0, 3, 1, 0, 1)
(5, 1, 0, 3, 0, 0)
(5, 0, 1, 1, 1, 0)
(1, 1, 1, 1, 1, 0)
(1, 1, 1, 1, 1, 2)
(1, 0, 0, 3, 1, 0)
(1, 0, 1, 1, 1, 1)

Sample labels
[1, 0, 0, 0, 0, 0, 0, 0, 1, 0]
best params [score=0.78875]:
{
  "activation": "tanh",
  "alpha": 0.0077573469387755105,
  "hidden_layer_sizes": [
    12,
    9,
    15,
    13,
    3
  ],
  "learning_rate": "adaptive",
  "learning_rate_init": 0.0077775510204081645,
  "max_iter": 1200,
  "momentum": 0.7000000000000001,
  "solver": "lbfgs"
}


In [None]:
XOR_generate_train(n_attributes=7)

7 attributes:
Dataset size: 800
Sample records:
(4, 0, 1, 1, 0, 1, 1)
(4, 1, 4, 4, 1, 0, 10)
(2, 0, 3, 1, 1, 2, 0)
(0, 0, 5, 2, 1, 2, 10)
(4, 0, 1, 2, 0, 0, 10)
(2, 0, 4, 1, 1, 2, 8)
(1, 1, 1, 1, 1, 0, 1)
(2, 0, 0, 5, 0, 1, 3)
(3, 0, 2, 1, 1, 1, 3)
(1, 0, 0, 0, 0, 1, 9)

Sample labels
[1, 1, 1, 0, 0, 1, 1, 0, 0, 1]
best params [score=0.69]:
{
  "activation": "tanh",
  "alpha": 0.008165102040816326,
  "hidden_layer_sizes": [
    9,
    14,
    12,
    5,
    15,
    15
  ],
  "learning_rate": "constant",
  "learning_rate_init": 0.008585714285714287,
  "max_iter": 1000,
  "momentum": 0.9,
  "solver": "adam"
}


In [None]:
# These larger runs crash the Jupyter kernel; they need to be tuned down
# (e.g. a smaller search) before being re-enabled.
# XOR_generate_train(n_attributes=8)
# XOR_generate_train(n_attributes=9)
# XOR_generate_train(n_attributes=10)

8 attributes:
Dataset size: 800
Sample records:
(0, 2, 1, 2, 1, 1, 0, 1)
(0, 1, 1, 1, 1, 1, 1, 1)
(0, 2, 1, 0, 0, 0, 2, 6)
(0, 1, 2, 1, 0, 1, 2, 1)
(1, 1, 2, 1, 0, 1, 1, 1)
(0, 5, 1, 1, 0, 1, 0, 6)
(1, 1, 1, 1, 1, 1, 0, 1)
(1, 3, 1, 1, 0, 0, 2, 1)
(0, 0, 2, 6, 0, 2, 0, 10)
(0, 2, 2, 4, 0, 0, 0, 1)

Sample labels
[1, 0, 0, 1, 1, 0, 0, 1, 1, 0]
