<br>

####  Modeling with both proximity and conditionals:  Neural Nets!

In [None]:
#
#   We demonstrate _both_ classification + regression for bitwise functions:

#     + function #1:  MAJ, the "majority" function
#                     three bits input, the most-appearing bit is the output 

#     + function #2:  XOR, the "xor" or "odd # of 1's" function 
#                     three bits input, output is their sum%2 
#                     that is, 1 if there is an odd # of 1's, 0 if an even # of 1's
#   
#   From here, we'll use NNets for the births and iris datasets
#     + births is complete
#     + iris has been started.  Your task: to complete its analysis.
#
#

In [None]:
# libraries!
import numpy as np      # numpy is Python's "array" library
import pandas as pd     # Pandas is Python's "data" library ("dataframe" == spreadsheet)

In [None]:
# let's read in our data...
# 
# for read_csv, use header=0 when row 0 is a header row
# 
# Two candidate datasets are listed; the second assignment overwrites the first,
# so 'maj_cleaned.csv' is the file that actually gets read.
# Comment out (or reorder) one of these lines to switch to the XOR data.
filename = 'xor_cleaned.csv'
filename = 'maj_cleaned.csv'
df = pd.read_csv(filename, header=0)   # encoding="latin1" et al.
print(f"{filename} : file read into a pandas dataframe.")

In [None]:
#
# let's look at our pandas dataframe  
#
df.info()

In [None]:
df

In [None]:
#
# let's keep our column names in variables, for reference
#
# The dataframe's column labels: a pandas Index, usable like a list of strings.
COLUMNS = df.columns
print(f"COLUMNS is {COLUMNS}\n")
print(f"COLUMNS[0] is {COLUMNS[0]}\n")

# Reverse lookup: column name (key) -> its integer position (value).
COL_INDEX = {column_name: position for position, column_name in enumerate(COLUMNS)}
print(f"COL_INDEX is {COL_INDEX}")


In [None]:
# all of scikit-learn's ML routines need numbers, not strings
#   ... even for categories/classifications (like species!)
#   so, we will convert the category names (here, 'zero' and 'one') to numbers:

# Names of the two output categories, plus the reverse (name -> integer) lookup.
SPECIES = ['zero', 'one']                                             # int to str
SPECIES_INDEX = {label: code for code, label in enumerate(SPECIES)}   # str to int

def convert_species(speciesname):
    """ look up the integer category for a label name
        (raises KeyError if the name is not in SPECIES_INDEX) """
    category = SPECIES_INDEX[speciesname]
    return category

# Quick check of the mapping...
for name in SPECIES:
    print(f"{name} maps to {convert_species(name)}")

In [None]:
#
# let's convert our dataframe to a numpy array, named A
#    Our ML library, scikit-learn operates entirely on numpy arrays.
#
# scikit-learn works on numpy arrays: take the dataframe's values, as floating-point
A = df.to_numpy().astype('float64')
print(A)

In [None]:
#
# This is a small table of data... 
# 
# Alternatively, we could just define any bitwise function, by hand! :-)
#


# print("+++ Start of MAJ (majority) example +++\n")

# A = np.asarray( [ 
#                     [0,0,0,  0],  # three input bits, one output bit (MAJ)
#                     [0,0,1,  0],   
#                     [0,1,0,  0],  
#                     [0,1,1,  1],   
#                     [1,0,0,  0],  
#                     [1,0,1,  1],   
#                     [1,1,0,  1],  
#                     [1,1,1,  1],
#                 ])


# print("+++ Start of XOR (exclusive or == odd #of 1's) example +++\n")

# A = np.asarray( [ 
#                     [0,0,0,  0],  # three input bits, one output bit (XOR) odd # of 1's
#                     [0,0,1,  1],   
#                     [0,1,0,  1],  
#                     [0,1,1,  0],   
#                     [1,0,0,  1],  
#                     [1,0,1,  0],   
#                     [1,1,0,  0],  
#                     [1,1,1,  1],
#                 ])

# Re-cast to float64.  When A comes from the earlier dataframe cell it was already
# cast there, so the values do not change; this matters only if one of the
# hand-written tables above is uncommented and A is rebuilt from integer literals.
A = A.astype('float64')   # and make things floating-point
print(A)

In [None]:
#
# Let's split into features and labels/categories
#

# Here, we call it X_def and y_def, because it's data "from definition," not observation

X_def = A[:,0:3].copy()   # We make a copy so we don't change A
y_def = A[:,3].copy()

def ascii_table(X,y):
    """ print one line per example: the input row, a '?' placeholder in the
        prediction column (nothing has been predicted yet), and the desired output

        X: 2d array of inputs, indexed as X[i,:]
        y: sequence of desired outputs; one table row is printed per element of y
    """
    print(f"{'input ':>18s} -> {'pred':<5s} {'des.':<5s}")
    for row_number, desired in enumerate(y):
        features = X[row_number,:]
        # !s applies str() to the array before the width/alignment spec is used
        print(f"{features!s:>18s} -> {'?':<5s} {desired:<5.0f}")
        
ascii_table(X_def,y_def)

In [None]:
#
# we can scramble the remaining data if we want to...
# 

# Here, we switch to X_all (features, inputs) and y_all (output / species)
#       to match our other modeling workflows

SCRAMBLE = False
if SCRAMBLE == True:
    # draw a fresh random row order (different on every run: no seed is set)
    NUM_ROWS = len(y_def)
    indices = np.random.permutation(NUM_ROWS)
    X_all, y_all = X_def[indices], y_def[indices]
else:
    # keep the rows in their defined order (same arrays, not copies)
    X_all, y_all = X_def, y_def

ascii_table(X_all,y_all)

In [None]:
#
# here, we _cheat_ by letting the full dataset 
# be _both_ the training and testing sets.  (There are too few otherwise!)
#
# Training and testing sets are separate copies of the *same* full dataset.
X_train, y_train = X_all.copy(), y_all.copy()
X_test, y_test = X_all.copy(), y_all.copy()

ascii_table(X_train,y_train)    # same as above

In [None]:
#
# for NNets, it's important to keep the feature values near 0, say -1. to 1. or so
#    This is done through the "StandardScaler" in scikit-learn
# 
USE_SCALER = True   # this variable is important! It tracks if we need to use the scaler...

# we "train the scaler"  (computes the mean and standard deviation)
# Import *outside* the if/else: both branches construct a StandardScaler.
# (With the import inside the "if", setting USE_SCALER = False on a fresh
#  kernel failed with NameError: name 'StandardScaler' is not defined.)
from sklearn.preprocessing import StandardScaler

if USE_SCALER == True:
    scaler = StandardScaler()
    scaler.fit(X_train)  # Scale with the training data! ave becomes 0; stdev becomes 1
else:
    # this one does no scaling!  We still create it to be consistent:
    # with_mean=False and with_std=False turn off centering and rescaling
    scaler = StandardScaler(copy=True, with_mean=False, with_std=False)
    scaler.fit(X_train)  # still need to fit, though it does not change...

scaler   # is now defined and ready to use...

# ++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Here are our scaled training and testing sets:

# Apply the scaler fitted above to both feature sets
X_train_scaled = scaler.transform(X_train) # scale!
X_test_scaled = scaler.transform(X_test) # scale!

# Plain assignments: the *_scaled label names refer to the very same arrays as
# y_train / y_test (no copy is made, and no scaling is applied to labels).
y_train_scaled = y_train  # the predicted/desired labels are not scaled
y_test_scaled = y_test  # not using the scaler
    
ascii_table(X_train_scaled,y_train_scaled)

#
# Note that the zeros have become -1's
# and the 1's have stayed 1's
#

In [None]:
#
# import our NNet library (within scikit-learn)
#
from sklearn.neural_network import MLPClassifier

#
# Here's where you can change the number of layers, neurons, and other parameters:
#
nn_classifier = MLPClassifier(hidden_layer_sizes=(6,7),  # 3 inputs -> 6 -> 7 -> output
                    max_iter=500,      # upper limit on the number of training epochs
                    activation="tanh", # the "activation function" input -> output
                    solver='sgd',      # the algorithm for optimizing weights
                    verbose=True,      # False to "mute" the training
                    shuffle=True,      # reshuffle the samples each epoch?
                    random_state=None, # set to an int for reproducibility
                    learning_rate_init=.1,       # initial step size for the weight updates
                    learning_rate = 'adaptive')  # shrink the step size when the loss stops improving

# documentation:
# scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html
#     Try verbose / activation "relu" / other network sizes ...

# Train on the scaled inputs; the labels are used as-is.
print("\n\n++++++++++  TRAINING:  begin  +++++++++++++++\n\n")
nn_classifier.fit(X_train_scaled, y_train_scaled)
print("\n++++++++++  TRAINING:   end  +++++++++++++++")
# loss_ is the loss value the fitted model reports after training
print(f"The analog prediction error (the loss) is {nn_classifier.loss_}")

In [None]:
#
# what can we see about our nnet?
#
#
# how did it do on the training data?   (It's the same as the testing data, in this case!)
#

#
# which one do we want: classifier or regressor?
#

def ascii_table_for_classifier(Xsc,y,nn,scaler):
    """ print a table comparing a classifier's predictions with the desired labels

        Xsc:    scaled inputs (these are what nn.predict / nn.predict_proba receive)
        y:      desired labels; one table row is printed per element of y
        nn:     a fitted classifier providing predict and predict_proba
        scaler: used only for inverse_transform, so the table shows unscaled inputs

        Rows that are wrong also show the per-class probabilities.
        The final line reports how many predictions were correct.  Returns None.
    """
    predicted_labels = nn.predict(Xsc)
    class_probabilities = nn.predict_proba(Xsc)
    unscaled_inputs = scaler.inverse_transform(Xsc)   # the "X to print"

    print(f"{'input ':>18s} -> {'pred':^6s} {'des.':^6s}")
    hits = 0
    for row_number, desired in enumerate(y):
        guess = predicted_labels[row_number]
        row_probs = class_probabilities[row_number,:]
        if guess == desired:
            hits += 1
            verdict = "  correct"
        else:
            verdict = "  incorrect: " + str(row_probs)
        # swap in Xsc[row_number,:] below to see the scaled values instead
        print(f"{unscaled_inputs[row_number,:]!s:>18s} -> {guess:^6.0f} {desired:^6.0f} {verdict:^10s}")
    print(f"\ncorrect predictions: {hits} out of {len(y)}")
    


#
# let's see how it did on the test data (also the training data!)
#
# Evaluate on the test rows (in this notebook they are the training rows, too)
ascii_table_for_classifier(X_test_scaled, y_test_scaled, nn_classifier, scaler)

# Peek inside the trained network
nn = nn_classifier  # less to type?
print("\n\n+++++ parameters, weights, etc. +++++\n")
print(f"\nweights/coefficients:\n")
for layer_weights in nn.coefs_:
    print(layer_weights)
print(f"\nintercepts: {nn.intercepts_}")
print(f"\nall parameters: {nn.get_params()}")

In [None]:
#
# we have a predictive model!  Let's try it out...
#

def make_prediction( Features, nn, scaler ):
    """ predict the class of a single example with a fitted classifier

        Features: one example's feature values (a flat list)
        nn:       a fitted classifier providing predict_proba and predict
        scaler:   the fitted scaler; its transform is applied before predicting

        Prints the features and the per-class probabilities;
        returns whatever nn.predict gives for the one-row input.
    """
    print("input features are", Features)
    # wrap in an outer list so the model sees a single-row 2d array, then scale it
    scaled_row = scaler.transform(np.array([Features]))
    class_probabilities = nn.predict_proba(scaled_row)
    print("nn.predict_proba == ", class_probabilities)
    return nn.predict(scaled_row)
    
# our features -- note that the inputs don't have to be bits!
Features = [ 1, 0, 1 ]      # whatever we'd like to test
# make_prediction scales the features with `scaler`, prints the class
# probabilities, and returns nn_classifier.predict's result for this one row
prediction = make_prediction(Features, nn_classifier, scaler)
print(f"prediction: {prediction}")   # just takes the max

<br>

### From classification to regression
+ NNets are more natural regressors than classifiers...
+ That is, they naturally output continuous, floating-point values
+ ... instead of a category or choice-among-labels.
+ So, let's try to predict our binary function as a floating-point output instead.

In [None]:
#
# MLPRegressor predicts _floating-point_ outputs
#

from sklearn.neural_network import MLPRegressor

# Same hidden layers, activation, solver and learning-rate settings as the
# classifier earlier in this notebook; only max_iter differs (200 here vs. 500).
nn_regressor = MLPRegressor(hidden_layer_sizes=(6,7),  # two hidden layers: 6 neurons, then 7
                    max_iter=200,          # upper limit on the number of training epochs
                    activation="tanh",     # the activation function
                    solver='sgd',          # the optimizer
                    verbose=True,          # do we want to watch as it trains?
                    shuffle=True,          # shuffle the samples each epoch?
                    random_state=None,     # set to an int for reproducibility
                    learning_rate_init=.1, # initial step size for the weight updates
                    learning_rate = 'adaptive')  # shrink the step size when the loss stops improving

# Train on the scaled inputs; the targets are the unscaled 0/1 outputs.
print("\n\n++++++++++  TRAINING:  begin  +++++++++++++++\n\n")
nn_regressor.fit(X_train_scaled, y_train_scaled)
print("++++++++++  TRAINING:   end  +++++++++++++++")

# loss_ is the loss value the fitted model reports after training
print(f"The (squared) prediction error (the loss) is {nn_regressor.loss_}")
print(f"And, its square root: {nn_regressor.loss_ ** 0.5}")

In [None]:
#
# how did it do? now we're making progress (by regressing)
#

def ascii_table_for_regressor(Xsc,y,nn,scaler):
    """ print a table comparing a regressor's predictions with the desired values

        Xsc:    scaled inputs (these are what nn.predict receives)
        y:      desired outputs; one table row is printed per element of y
        nn:     a fitted regressor providing predict
        scaler: used only for inverse_transform, so the table shows unscaled inputs

        Each row shows the prediction, the desired value, and their absolute
        difference; the final line is the mean of those differences.  Returns None.
    """
    estimates = nn.predict(Xsc)
    unscaled_inputs = scaler.inverse_transform(Xsc)   # the "X to print"

    print(f"{'input ':>18s} ->  {'pred':^6s}  {'desr':^6s}  {'absdiff':^10s}")
    total_abs_error = 0.0
    for row_number, desired in enumerate(y):
        estimate = estimates[row_number]
        miss = abs(desired - estimate)
        total_abs_error += miss
        # swap in Xsc[row_number,:] below to see the scaled values instead
        print(f"{unscaled_inputs[row_number,:]!s:>18s} ->  {estimate:<+6.3f}  {desired:<+6.3f}  {miss:^10.3f}")
    print(f"\naverage abs error: {total_abs_error/len(y)}")
    
#
# let's see how it did on the test data (also the training data!)
#
# Evaluate on the test rows (in this notebook they are the training rows, too)
ascii_table_for_regressor(X_test_scaled, y_test_scaled, nn_regressor, scaler)

# Peek inside the trained network
nn = nn_regressor  # less to type?
print("\n\n+++++ parameters, weights, etc. +++++\n")
print(f"\nweights/coefficients:\n")
for layer_weights in nn.coefs_:
    print(layer_weights)
print(f"\nintercepts: {nn.intercepts_}")
print(f"\nall parameters: {nn.get_params()}")

In [None]:
#
# again, we have a predictive model, now a regressor.  Let's try it out...
#

def make_prediction( Features, nn, scaler ):
    """ use a fitted NNet (regressor or classifier) to predict for one example

        This def reuses the name of the classifier helper defined earlier in the
        notebook, and Python keeps only the most recent definition.  So that
        re-running the classifier cell's call still shows class probabilities,
        this version handles both kinds of model:

          + model has predict_proba (a classifier): prints the probabilities,
            exactly as the earlier helper does, and returns nn.predict(row)
          + otherwise (a regressor): prints and returns nn.predict(row)

        Features: one example's feature values (a flat list)
        nn:       a fitted model providing predict (and, optionally, predict_proba)
        scaler:   the fitted scaler; its transform is applied before predicting
    """
    print("input features are", Features)
    row = np.array( [Features] )  # a list-of-lists-style input is needed
    row = scaler.transform(row)   # scale!
    if hasattr(nn, "predict_proba"):
        # classifier path: same output as the earlier classifier helper
        print("nn.predict_proba == ", nn.predict_proba(row))
        return nn.predict(row)
    # regressor path: unchanged
    prediction = nn.predict(row)
    print("nn.predict(row) == ", prediction)
    return prediction
    
# our features
Features = [ 1.0, 0.0, 1.0 ]
# make_prediction scales the features with `scaler` and returns
# nn_regressor.predict's result for this one row (a floating-point estimate)
prediction = make_prediction(Features, nn_regressor, scaler)
print(f"prediction: {prediction}")

In [None]:
#
# Welcome to NNets!  
# 
# Onward to the births and iris data.... and beyond!
#