#### Data Processing

In [1]:
# initialization
import pandas as pd

data = pd.read_csv("initialdata.csv")

In [155]:
encodedData = pd.DataFrame()
for col in data.columns:
    df = pd.get_dummies(data[col]) # one hot encoding
    for newcol in df.columns:
        name = str(col)+"_"+str(newcol)
        encodedData[name] = df[newcol]
        
# single output column
encodedDataComplete = encodedData.drop("Default_N", axis=1)

In [156]:
xCol = encodedDataComplete.columns[0:-1]
yCol = encodedDataComplete.columns[-1]
outputData = encodedDataComplete[yCol]
encodedData = encodedDataComplete[xCol]

In [157]:
encodedData

Unnamed: 0,Age_24,Age_30,Age_31,Age_42,Gender_F,Gender_M
0,1,0,0,0,0,1
1,0,1,0,0,1,0
2,0,0,1,0,1,0
3,0,0,0,1,0,1


In [158]:
outputData

0    1
1    1
2    0
3    0
Name: Default_Y, dtype: uint8

#### Neural Logic Network

In [4]:
# logic gate learning

import numpy
import scipy.special
import glob
import scipy.misc
class neuralNetwork:
    def __init__(self, inputNodes, hiddenOneNodes, hiddenTwoNodes, hiddenThreeNodes, finalNodes, alpha):
        self.inputNodes = inputNodes
        self.hiddenOneNodes = hiddenOneNodes
        self.hiddenTwoNodes = hiddenTwoNodes
        self.hiddenThreeNodes = hiddenThreeNodes
        self.finalNodes = finalNodes
        self.alpha = alpha
        self.weightsInputHidden = numpy.random.normal(0.0, pow(self.hiddenOneNodes, -0.5),(self.hiddenOneNodes,self.inputNodes))
        self.weightsHiddenOneHiddenTwo = numpy.random.normal(0.0, pow(self.hiddenTwoNodes,-0.5),(self.hiddenTwoNodes,self.hiddenOneNodes))
        self.weightsHiddenTwoHiddenThree = numpy.random.normal(0.0, pow(self.hiddenThreeNodes,-0.5),(self.hiddenThreeNodes,self.hiddenTwoNodes))
        self.weightsHiddenOutput = numpy.random.normal(0.0, pow(self.hiddenOneNodes,-0.5),(self.finalNodes, self.hiddenThreeNodes))
        pass
    def train(self, inputs, target):
        inputs = numpy.array(inputs, ndmin=2).T
        target = numpy.array(target, ndmin=2).T
        hiddenInput = numpy.dot(self.weightsInputHidden,inputs)
        hiddenOneOutput = self.sigmoid(hiddenInput)
        hiddenTwoInput = numpy.dot(self.weightsHiddenOneHiddenTwo,hiddenOneOutput)
        hiddenTwoOutput = self.sigmoid(hiddenTwoInput)
        hiddenThreeInput = numpy.dot(self.weightsHiddenTwoHiddenThree,hiddenTwoOutput)
        hiddenThreeOutput = self.sigmoid(hiddenThreeInput)
        finalInput = numpy.dot(self.weightsHiddenOutput,hiddenThreeOutput)
        finalOutput = self.sigmoid(finalInput)
        outputError = target - finalOutput
        hiddenOutputError = numpy.dot(self.weightsHiddenOutput.T, outputError)
        hiddenThreeHiddenTwoError = numpy.dot(self.weightsHiddenTwoHiddenThree.T, hiddenOutputError)
        hiddenTwoHiddenOneError = numpy.dot(self.weightsHiddenOneHiddenTwo.T, hiddenThreeHiddenTwoError)
        hiddenInputError = numpy.dot(self.weightsInputHidden.T, hiddenTwoHiddenOneError)
        self.weightsHiddenOutput += self.alpha * numpy.dot((outputError * finalOutput * (1.0 - finalOutput)),numpy.transpose(hiddenThreeOutput))
        self.weightsHiddenTwoHiddenThree += self.alpha * numpy.dot((hiddenOutputError * hiddenThreeOutput * (1.0 - hiddenThreeOutput)),numpy.transpose(hiddenTwoOutput))
        self.weightsHiddenOneHiddenTwo += self.alpha * numpy.dot((hiddenThreeHiddenTwoError * hiddenTwoOutput * (1.0 - hiddenTwoOutput)),numpy.transpose(hiddenOneOutput))
        self.weightsInputHidden += self.alpha * numpy.dot((hiddenTwoHiddenOneError * hiddenOneOutput * (1.0 - hiddenOneOutput)),numpy.transpose(inputs))        
        pass
    def query(self, inputs):
        inputs = numpy.array(inputs, ndmin=2).T
        hiddenInput = numpy.dot(self.weightsInputHidden,inputs)
        hiddenOneOutput = self.sigmoid(hiddenInput)
        hiddenTwoInput = numpy.dot(self.weightsHiddenOneHiddenTwo,hiddenOneOutput)
        hiddenTwoOutput = self.sigmoid(hiddenTwoInput)
        hiddenThreeInput = numpy.dot(self.weightsHiddenTwoHiddenThree,hiddenTwoOutput)
        hiddenThreeOutput = self.sigmoid(hiddenThreeInput)
        finalInput = numpy.dot(self.weightsHiddenOutput,hiddenThreeOutput)
        finalOutput = self.sigmoid(finalInput)
        return finalOutput
        pass
    def sigmoid(self, x):
        return scipy.special.expit(x)
        pass

In [5]:
#AND
nnAND = neuralNetwork(2,12,36,12,1,0.1)
print('Before training')
print(nnAND.query([0,0]))
print(nnAND.query([0,1]))
print(nnAND.query([1,0]))
print(nnAND.query([1,1]))
print("Training...")
for i in range(0, 10000):
    nnAND.train([0,0],[0])
    nnAND.train([0,1],[0])
    nnAND.train([1,0],[0])
    nnAND.train([1,1],[1])
print("Done")
print(nnAND.query([0,0]))
print(nnAND.query([0,1]))
print(nnAND.query([1,0]))
print(nnAND.query([1,1]))

Before training
[[0.69101173]]
[[0.69157279]]
[[0.69066017]]
[[0.6912207]]
Training...
Done
[[0.00521079]]
[[0.00521111]]
[[0.00521134]]
[[0.99391747]]


In [6]:
#OR
nnOR = neuralNetwork(2,12,36,12,1,0.1)
print('Before training')
print(nnOR.query([0,0]))
print(nnOR.query([0,1]))
print(nnOR.query([1,0]))
print(nnOR.query([1,1]))
print("Training...")
for i in range(0, 10000):
    nnOR.train([0,0],[0])
    nnOR.train([0,1],[1])
    nnOR.train([1,0],[1])
    nnOR.train([1,1],[1])
print("Done")
print(nnOR.query([0,0]))
print(nnOR.query([0,1]))
print(nnOR.query([1,0]))
print(nnOR.query([1,1]))

Before training
[[0.33073147]]
[[0.33015784]]
[[0.33008244]]
[[0.3295657]]
Training...
Done
[[0.00967447]]
[[0.99507161]]
[[0.99507161]]
[[0.99507197]]


In [7]:
#NOT
nnNOT = neuralNetwork(1,12,36,12,1,0.1)
print('Before training')
print(nnNOT.query([0]))
print(nnNOT.query([1]))
print("Training...")
for i in range(0, 10000):
    nnNOT.train([0],[1])
    nnNOT.train([1],[0])
print("Done")
print(nnNOT.query([0]))
print(nnNOT.query([1]))

Before training
[[0.35022956]]
[[0.34994206]]
Training...
Done
[[0.98900613]]
[[0.00802156]]


In [8]:
#NAND
nnNAND = neuralNetwork(2,12,36,12,1,0.1)
print('Before training')
print(nnNAND.query([0,0]))
print(nnNAND.query([0,1]))
print(nnNAND.query([1,0]))
print(nnNAND.query([1,1]))
print("Training...")
for i in range(0, 10000):
    nnNAND.train([0,0],[1])
    nnNAND.train([0,1],[1])
    nnNAND.train([1,0],[1])
    nnNAND.train([1,1],[0])
print("Done")
print(nnNAND.query([0,0]))
print(nnNAND.query([0,1]))
print(nnNAND.query([1,0]))
print(nnNAND.query([1,1]))

Before training
[[0.4075259]]
[[0.40737203]]
[[0.40733059]]
[[0.407187]]
Training...
Done
[[0.99475148]]
[[0.99475584]]
[[0.99474394]]
[[0.00862311]]


In [9]:
#XOR
nnXOR = neuralNetwork(2,12,36,12,1,0.1)
print('Before training')
print(nnXOR.query([0,0]))
print(nnXOR.query([0,1]))
print(nnXOR.query([1,0]))
print(nnXOR.query([1,1]))
print("Training...")
for i in range(0, 10000):
    nnXOR.train([0,0],[0])
    nnXOR.train([0,1],[1])
    nnXOR.train([1,0],[1])
    nnXOR.train([1,1],[0])
print("Done")
print(nnXOR.query([0,0]))
print(nnXOR.query([0,1]))
print(nnXOR.query([1,0]))
print(nnXOR.query([1,1]))

Before training
[[0.50820669]]
[[0.5083088]]
[[0.50857053]]
[[0.50867772]]
Training...
Done
[[0.00550018]]
[[0.98966125]]
[[0.98962181]]
[[0.00553738]]


In [107]:
print(nnXOR.query([1,1]))
print(nnXOR.query([0.99507161,1]))

[[0.00553738]]
[[0.00553747]]


In [330]:
# NLN Training on use case

## select two variables > select logic gate > get output and repeat until result found > if result not match, continue
## if result  match, try the same logic gate sructure with all the other rows, store accuracy score (how many match)

# parse variable by variable
## Question: should we only go through single AGe var, or all of them? 
## -- probably all of them, or else how distinguish one 1 from another?
## -- does not being of a certain age add information bits?
## -- age is already given and can be used to auto-filter; why unfilter?

from random import randint
import itertools
from tqdm import tnrange, tqdm_notebook, tqdm
from time import sleep
# for index, row in encodedData.iterrows():
# for i in tnrange(3, desc='1st loop'):
#     for j in tqdm_notebook(range(100), desc='2nd loop'):
#         sleep(0.01)

logicName = ['AND', 'OR', 'NAND', 'XOR']
logicNN = [nnAND, nnOR, nnNAND, nnXOR] # nnNOT does not work at the moment

logicHistory = []
varHistory = []
logicnameHistory = []

varLogs = []
logicLogs = []
logicnameLogs = []
accuracyLogs = []

varName = list(encodedData.columns)
placeholderList = []

# select variables
for i in tnrange(1000): # n iterations

# for i in tnrange(len(varName)):
    for var in varName:

#         var = varName[i]
        placeholderList = [item for item in varName if item!=var]

        varHistory.append(var)

        for var2 in placeholderList:
            placeholderList = [item for item in placeholderList if item!=var2]

            # variables selected > select logic gate
            randomIndex = randint(0, len(logicName)-1)
            gate = logicNN[randomIndex]

            input1 = encodedData[var][0]
            input2 = encodedData[var2][0]

            output = gate.query([input1, input2])

            logicHistory.append(gate); logicnameHistory.append(logicName[randomIndex])
            varHistory.append(var2)
            
#             print(varHistory)

            # use output with next var in placeholderlist
            for varZ in placeholderList:
                placeholderList = [item for item in placeholderList if item!=varZ]
                randomIndex = randint(0, len(logicName)-1)
                gate = logicNN[randomIndex]
                inputZ = encodedData[varZ][0]

                output = gate.query([output[0][0], inputZ])
    #             print(placeholderList)
    #             print(output)

                logicHistory.append(gate); logicnameHistory.append(logicName[randomIndex])
                varHistory.append(varZ)

                # check output
                if varZ == list(encodedData.columns)[-1]:

                    if round(output[0][0]) == outputData[0]: # output match -- now check how many rows of data does the logic model fit
    #                     print([output[0][0], outputData[0]])

                        # execute for all rows, generate accuracy score

                        for index, row in encodedDataComplete.iterrows():
                            correctCount = 0
                            intCounter = 0
                            v1 = row[varHistory[intCounter]]
                            while intCounter < len(varHistory)-1:
#                                 try: 
#                                 print(len(varHistory)-1)
#                                 print(intCounter)
#                                 print(intCounter+1)
                                lg = logicHistory[intCounter]
                                intCounter +=1
                                v2 = row[varHistory[intCounter]]
    #                             print(lg)
                                otpt = lg.query([v1, v2])[0][0]
                                v1 = otpt
#                                 print(intCounter)
#                                 except:
#                                     continue
                            if round(otpt) == row[list(encodedDataComplete.columns)[-1]]:
                                correctCount+=1

                        # store accuracy score, store history logs
                        accuracy = correctCount / float(len(encodedDataComplete))
                        accuracyLogs.append(accuracy)
                        varLogs.append(varHistory)
                        logicLogs.append(logicHistory)
                        logicnameLogs.append(logicnameHistory)

#                     else:
        logicHistory = []
        logicnameHistory = []
        varHistory = []

    print("Iteration " + str(i+1))
            
        
        ### logs
#         print(logicName[randomIndex])
#         print([var, var2])
#         print([input1, input2])
#         print(output)

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Iteration 1
Iteration 2
Iteration 3
Iteration 4
Iteration 5
Iteration 6
Iteration 7
Iteration 8
Iteration 9
Iteration 10
Iteration 11
Iteration 12
Iteration 13
Iteration 14
Iteration 15
Iteration 16
Iteration 17
Iteration 18
Iteration 19
Iteration 20
Iteration 21
Iteration 22
Iteration 23
Iteration 24
Iteration 25
Iteration 26
Iteration 27
Iteration 28
Iteration 29
Iteration 30
Iteration 31
Iteration 32
Iteration 33
Iteration 34
Iteration 35
Iteration 36
Iteration 37
Iteration 38
Iteration 39
Iteration 40
Iteration 41
Iteration 42
Iteration 43
Iteration 44
Iteration 45
Iteration 46
Iteration 47
Iteration 48
Iteration 49
Iteration 50
Iteration 51
Iteration 52
Iteration 53
Iteration 54
Iteration 55
Iteration 56
Iteration 57
Iteration 58
Iteration 59
Iteration 60
Iteration 61
Iteration 62
Iteration 63
Iteration 64
Iteration 65
Iteration 66
Iteration 67
Iteration 68
Iteration 69
Iteration 70
Iteration 71
Iteration 72
Iteration 73
Iteration 74
Iteration 75
Iteration 76
Iteration 77
Iteratio

Iteration 593
Iteration 594
Iteration 595
Iteration 596
Iteration 597
Iteration 598
Iteration 599
Iteration 600
Iteration 601
Iteration 602
Iteration 603
Iteration 604
Iteration 605
Iteration 606
Iteration 607
Iteration 608
Iteration 609
Iteration 610
Iteration 611
Iteration 612
Iteration 613
Iteration 614
Iteration 615
Iteration 616
Iteration 617
Iteration 618
Iteration 619
Iteration 620
Iteration 621
Iteration 622
Iteration 623
Iteration 624
Iteration 625
Iteration 626
Iteration 627
Iteration 628
Iteration 629
Iteration 630
Iteration 631
Iteration 632
Iteration 633
Iteration 634
Iteration 635
Iteration 636
Iteration 637
Iteration 638
Iteration 639
Iteration 640
Iteration 641
Iteration 642
Iteration 643
Iteration 644
Iteration 645
Iteration 646
Iteration 647
Iteration 648
Iteration 649
Iteration 650
Iteration 651
Iteration 652
Iteration 653
Iteration 654
Iteration 655
Iteration 656
Iteration 657
Iteration 658
Iteration 659
Iteration 660
Iteration 661
Iteration 662
Iteration 663
Iterat

In [332]:
logsDF = pd.DataFrame()
logsDF['varStructure'] = varLogs
logsDF['logicStructure'] = logicnameLogs
logsDF['logicMachine'] = logicLogs
logsDF['accuracy'] = accuracyLogs
logsDF = logsDF.sort_values(by=['accuracy'], ascending=False)
logsDF

Unnamed: 0,varStructure,logicStructure,logicMachine,accuracy
2656,"[Age_24, Age_30, Age_31, Age_42, Gender_F, Gen...","[AND, AND, XOR, OR, XOR, AND, AND, AND, NAND]",[<__main__.neuralNetwork object at 0x00000217D...,0.25
2621,"[Gender_F, Age_24, Age_30, Age_31, Age_42, Gen...","[NAND, NAND, XOR, XOR, AND, NAND, OR, OR, NAND]",[<__main__.neuralNetwork object at 0x00000217D...,0.25
1298,"[Gender_F, Age_24, Age_30, Age_31, Age_42, Gen...","[NAND, OR, AND, XOR, XOR, OR, XOR, XOR, XOR]",[<__main__.neuralNetwork object at 0x00000217D...,0.25
534,"[Gender_F, Age_24, Age_30, Age_31, Age_42, Gen...","[NAND, OR, NAND, AND, XOR, AND, AND, OR, XOR]",[<__main__.neuralNetwork object at 0x00000217D...,0.25
2617,"[Gender_F, Age_24, Age_30, Age_31, Age_42, Gen...","[AND, NAND, OR, AND, NAND, XOR, XOR, OR, OR]",[<__main__.neuralNetwork object at 0x00000217D...,0.25
538,"[Gender_F, Age_24, Age_30, Age_31, Age_42, Gen...","[OR, NAND, OR, AND, NAND, NAND, XOR, NAND, XOR]",[<__main__.neuralNetwork object at 0x00000217D...,0.25
1291,"[Age_31, Age_24, Age_30, Age_42, Gender_F, Gen...","[NAND, AND, XOR, XOR, NAND, OR, OR, OR, AND]",[<__main__.neuralNetwork object at 0x00000217D...,0.25
1988,"[Age_31, Age_24, Age_30, Age_42, Gender_F, Gen...","[NAND, XOR, AND, XOR, XOR, AND, XOR, OR, AND]",[<__main__.neuralNetwork object at 0x00000217D...,0.25
2606,"[Gender_F, Age_24, Age_30, Age_31, Age_42, Gen...","[NAND, XOR, OR, AND, XOR, NAND, AND, NAND, NAND]",[<__main__.neuralNetwork object at 0x00000217D...,0.25
1989,"[Age_24, Age_30, Age_31, Age_42, Gender_F, Gen...","[XOR, AND, OR, XOR, NAND, XOR, AND, OR, XOR]",[<__main__.neuralNetwork object at 0x00000217D...,0.25


In [159]:
logsDF.to_csv("logs_NLN.csv")

Unnamed: 0,Age_24,Age_30,Age_31,Age_42,Gender_F,Gender_M,Default_Y
0,1,0,0,0,0,1,1
1,0,1,0,0,1,0,1
2,0,0,1,0,1,0,0
3,0,0,0,1,0,1,0
