### Importing important libraries, reading the data from the disk and then cleaning it so that it can be fed to the model.

In [1]:
from collections import Counter
import numpy as np
import pandas as pd
import time
import sys

In [2]:
df = pd.read_csv('train.csv', delimiter='~')

In [3]:
df.head(10)

Unnamed: 0,User_ID,Description,Browser_Used,Device_Used,Is_Response
0,11755,After reading mixed reviews I almost didn't bo...,Google Chrome,Desktop,Good
1,33912,This motor inn is located about - city blocks ...,Firefox,Tablet,Good
2,10143,It was our first time there and surely not our...,Google Chrome,Mobile,Good
3,33114,"Great hotel in an excellent location, just off...",Mozilla,Desktop,Good
4,17464,We stayed at the hotel for - weeks to get away...,Google Chrome,Desktop,Good
5,34367,Myself and two girlfriends were in NYC for - n...,InternetExplorer,Mobile,Good
6,14524,I made last minute reservation and couldnt fin...,Edge,Desktop,Bad
7,35130,Stayed at this hotel for - week. Very nice hot...,InternetExplorer,Mobile,Bad
8,1004,My wife and I stayed in the Michelangelo for -...,InternetExplorer,Desktop,Good
9,27086,My wife and I stayed their after a business me...,InternetExplorer,Desktop,Good


### To analyse the response we majorly need Description , so we will define our feature matrix with Description and for label we will take Is_Response

In [4]:
responses = df['Description']
label = df['Is_Response']

In [5]:
responses[0][0:80]

"After reading mixed reviews I almost didn't book at the W, but I was attending a"

In [6]:
label[0]

'Good'

In [7]:
# Function to print data 
def print_data_response(record):
    print(label[record], "-->\t", responses[record][0:100])

In [8]:
check_list = [2,5,7,10,23,34,65,77,66,98,100] # randomly checking the data
for i in check_list:
    print_data_response(i)

Good -->	 It was our first time there and surely not our last. Arrived very early off the train and went there
Good -->	 Myself and two girlfriends were in NYC for - night for both business and fun! We impressed right off
Bad -->	 Stayed at this hotel for - week. Very nice hotel, rooms were nice, beds were good. There is only two
Good -->	 My mom and I stayed here for - nights on a short vacation to Seattle and were extremely happy with o
Good -->	 This was a great place to end our vacation. We arrived well before check-in time but were immediatel
Good -->	 This hotel is sooo very close to the Opera House.
You can catch a cab to anywhere-they are always go
Good -->	 I always stay at -- Park Ave Hotel when I travel to NY on business (or for any reason). The hotel is
Bad -->	 Arrived around -pm. Prepaid through Priceline for a room with - double beds. They said they had noth
Bad -->	 Had the opportunity to stay at the Herbert Hotel.... should of passed it up!! Room was o.k.. First o
Good

In [9]:
#label = [1 if x =='Good' else 0 for x in label]
label = label.tolist()
responses = responses.tolist()

### Noise reduction

In [10]:
noise = [':', "(", ")", "-", ".", "\n", ","]

for m,i in enumerate(responses):
    for j in noise:
        responses[m] = responses[m].replace(j," ")
    responses[m] = responses[m].lower()

In [11]:
for i in check_list:
    print_data_response(i)

Good -->	 it was our first time there and surely not our last  arrived very early off the train and went there
Good -->	 myself and two girlfriends were in nyc for   night for both business and fun! we impressed right off
Bad -->	 stayed at this hotel for   week  very nice hotel  rooms were nice  beds were good  there is only two
Good -->	 my mom and i stayed here for   nights on a short vacation to seattle and were extremely happy with o
Good -->	 this was a great place to end our vacation  we arrived well before check in time but were immediatel
Good -->	 this hotel is sooo very close to the opera house  you can catch a cab to anywhere they are always go
Good -->	 i always stay at    park ave hotel when i travel to ny on business  or for any reason   the hotel is
Bad -->	 arrived around  pm  prepaid through priceline for a room with   double beds  they said they had noth
Bad -->	 had the opportunity to stay at the herbert hotel     should of passed it up!! room was o k   first o
Good

## Deriving a theory and correlation

#### Counters

In [12]:
good_count = Counter()
bad_count = Counter()
total_count = Counter()

In [13]:
for i in range(len(label)):
    if label[i] == "Good":
        for word in responses[i].split(" "):
            good_count[word] += 1
            total_count[word] += 1
    else:
        for word in responses[i].split(" "):
            bad_count[word] += 1
            total_count[word] += 1

In [14]:
good_count.most_common()[0:20]

[('', 451513),
 ('the', 191563),
 ('and', 110328),
 ('a', 87546),
 ('to', 72853),
 ('was', 64016),
 ('in', 48305),
 ('i', 45407),
 ('we', 42683),
 ('of', 40546),
 ('is', 38480),
 ('for', 35784),
 ('hotel', 35248),
 ('it', 28361),
 ('room', 26966),
 ('very', 24038),
 ('at', 23782),
 ('with', 23210),
 ('were', 22357),
 ('but', 21255)]

In [15]:
bad_count.most_common()[0:20]

[('', 283006),
 ('the', 120457),
 ('and', 55716),
 ('a', 49468),
 ('to', 49331),
 ('was', 40227),
 ('i', 36418),
 ('in', 30761),
 ('of', 24533),
 ('we', 24230),
 ('for', 21944),
 ('room', 20673),
 ('it', 20368),
 ('is', 20311),
 ('hotel', 19311),
 ('that', 16952),
 ('not', 15989),
 ('but', 15112),
 ('on', 14672),
 ('at', 14650)]

In [16]:
good_to_bad_ratio = Counter()

In [17]:
for word,count in total_count.most_common():
    if count>100:
        ratio = good_count[word]/float(bad_count[word]+1)
        good_to_bad_ratio[word] = ratio

In [18]:
good_to_bad_ratio.most_common()[0:20]

[('spotlessly', 21.333333333333332),
 ('hesitate', 19.23076923076923),
 ('exceeded', 15.909090909090908),
 ('spotless', 15.56),
 ('immaculate', 14.357142857142858),
 ('delightful', 13.583333333333334),
 ('back!', 13.333333333333334),
 ('perfect!', 12.5),
 ('bryant', 12.5),
 ('beautifully', 11.8),
 ('wonderfully', 11.5),
 ('highly', 11.436507936507937),
 ('hesitation', 10.727272727272727),
 ('loved', 10.581151832460733),
 ('amazing!', 10.5),
 ('delicious', 10.377358490566039),
 ('recommending', 10.333333333333334),
 ('gem', 10.277777777777779),
 ('welcomed', 10.222222222222221),
 ('ferry', 9.722222222222221)]

## With the help of these ratios we can check the words occuring in the good and bad response and provide them score

### Lets Check that score for some words

In [19]:
print("Check For Good Response/Description\n")
print("Ratio for word 'great' ",good_to_bad_ratio['great'] )
print("Ratio for word 'perfect' ",good_to_bad_ratio['perfect'] )
print("Ratio for word 'delightful' ",good_to_bad_ratio['delightful'] )
print("Ratio for word 'amazing' ",good_to_bad_ratio['amazing'] )
print("Ratio for word 'spotlessly' ",good_to_bad_ratio['spotlessly'] )
####------------------------------------------------------------------####
print("\nCheck For Bad Response/Description\n")
print("Ratio for word 'refund' ",good_to_bad_ratio['refund'] )
print("Ratio for word 'filthy' ",good_to_bad_ratio['filthy'] )
print("Ratio for word 'worst' ",good_to_bad_ratio['worst'] )
print("Ratio for word 'unacceptable' ",good_to_bad_ratio['unacceptable'] )
print("Ratio for word 'disgusting' ",good_to_bad_ratio['disgusting'] )

Check For Good Response/Description

Ratio for word 'great'  4.695397244209909
Ratio for word 'perfect'  9.522058823529411
Ratio for word 'delightful'  13.583333333333334
Ratio for word 'amazing'  7.751269035532995
Ratio for word 'spotlessly'  21.333333333333332

Check For Bad Response/Description

Ratio for word 'refund'  0.10762331838565023
Ratio for word 'filthy'  0.048582995951417005
Ratio for word 'worst'  0.1059190031152648
Ratio for word 'unacceptable'  0.19791666666666666
Ratio for word 'disgusting'  0.04484304932735426


### We can see we have a really low score for negative words and high for positive. We can further clear this by taking log values

In [20]:
values = good_to_bad_ratio.values()

In [21]:
min_val = list(values)[0]
for i in list(values):
    if i >0 and i < min_val:
        min_val = i
     

In [22]:
for word, ratio in good_to_bad_ratio.most_common():
    if ratio <= 0 :
        good_to_bad_ratio[word] = min_val

In [23]:
for word, ratio in good_to_bad_ratio.most_common():
    good_to_bad_ratio[word] = np.log(ratio)

In [24]:
print("Check For Good Response/Description\n")
print("Ratio for word 'great' ",good_to_bad_ratio['great'] )
print("Ratio for word 'perfect' ",good_to_bad_ratio['perfect'] )
print("Ratio for word 'delightful' ",good_to_bad_ratio['delightful'] )
print("Ratio for word 'amazing' ",good_to_bad_ratio['amazing'] )
print("Ratio for word 'spotlessly' ",good_to_bad_ratio['spotlessly'] )
####------------------------------------------------------------------####
print("\nCheck For Bad Response/Description\n")
print("Ratio for word 'refund' ",good_to_bad_ratio['refund'] )
print("Ratio for word 'filthy' ",good_to_bad_ratio['filthy'] )
print("Ratio for word 'worst' ",good_to_bad_ratio['worst'] )
print("Ratio for word 'unacceptable' ",good_to_bad_ratio['unacceptable'] )
print("Ratio for word 'disgusting' ",good_to_bad_ratio['disgusting'] )

Check For Good Response/Description

Ratio for word 'great'  1.54658271914
Ratio for word 'perfect'  2.2536110884
Ratio for word 'delightful'  2.60884355102
Ratio for word 'amazing'  2.04785657648
Ratio for word 'spotlessly'  3.06027079469

Check For Bad Response/Description

Ratio for word 'refund'  -2.22911794111
Ratio for word 'filthy'  -3.02448168684
Ratio for word 'worst'  -2.24508059851
Ratio for word 'unacceptable'  -1.6199092123
Ratio for word 'disgusting'  -3.10458667847


### Now we can clearly see the negative values for the words mostly occuring in bad response and positive for good. We can now hopefully derive such relations while building model.

## Now we will try to define a structure on which we will be building our model

#### We will get the vocab size, define the layer with that size and then fill the index related to the word with one 

In [25]:
vocab = set(total_count.keys())
vocab_size = len(vocab)
vocab_size

57333

In [26]:
layer_0 = np.zeros((1,vocab_size))
layer_0

array([[ 0.,  0.,  0., ...,  0.,  0.,  0.]])

In [27]:
word_index = {}
for i, word in enumerate(vocab):
    word_index[word] = i
word_index

{'': 0,
 'emerge': 1,
 'face""""""""""""""""""""""""""""""""': 2,
 'com': 3,
 'prius': 4,
 'divan': 5,
 "mimosa's": 6,
 'roger': 7,
 'mornng': 8,
 'tortuerous': 9,
 'gimlet!': 10,
 'cruel': 11,
 'vantage': 12,
 'fiancial': 13,
 'faintest': 14,
 'fooling': 15,
 'fierce': 16,
 'choicehotel': 17,
 "day'": 18,
 'manning': 19,
 'keneshia': 20,
 'need?': 21,
 'yaaaaay!!': 22,
 'unrestful': 23,
 "vehicles'": 24,
 "traveler's": 25,
 'exagerated': 26,
 'ponte': 27,
 'saff': 28,
 'lid!': 29,
 'whererever': 30,
 'blunds': 31,
 'everyone!!': 32,
 'celing': 33,
 'zipping': 34,
 'challange': 35,
 '""""""""""""""""""""""""""""""""bread': 36,
 'exlpain': 37,
 'economics': 38,
 'inconveniently': 39,
 'sercurity': 40,
 'smithsonian': 41,
 'armchairs': 42,
 'elan': 43,
 "brittany's": 44,
 'sardines!': 45,
 'scandinavians': 46,
 'dealership': 47,
 'replacment': 48,
 'meanest': 49,
 'scientology': 50,
 '*free*': 51,
 '""""""""""""""""""""""""""""""""braun""""""""""""""""""""""""""""""""': 52,
 'insurance!”

In [28]:
def update_input_layer(response):
    global layer_0
    layer_0 *= 0
    for word in response.split(" "):
        layer_0[0][word_index[word]] = 1
update_input_layer(responses[0])

In [29]:
layer_0

array([[ 1.,  0.,  0., ...,  0.,  0.,  0.]])

In [30]:
class NeuralNetwork:
    def __init__(self, responses,labels,hidden_nodes = 15, learning_rate = 0.1):
        """NeuralNetwork with the given settings
        Args:
            responses(list) - List of response used for training
            labels(list) - List of Good/Bad labels associated with the given responses
            hidden_nodes(int) - Number of nodes to create in the hidden layer
            learning_rate(float) - Learning rate to use while training
        
        """
        
        self.pre_process_data(responses, labels)
        
        self.init_network(len(self.response_vocab),hidden_nodes, 1, learning_rate)
    
    
    def pre_process_data(self, response, labels):
        
        response_vocab = set()
        for response in responses:
            for word in response.split(" "):
                response_vocab.add(word)

        self.response_vocab = list(response_vocab)
        
        label_vocab = set()
        for label in labels:
            label_vocab.add(label)
        
        self.label_vocab = list(label_vocab)
        
        self.response_vocab_size = len(self.response_vocab)
        self.label_vocab_size = len(self.label_vocab)
        
        self.word2index = {}
        for i, word in enumerate(self.response_vocab):
            self.word2index[word] = i
        
        self.label2index = {}
        for i, label in enumerate(self.label_vocab):
            self.label2index[label] = i
        
    def init_network(self, input_nodes, hidden_nodes, output_nodes, learning_rate):
        
        self.input_nodes = input_nodes
        self.hidden_nodes = hidden_nodes
        self.output_nodes = output_nodes

        self.learning_rate = learning_rate

        self.weights_0_1 = np.zeros((self.input_nodes,self.hidden_nodes))
    
        self.weights_1_2 = np.random.normal(0.0, self.output_nodes**-0.5, 
                                                (self.hidden_nodes, self.output_nodes))
        
        self.layer_0 = np.zeros((1,input_nodes))
    
        
    def update_input_layer(self,response):

        self.layer_0 *= 0
        
        for word in response.split(" "):
            
            if(word in self.word2index.keys()):
                
                self.layer_0[0][self.word2index[word]] = 1
                
    def get_target_for_label(self,label):
        
        if(label == 'Good'):
            return 1
        else:
            return 0
        
    def sigmoid(self,x):
        return 1 / (1 + np.exp(-x))
    
    def sigmoid_output_2_derivative(self,output):
        return output * (1 - output)
    
    def train(self, training_responses, training_labels):
        
        
        assert(len(training_responses) == len(training_labels))
        
        correct_so_far = 0

        start = time.time()
        
        for i in range(len(training_responses)):
            
            response = training_responses[i]
            label = training_labels[i]
            
            #----- Feed Forward -----#
            self.update_input_layer(response)

            # Hidden layer
            layer_1 = self.layer_0.dot(self.weights_0_1)

            # Output layer
            layer_2 = self.sigmoid(layer_1.dot(self.weights_1_2))
            
            #----- Back Propagation -----#

            # Output error
            layer_2_error = layer_2 - self.get_target_for_label(label) 
            layer_2_delta = layer_2_error * self.sigmoid_output_2_derivative(layer_2)

            # Backpropagated error
            layer_1_error = layer_2_delta.dot(self.weights_1_2.T)
            layer_1_delta = layer_1_error 

            # Update the weights
            self.weights_1_2 -= layer_1.T.dot(layer_2_delta) * self.learning_rate 
            self.weights_0_1 -= self.layer_0.T.dot(layer_1_delta) * self.learning_rate 

            if(layer_2 >= 0.5 and label == 'Good'):
                correct_so_far += 1
            elif(layer_2 < 0.5 and label == 'Bad'):
                correct_so_far += 1
            
            elapsed_time = float(time.time() - start)
            responses_per_second = i / elapsed_time if elapsed_time > 0 else 0
            
            sys.stdout.write("\rProgress:" + str(100 * i/float(len(training_responses)))[:4] \
                             + "% Speed(responses/sec):" + str(responses_per_second)[0:5] \
                             + " #Correct:" + str(correct_so_far) + " #Trained:" + str(i+1) \
                             + " Training Accuracy:" + str(correct_so_far * 100 / float(i+1))[:4] + "%")
            
            if(i % 5000 == 0 and i != 0):
                print("")
    
    def test(self, testing_responses, testing_labels):
        
        correct = 0

        start = time.time()

        for i in range(len(testing_responses)):
            pred = self.run(testing_responses[i])
            
            if(pred == testing_labels[i]):
                correct += 1
        
            elapsed_time = float(time.time() - start)
            responses_per_second = i / elapsed_time if elapsed_time > 0 else 0
            
            sys.stdout.write("\rProgress:" + str(100 * i/float(len(testing_responses)))[:4] \
                             + "% Speed(responses/sec):" + str(responses_per_second)[0:5] \
                             + " #Correct:" + str(correct) + " #Tested:" + str(i+1) \
                             + " Testing Accuracy:" + str(correct * 100 / float(i+1))[:4] + "%")
    
    def run(self, response):
        
        # Input Layer
        self.update_input_layer(response.lower())

        # Hidden layer
        layer_1 = self.layer_0.dot(self.weights_0_1)

        # Output layer
        layer_2 = self.sigmoid(layer_1.dot(self.weights_1_2))
        
        if(layer_2[0] >= 0.5):
            return "Good"
        else:
            return "Bad"

In [31]:
mlp = NeuralNetwork(responses[:-1000],label[:-1000], learning_rate=0.05)

In [32]:
mlp.train(responses[:-1000],label[:-1000])

Progress:17.1% Speed(responses/sec):59.01 #Correct:4108 #Trained:5001 Training Accuracy:82.1%
Progress:34.2% Speed(responses/sec):59.23 #Correct:8379 #Trained:10001 Training Accuracy:83.7%
Progress:51.4% Speed(responses/sec):59.17 #Correct:12668 #Trained:15001 Training Accuracy:84.4%
Progress:68.5% Speed(responses/sec):59.21 #Correct:16997 #Trained:20001 Training Accuracy:84.9%
Progress:85.6% Speed(responses/sec):58.83 #Correct:21365 #Trained:25001 Training Accuracy:85.4%
Progress:99.9% Speed(responses/sec):58.53 #Correct:25032 #Trained:29172 Training Accuracy:85.8%

In [33]:
mlp.test(responses[-1000:],label[-1000:])

Progress:99.9% Speed(responses/sec):510.7 #Correct:868 #Tested:1000 Testing Accuracy:86.8%

### Here we are able to get a decent model but we can increase the speed of this model, here we have a speed of around 57-59 responses per second. we can increase this.

#### We need to identify wasteful computation and elminate them

In [34]:
layer_0 = np.zeros(10)

In [35]:
layer_0

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

In [36]:
layer_0[4] = 1
layer_0[7] = 1

In [37]:
layer_0

array([ 0.,  0.,  0.,  0.,  1.,  0.,  0.,  1.,  0.,  0.])

In [38]:
weights = np.random.randn(10,5)

In [39]:
np.dot(layer_0, weights)

array([-0.26535254,  0.3776197 ,  1.4133905 ,  0.29637708, -1.55469582])

In [40]:
indices = [4,7]

In [41]:
layer_1 = np.zeros(5)

In [42]:
for index in indices:
    layer_1 += (1 * weights[index])

In [43]:
layer_1

array([-0.26535254,  0.3776197 ,  1.4133905 ,  0.29637708, -1.55469582])

### We can see the same result.


In [44]:
class NeuralNetwork2:
    def __init__(self, responses,labels,hidden_nodes = 20, learning_rate = 0.1):
        
        self.pre_process_data(responses, labels)
        
        self.init_network(len(self.response_vocab),hidden_nodes, 1, learning_rate)

    def pre_process_data(self, responses, labels):
        
        response_vocab = set()
        for response in responses:
            for word in response.split(" "):
                response_vocab.add(word)

        self.response_vocab = list(response_vocab)
        
        label_vocab = set()
        for label in labels:
            label_vocab.add(label)
        
        self.label_vocab = list(label_vocab)
        
        self.response_vocab_size = len(self.response_vocab)
        self.label_vocab_size = len(self.label_vocab)
        
        self.word2index = {}
        for i, word in enumerate(self.response_vocab):
            self.word2index[word] = i
        
        self.label2index = {}
        for i, label in enumerate(self.label_vocab):
            self.label2index[label] = i

    def init_network(self, input_nodes, hidden_nodes, output_nodes, learning_rate):
        
        self.input_nodes = input_nodes
        self.hidden_nodes = hidden_nodes
        self.output_nodes = output_nodes

        self.learning_rate = learning_rate

        self.weights_0_1 = np.zeros((self.input_nodes,self.hidden_nodes))

        self.weights_1_2 = np.random.normal(0.0, self.output_nodes**-0.5, 
                                                (self.hidden_nodes, self.output_nodes))
        
        
        self.layer_1 = np.zeros((1,hidden_nodes))
    
    def get_target_for_label(self,label):
        if(label == 'Good'):
            return 1
        else:
            return 0
        
    def sigmoid(self,x):
        return 1 / (1 + np.exp(-x))
    
    def sigmoid_output_2_derivative(self,output):
        return output * (1 - output)
    
   
    def train(self, training_responses_raw, training_labels):

        training_responses = list()
        for response in training_responses_raw:
            indices = set()
            for word in response.split(" "):
                if(word in self.word2index.keys()):
                    indices.add(self.word2index[word])
            training_responses.append(list(indices))

        
        assert(len(training_responses) == len(training_labels))
                
        correct_so_far = 0

        start = time.time()
        
        for i in range(len(training_responses)):
            
            response = training_responses[i]
            label = training_labels[i]
            
            self.layer_1 *= 0
            for index in response:
                self.layer_1 += self.weights_0_1[index]

            layer_2 = self.sigmoid(self.layer_1.dot(self.weights_1_2))            
            
            layer_2_error = layer_2 - self.get_target_for_label(label)
            layer_2_delta = layer_2_error * self.sigmoid_output_2_derivative(layer_2)

            layer_1_error = layer_2_delta.dot(self.weights_1_2.T)
            layer_1_delta = layer_1_error

            
            self.weights_1_2 -= self.layer_1.T.dot(layer_2_delta) * self.learning_rate
            
            
            for index in response:
                self.weights_0_1[index] -= layer_1_delta[0] * self.learning_rate 

            if(layer_2 >= 0.5 and label == 'Good'):
                correct_so_far += 1
            elif(layer_2 < 0.5 and label == 'Bad'):
                correct_so_far += 1
            
            elapsed_time = float(time.time() - start)
            responses_per_second = i / elapsed_time if elapsed_time > 0 else 0
            
            sys.stdout.write("\rProgress:" + str(100 * i/float(len(training_responses)))[:4] \
                             + "% Speed(response/sec):" + str(responses_per_second)[0:5] \
                             + " #Correct:" + str(correct_so_far) + " #Trained:" + str(i+1) \
                             + " Training Accuracy:" + str(correct_so_far * 100 / float(i+1))[:4] + "%")
            if(i % 2500 == 0):
                print("")
    
    def test(self, testing_responses, testing_labels):
        
        correct = 0
        start = time.time()
 
        for i in range(len(testing_responses)):
            pred = self.run(testing_responses[i])

            if(pred == testing_labels[i]):
                correct += 1
            
            elapsed_time = float(time.time() - start)
            responses_per_second = i / elapsed_time if elapsed_time > 0 else 0
            
            sys.stdout.write("\rProgress:" + str(100 * i/float(len(testing_responses)))[:4] \
                             + "% Speed(responses/sec):" + str(responses_per_second)[0:5] \
                             + " #Correct:" + str(correct) + " #Tested:" + str(i+1) \
                             + " Testing Accuracy:" + str(correct * 100 / float(i+1))[:4] + "%")
       
    
    def run(self, response):
        
        self.layer_1 *= 0
        unique_indices = set()
        for word in response.lower().split(" "):
            if word in self.word2index.keys():
                unique_indices.add(self.word2index[word])
        for index in unique_indices:
            self.layer_1 += self.weights_0_1[index]
        
        layer_2 = self.sigmoid(self.layer_1.dot(self.weights_1_2))
        
        if(layer_2[0] >= 0.5):
            return "Good"
        else:
            return "Bad"

In [45]:
mlp2 = NeuralNetwork2(responses[:-200],label[:-200], learning_rate=0.01)

In [46]:
mlp2.train(responses[:-200],label[:-200])

Progress:0.0% Speed(response/sec):0.0 #Correct:1 #Trained:1 Training Accuracy:100.%
Progress:8.34% Speed(response/sec):537.1 #Correct:2014 #Trained:2501 Training Accuracy:80.5%
Progress:16.6% Speed(response/sec):510.6 #Correct:4088 #Trained:5001 Training Accuracy:81.7%
Progress:25.0% Speed(response/sec):505.3 #Correct:6232 #Trained:7501 Training Accuracy:83.0%
Progress:33.3% Speed(response/sec):499.4 #Correct:8335 #Trained:10001 Training Accuracy:83.3%
Progress:41.7% Speed(response/sec):499.9 #Correct:10473 #Trained:12501 Training Accuracy:83.7%
Progress:50.0% Speed(response/sec):495.3 #Correct:12590 #Trained:15001 Training Accuracy:83.9%
Progress:58.3% Speed(response/sec):493.1 #Correct:14701 #Trained:17501 Training Accuracy:84.0%
Progress:66.7% Speed(response/sec):489.3 #Correct:16868 #Trained:20001 Training Accuracy:84.3%
Progress:75.0% Speed(response/sec):486.4 #Correct:19046 #Trained:22501 Training Accuracy:84.6%
Progress:83.4% Speed(response/sec):484.8 #Correct:21217 #Trained:250

In [47]:
mlp2.test(responses[-200:],label[-200:])

Progress:0.0% Speed(responses/sec):0.0 #Correct:1 #Tested:1 Testing Accuracy:100.%Progress:0.5% Speed(responses/sec):333.3 #Correct:2 #Tested:2 Testing Accuracy:100.%Progress:1.0% Speed(responses/sec):499.9 #Correct:3 #Tested:3 Testing Accuracy:100.%Progress:1.5% Speed(responses/sec):599.9 #Correct:4 #Tested:4 Testing Accuracy:100.%Progress:2.0% Speed(responses/sec):499.9 #Correct:5 #Tested:5 Testing Accuracy:100.%Progress:2.5% Speed(responses/sec):555.5 #Correct:6 #Tested:6 Testing Accuracy:100.%Progress:3.0% Speed(responses/sec):599.9 #Correct:7 #Tested:7 Testing Accuracy:100.%Progress:3.5% Speed(responses/sec):538.4 #Correct:7 #Tested:8 Testing Accuracy:87.5%Progress:4.0% Speed(responses/sec):571.3 #Correct:8 #Tested:9 Testing Accuracy:88.8%Progress:4.5% Speed(responses/sec):562.4 #Correct:9 #Tested:10 Testing Accuracy:90.0%Progress:5.0% Speed(responses/sec):555.5 #Correct:9 #Tested:11 Testing Accuracy:81.8%Progress:5.5% Speed(responses/sec):578.9 #Correct:10 #Tested:12 

Progress:49.0% Speed(responses/sec):608.6 #Correct:88 #Tested:99 Testing Accuracy:88.8%Progress:49.5% Speed(responses/sec):607.3 #Correct:89 #Tested:100 Testing Accuracy:89.0%Progress:50.0% Speed(responses/sec):606.0 #Correct:90 #Tested:101 Testing Accuracy:89.1%Progress:50.5% Speed(responses/sec):608.3 #Correct:90 #Tested:102 Testing Accuracy:88.2%Progress:51.0% Speed(responses/sec):607.1 #Correct:91 #Tested:103 Testing Accuracy:88.3%Progress:51.5% Speed(responses/sec):605.8 #Correct:92 #Tested:104 Testing Accuracy:88.4%Progress:52.0% Speed(responses/sec):604.6 #Correct:93 #Tested:105 Testing Accuracy:88.5%Progress:52.5% Speed(responses/sec):603.4 #Correct:93 #Tested:106 Testing Accuracy:87.7%Progress:53.0% Speed(responses/sec):595.4 #Correct:94 #Tested:107 Testing Accuracy:87.8%Progress:53.5% Speed(responses/sec):597.7 #Correct:95 #Tested:108 Testing Accuracy:87.9%Progress:54.0% Speed(responses/sec):599.9 #Correct:96 #Tested:109 Testing Accuracy:88.0%Progress:54.5% Speed(

In [48]:
mlp2.run("great")

'Good'

In [49]:
mlp2.run("filthy hotel")

'Bad'

## Here we have almost the same accuracy but responses/sec have increased from around 50 to somewhere around 500

In [50]:
test_df = pd.read_csv('test.csv', delimiter='~')

In [51]:
test_df.head()

Unnamed: 0,User_ID,Description,Browser_Used,Device_Used
0,9602,A friend and I stayed in this hotel when we we...,Edge,Desktop
1,8749,I enjoy staying here when I have early flights...,Google Chrome,Mobile
2,15500,I stopped off in Seattle during a train tour o...,Chrome,Mobile
3,5495,I have stayed at this hotel - or - times now f...,Mozilla Firefox,Desktop
4,18570,Excellent location with hop on hop off city tr...,Edge,Mobile


In [52]:
sample_df = pd.read_csv("sample_submission.csv", delimiter="~")

In [53]:
sample_df.head()

Unnamed: 0,User_ID,Is_Response
0,9602,
1,8749,
2,15500,
3,5495,
4,18570,


In [54]:
test_response = []
test_desc = list(test_df["Description"])
test_userid = list(test_df["User_ID"])

In [55]:
noise = [':', "(", ")", "-", ".", "\n", ","]

for m,i in enumerate(test_desc):
    for j in noise:
        test_desc[m] = test_desc[m].replace(j," ")
    test_desc[m] = test_desc[m].lower()

In [56]:
for i in test_desc:
    test_response.append(mlp2.run(i))

In [57]:
test_response[0:20]

['Good',
 'Good',
 'Good',
 'Good',
 'Good',
 'Good',
 'Good',
 'Good',
 'Good',
 'Good',
 'Good',
 'Bad',
 'Bad',
 'Good',
 'Good',
 'Good',
 'Good',
 'Good',
 'Good',
 'Good']

In [58]:

submission_df = pd.DataFrame({"User_ID": test_userid, "Is_Response": test_response})

In [59]:
submission_df.head()

Unnamed: 0,Is_Response,User_ID
0,Good,9602
1,Good,8749
2,Good,15500
3,Good,5495
4,Good,18570


In [60]:
test_df.Description[0][-100:]

' correctly).\nAll in all, it was a good, safe, affordable hotel. I would definitely stay there again.'

In [61]:
test_desc[0][-100:]

' correctly   all in all  it was a good  safe  affordable hotel  i would definitely stay there again '

In [62]:
submission_df.to_csv("submission.csv", index=False, sep="~", encoding='utf-8')

In [63]:
read_sub = pd.read_csv("submission.csv", delimiter="~")

In [64]:
read_sub.head()

Unnamed: 0,Is_Response,User_ID
0,Good,9602
1,Good,8749
2,Good,15500
3,Good,5495
4,Good,18570


In [65]:
sample_df.head()

Unnamed: 0,User_ID,Is_Response
0,9602,
1,8749,
2,15500,
3,5495,
4,18570,


### END