In [1]:
import csv
import string
import re

import nltk
from nltk import sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk import word_tokenize
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import wordnet
from nltk.corpus import sentiwordnet as swn

import numpy as np

from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.ensemble import RandomForestClassifier

from scipy.sparse import hstack, csr_matrix
import warnings
warnings.simplefilter(action='ignore')

import gensim
from gensim.test.utils import datapath

from collections import Counter

import pandas as pd

In [2]:
def clean_data(body):
    punctuations = string.punctuation + '—' + '’' + '…' + '‘' + '–' + '”' + '“'
    regex = re.compile('[%s]' % re.escape(punctuations))
    clean_text=''
    for sentence in sent_tokenize(body):
        sentence=regex.sub('', sentence)
        sentence=re.sub(r"\n", " ", sentence)
        sentence = re.sub(' +',' ',sentence)
        clean_text=clean_text+' '+sentence
    return clean_text

Unnamed: 0,Headline,Body ID,Stance,articleBody
0,Police find mass graves with at least '15 bodi...,712,unrelated,Danny Boyle is directing the untitled film\n\n...
1,Seth Rogen to Play Apple’s Steve Wozniak,712,discuss,Danny Boyle is directing the untitled film\n\n...
2,Mexico police find mass grave near site 43 stu...,712,unrelated,Danny Boyle is directing the untitled film\n\n...
3,Mexico Says Missing Students Not Found In Firs...,712,unrelated,Danny Boyle is directing the untitled film\n\n...
4,New iOS 8 bug can delete all of your iCloud do...,712,unrelated,Danny Boyle is directing the untitled film\n\n...
5,Return of the Mac: Seth Rogen in talks to star...,712,discuss,Danny Boyle is directing the untitled film\n\n...
6,Seth Rogen Is Woz,712,discuss,Danny Boyle is directing the untitled film\n\n...
7,Mexico finds 4 more graves at site of suspecte...,712,unrelated,Danny Boyle is directing the untitled film\n\n...
8,Are missing students in mass graves found near...,712,unrelated,Danny Boyle is directing the untitled film\n\n...
9,Mexico prosecutor: Students not in 1st mass gr...,712,unrelated,Danny Boyle is directing the untitled film\n\n...


In [6]:
#train_data
LABELS = ['agree', 'disagree', 'discuss', 'unrelated']
train_body_text = read_bodies('/Users/romilrathi/Desktop/SML/train_bodies.csv')
train_headline , train_stance = read_title_stances('/Users/romilrathi/Desktop/SML/train_stances.csv')

train_stances = []
train_headlines = []
train_body_texts = []

for k,v in train_body_text.items():
    label_index = LABELS.index(train_stance[k])
    train_stances.append(label_index)
    train_headlines.append(train_headline[k])
    train_body_texts.append(train_body_text[k])
    
train_stances = np.asarray(train_stances)

1683

In [7]:
#Test Data
test_body_text = read_bodies('/Users/romilrathi/Desktop/SML/competition_test_bodies.csv')
test_headline , test_stance = read_title_stances('/Users/romilrathi/Desktop/SML/competition_test_stances.csv')

test_stances = []
test_headlines = []
test_body_texts = []

for k,v in test_body_text.items():
    label_index = LABELS.index(test_stance[k])
    test_stances.append(label_index)
    test_headlines.append(test_headline[k])
    test_body_texts.append(test_body_text[k])
    
test_stances = np.asarray(test_stances)

# similarity

In [8]:
def get_similarity_feature(headlines,body_texts,size, vectorizer):
    sim = np.zeros((size, 1))
    for i in range(size):
        headline, body = headlines[i], body_texts[i]
        tfidf = vectorizer.fit_transform([headline, body])
        sim[i] = cosine_similarity(tfidf[0], tfidf[1])
    return sim

In [9]:
train_size = len(train_stances)
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
train_sim = get_similarity_feature(train_headlines, train_body_texts, train_size, tfidf_vectorizer)

In [10]:
test_size = len(test_stances)
test_sim = get_similarity_feature(test_headlines, test_body_texts, test_size, tfidf_vectorizer)

# chi2

In [11]:
vectorizer = CountVectorizer(ngram_range=(1,2), min_df=2, 
                                 stop_words='english')

train_headline = vectorizer.fit_transform(train_headlines)
test_headline = vectorizer.transform(test_headlines)

train_body = vectorizer.fit_transform(train_body_texts)
test_body = vectorizer.transform(test_body_texts)

In [12]:
ch2_headline = SelectKBest(chi2, k=500)
ch2_headline.fit(train_headline, train_stances)

train_headline = ch2_headline.transform(train_headline)
test_headline = ch2_headline.transform(test_headline)

ch2_body = SelectKBest(chi2, k=1000)
ch2_body.fit(train_body, train_stances)

train_body = ch2_body.transform(train_body)
test_body = ch2_body.transform(test_body)

In [13]:
train_body = train_body.toarray()
test_body = test_body.toarray()

# embeddings

In [46]:
# Loading Google's pre-trained Word2Vec model
model = gensim.models.KeyedVectors.load_word2vec_format(datapath("/Users/romilrathi/Downloads/GoogleNews-vectors-negative300.bin.gz"), binary=True)

In [15]:
# fuction to pre-process the data
def pre_processing(data):
    data = word_tokenize(data.lower())
    data = [word for word in data if word not in stopwords.words('english')]
    data = [word for word in data if word in model.vocab]
    return data

In [16]:
# fuction to obtain review feature vectors using average word embeddings
def get_avg_embd_features(text_data):
    text = pre_processing(text_data)
    if text == []:
        features = [0] * 300
    else:
        features = list(np.mean(model[text], axis=0)) 
    return features

In [17]:
train_embd_features = []
train_body_head_similarity = []
train_labels = []

for i in range(len(train_headlines)):
    train_body_embd_feature = get_avg_embd_features(train_body_texts[i])
    train_headline_embd_feature = get_avg_embd_features(train_headlines[i])
    train_body_head_similarity.append(cosine_similarity(csr_matrix(train_headline_embd_feature), csr_matrix(train_body_embd_feature))[0][0])
    train_embd_features.append(train_headline_embd_feature + train_body_embd_feature)
    
train_embd_features = csr_matrix(train_embd_features)
train_embd_features = train_embd_features.toarray()

In [18]:
test_embd_features = []
test_body_head_similarity = []
test_labels = []

for i in range(len(test_headlines)):
    test_body_embd_feature = get_avg_embd_features(test_body_texts[i])
    test_headline_embd_feature = get_avg_embd_features(test_headlines[i])
    test_body_head_similarity.append(cosine_similarity(csr_matrix(test_headline_embd_feature), csr_matrix(test_body_embd_feature))[0][0])
    test_embd_features.append(test_headline_embd_feature + test_body_embd_feature)
    
test_embd_features = csr_matrix(test_embd_features)
test_embd_features = test_embd_features.toarray()

# sentiments

In [19]:
def get_sentiment_scores(data, lmtzr, tokenizer_regex):    
    token = tokenizer_regex.tokenize(data)
    token = [word for word in token if word not in stopwords.words('english')]
    token = [lmtzr.lemmatize(word) for word in token]
    
    pos_scores=[]
    neg_scores=[]
    pos = 0
    neg = 0
    
    for word in token:
        a,b = 0, 0
        syn = wordnet.synsets(word)
        for sy in syn:
            senti = swn.senti_synset(sy.name())
            a += senti.pos_score()
            b += senti.neg_score()
            if len(syn) > 0:
                a = a / len(syn)
                b = b / len(syn)
            pos += a
            neg += b
            pos_scores.append(pos)
            neg_scores.append(neg)
            
    if(len(pos_scores)!=0):
        pos_sum = sum(pos_scores) / len(token)
        neg_sum = sum(neg_scores) / len(token) 
        senti_avg = (pos_sum - neg_sum)
        
    else:
        pos_sum = 0
        neg_sum =0
        senti_avg =0
        
    return pos_sum, neg_sum, senti_avg

In [20]:
lmtzr = WordNetLemmatizer()
tokenizer_regex = RegexpTokenizer(r'\w+')

train_pos_diff = []
train_neg_diff = []
train_avg_senti_diff = []
    
for i in range(len(train_headlines)):
    pos_sum_head, neg_sum_head, senti_avg_head = get_sentiment_scores(train_headlines[i], lmtzr, tokenizer_regex)
    pos_sum_body, neg_sum_body, senti_avg_body= get_sentiment_scores(train_body_texts[i], lmtzr, tokenizer_regex)
    
    train_pos_diff.append(pos_sum_head - pos_sum_body)
    train_neg_diff.append(neg_sum_head - neg_sum_body)
    train_avg_senti_diff.append(senti_avg_head - senti_avg_body)
    
train_pos_diff = np.array(train_pos_diff).astype(np.float)
train_neg_diff = np.array(train_neg_diff).astype(np.float)
train_avg_senti_diff = np.array(train_avg_senti_diff).astype(np.float)

In [21]:
test_pos_diff = []
test_neg_diff = []
test_avg_senti_diff = []
    
for i in range(len(test_headlines)):
    pos_sum_head, neg_sum_head, senti_avg_head = get_sentiment_scores(test_headlines[i], lmtzr, tokenizer_regex)
    pos_sum_body, neg_sum_body, senti_avg_body= get_sentiment_scores(test_body_texts[i], lmtzr, tokenizer_regex)
    
    test_pos_diff.append(pos_sum_head - pos_sum_body)
    test_neg_diff.append(neg_sum_head - neg_sum_body)
    test_avg_senti_diff.append(senti_avg_head - senti_avg_body)
    
test_pos_diff = np.array(test_pos_diff).astype(np.float)
test_neg_diff = np.array(test_neg_diff).astype(np.float)
test_avg_senti_diff = np.array(test_avg_senti_diff).astype(np.float)

## Sriram

In [42]:
NEGATE = [
    "aint",
    "arent",
    "cannot",
    "cant",
    "couldnt",
    "darent",
    "didnt",
    "doesnt",
    "ain't",
    "aren't",
    "can't",
    "couldn't",
    "daren't",
    "didn't",
    "doesn't",
    "dont",
    "hadnt",
    "hasnt",
    "havent",
    "isnt",
    "mightnt",
    "mustnt",
    "neither",
    "don't",
    "hadn't",
    "hasn't",
    "haven't",
    "isn't",
    "mightn't",
    "mustn't",
    "neednt",
    "needn't",
    "never",
    "none",
    "nope",
    "nor",
    "not",
    "nothing",
    "nowhere",
    "oughtnt",
    "shant",
    "shouldnt",
    "uhuh",
    "wasnt",
    "werent",
    "oughtn't",
    "shan't",
    "shouldn't",
    "uh-uh",
    "wasn't",
    "weren't",
    "without",
    "wont",
    "wouldnt",
    "won't",
    "wouldn't",
    "rarely",
    "seldom",
    "despite"
]

In [53]:
def getFeatures(headlines, body):
    unigram_overlaps = []
    bigram_overlaps = []
    negation_word_count = []
    
    for i in range(len(headlines)):
        unigrams_overlap = 0
        bigrams_overlap = 0
        negation_words = 0

        headline_tokenize = word_tokenize(headlines[i])
        body_tokenize = word_tokenize(body[i])
        bigram = list(nltk.bigrams(body_tokenize))
        
        for w in range(len(headline_tokenize)):
            if(headline_tokenize[w] in NEGATE):
                negation_words+=1
            if(w==0):
                if(headline_tokenize[w] in body_tokenize):
                    unigrams_overlap +=1
            else:
                if(headline_tokenize[w] in body_tokenize):
                    unigrams_overlap +=1
                headline_bigram = (headline_tokenize[w-1], headline_tokenize[w])
                if(headline_bigram in bigram):
                    bigrams_overlap+=1
                    
        unigram_overlaps.append(unigrams_overlap)
        bigram_overlaps.append(bigrams_overlap)
        negation_word_count.append(negation_words)
    
    return unigram_overlaps, bigram_overlaps, negation_word_count
    

In [55]:
train_unigram_overlaps, train_bigram_overlaps, train_neation_word_count = getFeatures(train_headlines, train_body_texts)
test_unigram_overlaps, test_bigram_overlaps, test_neation_word_count = getFeatures(test_headlines, test_body_texts)

# score calculation

In [106]:
LABELS = ['agree', 'disagree', 'discuss', 'unrelated']
LABELS_RELATED = ['unrelated','related']
RELATED = LABELS[0:3]

def score_submission(gold_labels, test_labels):
    score = 0.0
    cm = [[0, 0, 0, 0],
          [0, 0, 0, 0],
          [0, 0, 0, 0],
          [0, 0, 0, 0]]

    for i, (g, t) in enumerate(zip(gold_labels, test_labels)):
        g_stance, t_stance = g, t
        if g_stance == t_stance:
            score += 0.25
            if g_stance != 'unrelated':
                score += 0.50
        if g_stance in RELATED and t_stance in RELATED:
            score += 0.25

        cm[LABELS.index(g_stance)][LABELS.index(t_stance)] += 1

    return score, cm

In [107]:
def print_confusion_matrix(cm):
    lines = []
    header = "|{:^11}|{:^11}|{:^11}|{:^11}|{:^11}|".format('', *LABELS)
    line_len = len(header)
    lines.append("-"*line_len)
    lines.append(header)
    lines.append("-"*line_len)

    hit = 0
    total = 0
    for i, row in enumerate(cm):
        hit += row[i]
        total += sum(row)
        lines.append("|{:^11}|{:^11}|{:^11}|{:^11}|{:^11}|".format(LABELS[i],
                                                                   *row))
        lines.append("-"*line_len)
    print('\n'.join(lines))

In [103]:
def report_score(actual,predicted):
    score,cm = score_submission(actual,predicted)
    best_score, _ = score_submission(actual,actual)

    print_confusion_matrix(cm)
    print("Score: " +str(score) + " out of " + str(best_score) + "\t("+str(score*100/best_score) + "%)")

In [91]:
train_data = np.column_stack((train_body, train_sim, train_pos_diff, train_neg_diff, train_avg_senti_diff, train_unigram_overlaps))

In [95]:
test_data = np.column_stack((test_body, test_sim, test_pos_diff, test_neg_diff, test_avg_senti_diff,  test_unigram_overlaps))

In [96]:
classifier = RandomForestClassifier(n_estimators=100, random_state=5)
classifier.fit(train_data, train_stances)
    
ensemble_prediction = classifier.predict(test_data)

In [97]:
actual_label = [LABELS[x] for x in test_stances]
predicted_label = [LABELS[x] for x in ensemble_prediction]
report_score(actual_label, predicted_label)

-------------------------------------------------------------
|           |   agree   | disagree  |  discuss  | unrelated |
-------------------------------------------------------------
|   agree   |    78     |     0     |    66     |    13     |
-------------------------------------------------------------
| disagree  |    23     |     0     |    20     |     7     |
-------------------------------------------------------------
|  discuss  |    59     |     0     |    193    |    21     |
-------------------------------------------------------------
| unrelated |     3     |     0     |    14     |    407    |
-------------------------------------------------------------
Score: 414.75 out of 586.0	(70.7764505119454%)


In [98]:
#result using sklearn 
from sklearn.linear_model import LogisticRegression

In [99]:
clf = LogisticRegression(fit_intercept=True, C = 100,multi_class = 'ovr')
clf.fit(train_data,train_stances)
pred = clf.predict(X=test_data)

In [100]:
actual_label = [LABELS[x] for x in test_stances]
predicted_label = [LABELS[x] for x in pred]
report_score(actual_label, predicted_label)

-------------------------------------------------------------
|           |   agree   | disagree  |  discuss  | unrelated |
-------------------------------------------------------------
|   agree   |    18     |    33     |    14     |    92     |
-------------------------------------------------------------
| disagree  |     9     |     6     |     7     |    28     |
-------------------------------------------------------------
|  discuss  |    20     |    46     |    32     |    175    |
-------------------------------------------------------------
| unrelated |     9     |     1     |    23     |    391    |
-------------------------------------------------------------
Score: 186.0 out of 586.0	(31.74061433447099%)


In [1]:
from keras.layers import Dense,LSTM
from keras.utils import to_categorical
from keras.models import Sequential
from keras import optimizers
import keras
import pandas as pd
import gensim
import nltk
from nltk.tokenize import word_tokenize
import numpy as np
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

#the designated length for the size of headlines and body text in the input tensor
lengthForBody = 424
lengthForHeadline = 45

import numpy as np
import pandas as pd
import os
from nltk.tokenize import word_tokenize
import gensim

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [28]:
#train_data
LABELS = ['agree', 'disagree', 'discuss', 'unrelated']
train_body_text = read_bodies('/Users/romilrathi/Desktop/SML/train_bodies.csv')
train_headline , train_stance = read_title_stances('/Users/romilrathi/Desktop/SML/train_stances.csv')

train_stances = []
train_headlines = []
train_body_texts = []

for k,v in train_body_text.items():
    label_index = LABELS.index(train_stance[k])
    train_stances.append(label_index)
    train_headlines.append(train_headline[k])
    train_body_texts.append(train_body_text[k])
    
train_stances = np.asarray(train_stances)

In [29]:
#Test Data
test_body_text = read_bodies('/Users/romilrathi/Desktop/SML/competition_test_bodies.csv')
test_headline , test_stance = read_title_stances('/Users/romilrathi/Desktop/SML/competition_test_stances.csv')

test_stances = []
test_headlines = []
test_body_texts = []

for k,v in test_body_text.items():
    label_index = LABELS.index(test_stance[k])
    test_stances.append(label_index)
    test_headlines.append(test_headline[k])
    test_body_texts.append(test_body_text[k])
    
test_stances = np.asarray(test_stances)

In [32]:
for index in range(len(train_headlines)): #len(headlines)
    line = train_headlines[index]
    train_headlines[index] = word_tokenize(line)

In [33]:
for index in range(len(test_headlines)): #len(headlines)
    line = test_headlines[index]
    test_headlines[index] = word_tokenize(line)

In [35]:
train_headlineList = []

for eachSentence in train_headlines[0:len(train_headlines)]:
    headlineLi = []
    print(eachSentence)
    for eachword in eachSentence:
        try:
            headlineLi.append(model[eachword])
            #the type of word vectors is <type 'numpy.ndarray'>
        except:
            pass
    train_headlineList.append(headlineLi)

train_headlineList = np.array(train_headlineList)

['Italian', 'fisherman', 'catches', 'monstrous', '280-pound', 'catfish']
['It', 'Begins', ':', 'HazMat-Wearing', 'Passenger', 'Spotted', 'At', 'Airport']
['WHO', '‘', 'probing', '’', 'whether', 'ISIS', 'fighters', 'got', 'Ebola']
['‘', 'The', 'cub', 'of', 'Baghdadi', "'", ':', 'ISIS', 'reports', 'its', 'youngest', 'jihadist', '‘', 'got', 'martyred', '’', 'in', 'battle']
['Islamic', 'State', 'using', "'SCORPION", 'bombs', "'", 'to', 'terrorise', 'victims']
['Senior', 'Western', 'Intelligence', 'Official', 'Confirms', 'London', 'Rapper', 'Abdel', 'Majed', 'Abdel', 'Bary', 'Is', 'A', 'Suspect', 'In', 'The', 'Foley', 'Killing']
['Small', 'Meteorite', 'Strikes', 'in', 'Nicaragua', "'s", 'Capital', 'City', 'of', 'Managua']
['Nicaragua', 'meteorite', ':', 'Experts', 'attempt', 'to', 'understand', 'whether', 'mysterious', 'meteorite', 'fell', 'from', 'passing', 'Pitbull', 'asteroid']
['ISIL', 'allegedly', 'kills', 'US', 'journalist', 'in', 'video']
['How', 'did', 'a', 'Texas', 'plumber', "'s",

In [102]:
test_headlineList = []

for eachSentence in test_headlines[0:len(test_headlines)]:
    headlineLi = []
    print(eachSentence)
    for eachword in eachSentence:
        try:
            headlineLi.append(model[eachword])
            #the type of word vectors is <type 'numpy.ndarray'>
        except:
            pass
    test_headlineList.append(headlineLi)

test_headlineList = np.array(test_headlineList)
test_headlineList

['NHL', 'expansion', 'to', 'include', 'Toronto', ',', 'Quebec', 'City', ',', 'Seattle', 'and', 'Las', 'Vegas', ':', 'Report']
['Sketchy', 'Rumor', 'Claims', 'Apple', 'Planning', 'New', '4-Inch', 'iPhone', 'for', '2015']
['ISIS', 'Is', 'One', 'Mile', 'From', 'Baghdad']
['Transgender', 'Teen', 'Commits', 'Suicide', '&', 'Pens', 'A', 'Heartbreaking', 'Note', 'Urging', 'The', 'World', 'To', 'Change']
['Iraqi', 'Army', 'Downs', 'Two', 'British', 'Planes', 'Carrying', 'Weapons', 'for', 'ISIL', 'Terrorists']
['LG', 'Display', ',', 'Samsung', 'unit', 'to', 'supply', 'Apple', 'Watch', 'screens', ':', 'Report']
['Ohio', 'Trans', 'Teen', 'Commits', 'Suicide', ',', 'Leaves', 'Heartbreaking', 'Note', 'Declaring', '“', 'There', '’', 's', 'No', 'Way', 'Out', '”']
['[', 'Google', 'translate', ']', 'AED', 'site', ':', 'Family', '``', 'Flyer', "''", 'Maryam', 'Mansouri', 'emphasizes', '``', 'fabricating', "''", 'a', 'statement', 'repudiating', 'them']
['Is', 'this', 'a', 'Crabzilla', 'spotted', 'in', 't

array([], shape=(904, 0), dtype=float64)

In [37]:
#padding the headline, since the length of each headline is not equal
train_headlineList = pad_sequences(train_headlineList,padding='post',maxlen=lengthForHeadline,value=0.0,dtype='float32')

#padding the headline, since the length of each headline is not equal
test_headlineList = pad_sequences(test_headlineList,padding='post',maxlen=lengthForHeadline,value=0.0,dtype='float32')

In [38]:
train_headlineList.shape

(1683, 45)

In [39]:
test_headlineList.shape

(904, 45)

In [40]:
train_bodyTextList = []
for eachSentence in train_body_texts[0:len(train_body_texts)]:
    bodyTextLi = []
    #print eachSentence
    for eachword in eachSentence:
        try:
            bodyTextLi.append(model[eachword])
            #the type of word vectors is <type 'numpy.ndarray'>
        except:
            pass
    train_bodyTextList.append(bodyTextLi)

train_bodyTextList = np.array(train_bodyTextList)

In [41]:
test_bodyTextList = []
for eachSentence in test_body_texts[0:len(test_body_texts)]:
    bodyTextLi = []
    #print eachSentence
    for eachword in eachSentence:
        try:
            bodyTextLi.append(model[eachword])
        except:
            pass
    test_bodyTextList.append(bodyTextLi)

test_bodyTextList = np.array(test_bodyTextList)

In [42]:
#padding the body text, since the length of each body text is not equal
train_bodyTextList = pad_sequences(train_bodyTextList,padding='post',maxlen=lengthForBody,value=0.0,dtype='float32')

#padding the body text, since the length of each body text is not equal
test_bodyTextList = pad_sequences(test_bodyTextList,padding='post',maxlen=lengthForBody,value=0.0,dtype='float32')

In [43]:
x_train = np.append(train_headlineList,train_bodyTextList,axis=1)
y_train = train_stances[0:len(train_headlineList)]

x_test = np.append(test_headlineList,test_bodyTextList,axis=1)
y_test = test_stances[0:len(test_headlineList)]

In [101]:
y_train = to_categorical(y_train,num_classes=4)
y_test = to_categorical(y_test,num_classes=4)

In [85]:
#set the size for input tensor
lstm_output = 4
time_steps = lengthForBody + lengthForHeadline #number of column
input_vector = 300

#build model
model = Sequential()
model.add(LSTM(units=lstm_output,input_shape=(time_steps,input_vector)))
model.add(layers.Dropout(0.3))
model.add(Dense(4,activation='softmax',name = 'dense1'))


sgd = optimizers.SGD(lr=0.01, momentum=0., decay=0., nesterov=False)
model.compile(loss='categorical_crossentropy',optimizer=sgd ,metrics=['accuracy'])

model.fit(x_train,y_train,epochs=50,batch_size=5000,verbose=1)

score=model.evaluate(x_test,y_test,batch_size=1000,verbose=1)
print(score)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
[1.1559879779815674, 0.8050000071525574]


In [111]:
len(y_pred)

400