In [1]:
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objs as go
import spacy
from keras.models import model_from_json
from spacy.lang.en.stop_words import STOP_WORDS
from tqdm import tqdm, trange

In [2]:
# Load the small English spaCy pipeline.
# Only token lemmas are read from the parsed documents in this notebook, so the
# dependency parser and the named-entity recognizer are disabled to reduce the
# per-tweet processing time. The tagger, which the lemmas depend on, stays on.
nlp = spacy.load('en_core_web_sm', disable=['parser', 'ner'])

In [3]:
# Read the input CSV into a DataFrame.
TEST_CSV_PATH = 'test.csv'
dataset = pd.read_csv(TEST_CSV_PATH)

In [4]:
# Preview the first five rows of the loaded frame.
dataset.head(n=5)

Unnamed: 0,id,keyword,location,text
0,0,,,Just happened a terrible car crash
1,2,,,"Heard about #earthquake is different cities, s..."
2,3,,,"there is a forest fire at spot pond, geese are..."
3,9,,,Apocalypse lighting. #Spokane #wildfires
4,11,,,Typhoon Soudelor kills 28 in China and Taiwan


In [5]:
# Dropping Non-Predictive Columns
# Keep the ids aside (they are needed for the submission file), then rebind
# `dataset` to a new frame without the unused columns. `drop` returns a fresh
# frame here instead of mutating the existing one in place.
dataset_id = dataset['id']
dataset = dataset.drop(columns=['id', 'keyword', 'location'])

In [6]:
# Preview the first five rows of the frame as it currently stands.
dataset.head(n=5)

Unnamed: 0,text
0,Just happened a terrible car crash
1,"Heard about #earthquake is different cities, s..."
2,"there is a forest fire at spot pond, geese are..."
3,Apocalypse lighting. #Spokane #wildfires
4,Typhoon Soudelor kills 28 in China and Taiwan


In [7]:
# Pull the tweet texts out as a NumPy array.
# `.values` exposes the frame's own storage, so the explicit copy keeps any later
# element-wise edits of `tweets` from writing back into `dataset`. It also yields
# a writable array when pandas copy-on-write makes `.values` read-only.
tweets = dataset['text'].values.copy()

In [8]:
# Performing Lemmatization
# Each tweet is replaced by its token lemmas joined with single spaces.
# `nlp.pipe` streams the texts through spaCy in batches, and the results are
# collected into a new list so the input array is only read, never written to.
tweets = [
    " ".join(token.lemma_ for token in doc)
    for doc in tqdm(nlp.pipe(tweets), total=len(tweets), desc='Lemma Loop')
]

Lemma Loop: 100%|██████████████████████████████████████████████████████████████████| 3263/3263 [00:52<00:00, 61.62it/s]


In [9]:
# Performing Vectorization
# Loading Vectorizer Object
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code.
# Only load 'bow_vectorizer.pkl' when it comes from a trusted source.
vectorizer = joblib.load('bow_vectorizer.pkl')

# Encode the lemmatized tweets with the loaded vectorizer, then expand the result
# with `.toarray()` into the array that is handed to the model.
tweets_bow = vectorizer.transform(tweets)
tweets = tweets_bow.toarray()

In [10]:
# Loading Model from Disk
# Read the architecture JSON inside a context manager so the file handle is
# closed even if the read raises, then rebuild the model from that JSON.
with open('BOW_ML_Models/bw_model.json', 'r') as json_model_file:
    json_model = json_model_file.read()
model = model_from_json(json_model)

# Loading Model Weights
model.load_weights('BOW_ML_Models/bw_model.h5')

Using TensorFlow backend.


Instructions for updating:
Colocations handled automatically by placer.


In [11]:
# Attach optimizer, loss and metric to the restored model, then print its layer summary.
compile_options = {
    'optimizer': 'adam',
    'loss': 'binary_crossentropy',
    'metrics': ['accuracy'],
}
model.compile(**compile_options)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 64)                1167424   
_________________________________________________________________
dense_7 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_8 (Dense)              (None, 16)                528       
_________________________________________________________________
dense_9 (Dense)              (None, 4)                 68        
_________________________________________________________________
dense_10 (Dense)             (None, 1)                 5         
Total params: 1,170,105
Trainable params: 1,170,105
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Turn the model outputs into 0/1 labels by thresholding at 0.5.
# This is what `Sequential.predict_classes` computes for a single-unit output
# layer, written out explicitly because that method was removed in TensorFlow 2.6.
predictions = (model.predict(tweets) > 0.5).astype('int32')

In [13]:
# Report the dimensions of the prediction array.
predictions_label = 'Shape of Predictions :'
print(predictions_label, predictions.shape)

Shape of Predictions : (3263, 1)


In [14]:
# Making Submission CSV File
# Start the frame from the ids saved earlier, then attach the predicted labels
# as the 'target' column.
submissions = pd.DataFrame({'id': dataset_id})
submissions['target'] = predictions

In [15]:
# Preview the first five rows of the submission frame.
submissions.head(n=5)

Unnamed: 0,id,target
0,0,1
1,2,1
2,3,1
3,9,1
4,11,1


In [16]:
# Report the dimensions of the submission frame.
submissions_label = 'Submission Dataframe Shape :'
print(submissions_label, submissions.shape)

Submission Dataframe Shape : (3263, 2)


In [17]:
# Saving Dataframe to CSV File
# The row index is left out of the written file.
OUTPUT_CSV = 'bow_submissions.csv'
submissions.to_csv(OUTPUT_CSV, index=False)