# Neural Network

In [1]:
# import modules
from __future__ import print_function

import time

import numpy as np
import pandas as pd
import keras
from ann_visualizer.visualize import ann_viz
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import TomekLinks
from keras.datasets import mnist
from keras.layers import Dense, Dropout, Activation
from keras.models import Sequential
from keras.optimizers import RMSprop
from keras.optimizers import Adam, SGD
from sklearn.model_selection import train_test_split

In [2]:
# Read the tweet dataset from disk, using the first CSV column as the row index
df = pd.read_csv(
    filepath_or_buffer='../data/dataframe.csv',
    index_col=0,
)

# Preview the first five rows
df.head(5)

Unnamed: 0,tweet,product_,emotion,lemmatizer_tweets,textblob_polarity,textblob_subjectivity,vs_neg,vs_neu,vs_pos,vs_compound,nrc_sentiment,gi_sentiment,henry_sentiment,huliu_sentiment,jockers_sentiment,lm_sentiment,senticnet_sentiment,sentiword_sentiment,socal_sentiment,product_agg
0,"['wesley83', 'have', '3G', 'iPhone', '3', 'hrs...",iPhone,0,wesley83 have 3G iPhone 3 hr tweeting RISE Aus...,-0.2,0.4,0.223,0.777,0.0,-0.6486,0.0,-0.333333,0.0,-1.0,-1.0,0.0,-0.0952,-0.221875,-1.192154,Apple
1,"['jessedee', 'Know', 'fludapp', 'Awesome', 'iP...",iPad or iPhone App,1,jessedee Know fludapp Awesome iPad iPhone app ...,0.466667,0.933333,0.0,0.528,0.472,0.91,1.0,1.0,0.0,1.0,0.416667,0.0,0.475,0.175,2.17719,Apple
2,"['swonderlin', 'not', 'wait', 'iPad', '2', 'al...",iPad,1,swonderlin not wait iPad 2 also should sale do...,-0.155556,0.288889,0.0,1.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-0.625,-1.0,-0.3055,-0.289062,-1.0,Apple
3,"['sxsw', 'hope', 'year', 'festival', 't', 'cra...",iPad or iPhone App,0,sxsw hope year festival t crashy this year iPh...,0.0,0.0,0.0,0.596,0.404,0.7269,1.0,1.0,0.0,0.0,0.5,0.0,0.0716,0.25,2.841547,Apple
4,"['sxtxstate', 'great', 'stuff', 'Fri', 'SXSW',...",Google,1,sxtxstate great stuff Fri SXSW Marissa Mayer G...,0.8,0.75,0.0,0.796,0.204,0.6249,0.0,1.0,0.0,1.0,0.5,1.0,0.55125,0.083333,1.554026,Google


In [3]:
# Fold emotion label 4 into label 2; every other label is kept unchanged
df['emotion'] = df['emotion'].mask(df['emotion'] == 4, 2)

# Columns that are not model inputs: the label itself plus the text/product fields
non_feature_columns = ['emotion', 'tweet', 'product_', 'lemmatizer_tweets', 'product_agg']

# Feature matrix = everything that remains; target vector = the emotion label
data = df.drop(columns=non_feature_columns)
target = df['emotion']

In [4]:
# Hold out 20% of the rows for testing; the fixed seed keeps the partition reproducible
X_train, X_test, y_train, y_test = train_test_split(
    data,
    target,
    test_size=0.20,
    random_state=1,
)

# Look at the shape of each split (features first, then labels)
for split in (X_train, X_test, y_train, y_test):
    print(split.shape)

(7273, 15)
(1819, 15)
(7273,)
(1819,)


#### Iteration 1

In [5]:
# Baseline network trained on the untouched training split.
#
# The split is re-created inside this cell so the cell can be re-run on its
# own: the conversions below overwrite X_train/X_test/y_train/y_test, and
# one-hot encoding labels that are already one-hot encoded would corrupt them.
# The arguments and seed match the initial split, so the partition is identical.
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.20, random_state=1)

batch_size = 8   # number of samples per gradient update
num_classes = 4  # width of the one-hot targets and of the softmax output layer
epochs = 15      # number of full passes over the training data

# Convert the pandas objects to NumPy arrays; labels become small unsigned ints
X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(y_train)
y_test = np.array(y_test)
y_train = y_train.astype('uint8')
y_test = y_test.astype('uint8')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Take the input width from the data instead of hard-coding the column count
n_features = X_train.shape[1]

# specifying the model structure
model = Sequential()

# specify the first hidden layer
model.add(Dense(100, activation='relu', input_shape=(n_features,)))
model.add(Dropout(0.2))

# specify the second hidden layer
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.2))

# specify the third hidden layer
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.2))

# specify the output layer: one softmax unit per class
model.add(Dense(num_classes, activation='softmax'))

model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer=SGD(),
              metrics=['accuracy'])

# NOTE: the test split doubles as the validation data that is monitored per epoch
history = model.fit(X_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(X_test, y_test))

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               1600      
_________________________________________________________________
dropout (Dropout)            (None, 100)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 100)               10100     
_________________________________________________________________
dropout_1 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 100)               10100     
_________________________________________________________________
dropout_2 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 4)                 4

In [6]:
# Score the trained model on the held-out split and report each value.
# The model is compiled with a single 'accuracy' metric, so `score` holds
# the loss followed by the accuracy.
score = model.evaluate(X_test, y_test, verbose=0)
for label, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(label, value)

Test loss: 0.8815038204193115
Test accuracy: 0.6157229542732239


In [7]:
# Visualize the architecture of the current `model` with ann_visualizer.
# `ann_viz` is imported in the imports cell at the top of the notebook.
ann_viz(model, title="Sentiment Neural Net")

#### Iteration 2: Tomek Links

In [8]:
# Fresh split so this experiment starts from untouched train/test data
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.20, random_state=1)

# Under-sample the training split only (Tomek links); the test split is left as-is
tl = TomekLinks()
X_train, y_train = tl.fit_resample(X_train, y_train)

batch_size = 8   # number of samples per gradient update
num_classes = 4  # width of the one-hot targets and of the softmax output layer
epochs = 15      # number of full passes over the training data

# Convert to NumPy arrays; labels become small unsigned ints
X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(y_train)
y_test = np.array(y_test)
y_train = y_train.astype('uint8')
y_test = y_test.astype('uint8')


# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Take the input width from the data instead of hard-coding the column count
n_features = X_train.shape[1]

# specifying the model structure
model = Sequential()

# specify the first hidden layer
model.add(Dense(100, activation='relu', input_shape=(n_features,)))
model.add(Dropout(0.2))

# specify the second hidden layer
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.2))

# specify the third hidden layer
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.2))

# specify the output layer: one softmax unit per class
model.add(Dense(num_classes, activation='softmax'))

model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer=SGD(),
              metrics=['accuracy'])

# NOTE: the test split doubles as the validation data that is monitored per epoch
history = model.fit(X_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(X_test, y_test))

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 100)               1600      
_________________________________________________________________
dropout_3 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 100)               10100     
_________________________________________________________________
dropout_4 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 100)               10100     
_________________________________________________________________
dropout_5 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_7 (Dense)              (None, 4)                

In [9]:
# Score the Tomek-links model on the held-out split and report each value.
# The model is compiled with a single 'accuracy' metric, so `score` holds
# the loss followed by the accuracy.
score = model.evaluate(X_test, y_test, verbose=0)
for label, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(label, value)

Test loss: 0.8907440900802612
Test accuracy: 0.6157229542732239


#### Iteration 3: SMOTE

In [10]:
# Fresh split so this experiment starts from untouched train/test data
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.20, random_state=1)

# Over-sample the training split only (SMOTE); the test split is left as-is.
# `fit_resample` replaces the deprecated `fit_sample` alias, which newer
# imbalanced-learn releases no longer provide, and matches the Tomek-links cell.
sm = SMOTE(random_state=23)
X_train, y_train = sm.fit_resample(X_train, y_train)

batch_size = 8   # number of samples per gradient update
num_classes = 4  # width of the one-hot targets and of the softmax output layer
epochs = 15      # number of full passes over the training data

# Convert to NumPy arrays; labels become small unsigned ints
X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(y_train)
y_test = np.array(y_test)
y_train = y_train.astype('uint8')
y_test = y_test.astype('uint8')


# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Take the input width from the data instead of hard-coding the column count
n_features = X_train.shape[1]

# specifying the model structure
model = Sequential()

# specify the first hidden layer
model.add(Dense(100, activation='relu', input_shape=(n_features,)))
model.add(Dropout(0.2))

# specify the second hidden layer
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.2))

# specify the third hidden layer
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.2))

# specify the output layer: one softmax unit per class
model.add(Dense(num_classes, activation='softmax'))

model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer=SGD(),
              metrics=['accuracy'])

# NOTE: the test split doubles as the validation data that is monitored per epoch
history = model.fit(X_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(X_test, y_test))

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_8 (Dense)              (None, 100)               1600      
_________________________________________________________________
dropout_6 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_9 (Dense)              (None, 100)               10100     
_________________________________________________________________
dropout_7 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_10 (Dense)             (None, 100)               10100     
_________________________________________________________________
dropout_8 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_11 (Dense)             (None, 4)                

In [11]:
# Score the SMOTE model on the held-out split and report each value.
# The model is compiled with a single 'accuracy' metric, so `score` holds
# the loss followed by the accuracy.
score = model.evaluate(X_test, y_test, verbose=0)
for label, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(label, value)

Test loss: 1.156239628791809
Test accuracy: 0.47883450984954834
