# Twitter Sentiment Analysis Using Convolutional Neural Networks

### Download Data

In [None]:
%%bash 
# Start from a clean Data directory. -f keeps the first run (no Data yet)
# from printing an error, and -p makes mkdir tolerant of leftovers.
rm -rf Data
mkdir -p Data
# Abort instead of downloading into the wrong directory if cd fails.
cd Data || exit 1
# Download the dataset files into the current directory.
# NOTE(review): `kg` is presumably the kaggle-cli tool and must be installed
# and logged in beforehand - confirm.
kg dataset -o shashank1558 -d preprocessed-twitter-tweets

### Preprocess Data

Convert each tokenized tweet (a list of words) into a list of word vectors.

In [None]:
def datatovec(vec):
    """Replace every word of every tokenized tweet with its Word2Vec vector.

    A Word2Vec model is loaded from ``Models/TwitterTrain2/wordmodel`` when
    that file exists. Otherwise a model is trained on ``vec`` (100-dimensional
    vectors, ``min_count=1`` so that no word is dropped for being rare) and
    saved to that path for later runs.

    Args:
        vec: List of tweets, each tweet being a list of word strings.

    Returns:
        A new nested list with the same layout as ``vec`` in which every word
        has been replaced by ``tweet_model.wv[word]``. ``vec`` itself is not
        modified.
    """
    model_path = "Models/TwitterTrain2/wordmodel"
    if os.path.exists(model_path):
        # NOTE(review): a cached model is only useful if it was trained on the
        # same vocabulary; the lookup below fails for words it has never seen.
        tweet_model = Word2Vec.load(model_path)
    else:
        # NOTE(review): gensim >= 4.0 renamed `size` to `vector_size`; keep
        # `size` while the notebook targets the older gensim API.
        tweet_model = Word2Vec(vec, min_count=1, size=100)
        # The target directory is not created anywhere else, so make sure it
        # exists before saving.
        os.makedirs(os.path.dirname(model_path), exist_ok=True)
        tweet_model.save(model_path)
    # Build a fresh structure instead of overwriting the caller's lists.
    return [[tweet_model.wv[word] for word in tweet] for tweet in vec]

In [None]:
import pandas as pd
from gensim.models import Word2Vec
from keras.preprocessing.text import text_to_word_sequence
from keras.preprocessing.sequence import pad_sequences
import pickle
import os
import numpy as np


def _tokenize_csv_columns(csv_path):
    """Tokenize the column labels of the CSV at ``csv_path``.

    The text that is tokenized is each column *label* of the loaded frame,
    not the cell values.
    NOTE(review): presumably the dataset stores one tweet per column header -
    confirm against the downloaded files.

    Returns:
        A list with one list of lower-cased word tokens per column.
    """
    frame = pd.read_csv(csv_path)
    return [
        text_to_word_sequence(
            column,
            filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
            split=" ",
            lower=True,
        )
        for column in frame
    ]


def get_data():
    """Return the vectorized tweets and their one-hot sentiment labels.

    When both ``./Data/x.pickle`` and ``./Data/y.pickle`` exist they are
    loaded and returned. Otherwise the positive, negative and neutral CSV
    files under ``./Data`` are tokenized, converted to word vectors with
    ``datatovec``, padded to a common length, pickled to those two files and
    returned.

    Labels are ``[0, 0, 1]`` for positive, ``[1, 0, 0]`` for negative and
    ``[0, 1, 0]`` for neutral tweets.

    Returns:
        dict: ``{"x": <padded float32 tweet vectors>, "y": <labels>}``.
    """
    x_path = "./Data/x.pickle"
    y_path = "./Data/y.pickle"

    # Require both cache files; a leftover x.pickle without y.pickle would
    # otherwise crash the load below.
    if os.path.exists(x_path) and os.path.exists(y_path):
        print("Pre-processed data already exists...")
        # NOTE: pickle must only be used on files this notebook produced.
        with open(x_path, "rb") as f:
            x = pickle.load(f)
        with open(y_path, "rb") as f:
            y = pickle.load(f)
        return {"x": x, "y": y}

    print("Preprocessed data does not exist....")
    sources = (
        ("./Data/processedPositive.csv", [0, 0, 1]),
        ("./Data/processedNegative.csv", [1, 0, 0]),
        ("./Data/processedNeutral.csv", [0, 1, 0]),
    )
    x = []
    y = []
    for csv_path, label in sources:
        tweets = _tokenize_csv_columns(csv_path)
        x.extend(tweets)
        # One independent label list per tweet.
        y.extend(list(label) for _ in tweets)

    print("Data to vec")
    x = datatovec(x)
    print("Padding Sequences")
    x = pad_sequences(x, dtype="float32")
    # Every label already has length 3, so this only converts the list of
    # labels into an array.
    y = pad_sequences(y)
    with open(x_path, "wb") as f:
        pickle.dump(x, f)
    with open(y_path, "wb") as f:
        pickle.dump(y, f)
    print("Data Preprocessed and Saved...")

    return {"x": x, "y": y}

In [None]:
# Load (or build) the preprocessed dataset and expose the features and labels
# as notebook-level variables.
data = get_data()
x, y = data["x"], data["y"]
# The wrapper dict is no longer needed once x and y are bound.
del data

### Randomize and split data into training and test datasets

In [None]:
from sklearn.model_selection import train_test_split
# Shuffle the samples and hold out 3% of them as the test set.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.03,
    shuffle=True,
)

Check data and its shape

In [None]:
# Display the shape of the training inputs produced by the split.
x_train.shape

In [None]:
# Display the training labels produced by the split.
y_train

### Train Model

In [None]:
from keras.preprocessing.text import text_to_word_sequence
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers.core import Dense,Activation
from keras.layers.embeddings import Embedding
from keras.layers import SpatialDropout1D
from keras.layers.recurrent import LSTM
from keras.layers import Embedding
from keras.layers.convolutional import Conv2D
from keras.layers.core import Reshape
from keras.layers.pooling import MaxPooling2D
from keras.layers.core import Dropout,Flatten
import keras
# Build a small 2-D convolutional classifier over the padded tweet matrices.
model = Sequential()
# Append a trailing channel axis of size 1 so the 2-D sample can be fed to
# Conv2D. Only this first layer declares the input shape.
# NOTE(review): x[0] is presumably (padded tweet length, word-vector size) -
# confirm with x.shape.
model.add(Reshape((x[0].shape[0], x[0].shape[1], 1), input_shape=x[0].shape))
# The stray input_shape that used to be passed here has been dropped: this is
# not the first layer, and the value was the un-reshaped 2-D sample shape.
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
# Three output units, one per position of the one-hot sentiment label.
model.add(Dense(3, activation='softmax'))

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

model.summary()

In [None]:
# Train for 100 epochs, holding out 20% of the training data for validation,
# then save the trained model to Trained.h5.
print("Training Model")
model.fit(x_train, y=y_train, epochs=100, verbose=1,
          validation_split=0.2, shuffle=True)
# Fixed typo: this was `madel.save`, which raised NameError after training
# had finished, so the model was never written to disk.
model.save("Trained.h5")
print("Model Trained")

In [None]:
# Run the model on the held-out test inputs; the notebook displays the
# returned predictions.
model.predict(x=x_test)

In [None]:
# Display the true test labels for comparison with the predictions.
y_test