In [3]:
import numpy as np
import pandas as pd
import spacy
from spacy import displacy
from spacy.util import minibatch, compounding
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
import re
import string
import time
from spacy.tokens import DocBin

In [29]:
train_raw_data = pd.read_csv("Dataset/Corona_NLP_train.csv")
test_raw_data = pd.read_csv("Dataset/Corona_NLP_test.csv")

In [46]:
train_raw_data.head()

Unnamed: 0,UserName,ScreenName,Location,TweetAt,OriginalTweet,Sentiment
0,3799,48751,London,16-03-2020,@MeNyrbie @Phil_Gahan @Chrisitv https://t.co/i...,Neutral
1,3800,48752,UK,16-03-2020,advice Talk to your neighbours family to excha...,Positive
2,3801,48753,Vagabonds,16-03-2020,Coronavirus Australia: Woolworths to give elde...,Positive
3,3802,48754,,16-03-2020,My food stock is not the only one which is emp...,Positive
4,3803,48755,,16-03-2020,"Me, ready to go at supermarket during the #COV...",Extremely Negative


In [51]:
train_data = train_raw_data[['OriginalTweet','Sentiment']].dropna()
test_data = test_raw_data[['OriginalTweet','Sentiment']].dropna()

In [52]:
train_data.head()

Unnamed: 0,OriginalTweet,Sentiment
0,@MeNyrbie @Phil_Gahan @Chrisitv https://t.co/i...,Neutral
1,advice Talk to your neighbours family to excha...,Positive
2,Coronavirus Australia: Woolworths to give elde...,Positive
3,My food stock is not the only one which is emp...,Positive
4,"Me, ready to go at supermarket during the #COV...",Extremely Negative


In [64]:
train_data.shape

(41157, 2)

In [53]:
sentiment = {"Extremely Positive":"Positive", "Extremely Negative":"Negative",
         "Positive":"Positive","Negative":"Negative","Neutral":"Neutral"}

In [54]:
train_data["Sentiment"] = train_data.Sentiment.map(sentiment)
test_data["Sentiment"] = test_data.Sentiment.map(sentiment)

In [55]:
train_data.Sentiment.unique()

array(['Neutral', 'Positive', 'Negative'], dtype=object)

In [56]:
def remove_emoji(text):
    emoji_pattern = re.compile("["
                           u"\U0001F600-\U0001F64F"  # emoticons
                           u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                           u"\U0001F680-\U0001F6FF"  # transport & map symbols
                           u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                           u"\U00002702-\U000027B0"
                           u"\U000024C2-\U0001F251"
                           "]+", flags=re.UNICODE)
    return emoji_pattern.sub(r'', text)

def remove_url(text): 
    url_pattern  = re.compile('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
    return url_pattern.sub(r'', text)
 # converting return value from list to string



def clean_text(text ): 
    delete_dict = {sp_character: '' for sp_character in string.punctuation} 
    delete_dict[' '] = ' ' 
    table = str.maketrans(delete_dict)
    text1 = text.translate(table)
    textArr= text1.split()
    text2 = ' '.join([w for w in textArr if ( not w.isdigit() and  ( not w.isdigit() and len(w)>3))]) 
    
    return text2.lower()

In [57]:
train_data.OriginalTweet = train_data.OriginalTweet.apply(remove_emoji)
train_data.OriginalTweet = train_data.OriginalTweet.apply(remove_url)
train_data.OriginalTweet = train_data.OriginalTweet.apply(clean_text)
test_data.OriginalTweet = test_data.OriginalTweet.apply(remove_emoji)
test_data.OriginalTweet = test_data.OriginalTweet.apply(remove_url)
test_data.OriginalTweet = test_data.OriginalTweet.apply(clean_text)

In [58]:
train_data.head()

Unnamed: 0,OriginalTweet,Sentiment
0,menyrbie philgahan chrisitv,Neutral
1,advice talk your neighbours family exchange ph...,Positive
2,coronavirus australia woolworths give elderly ...,Positive
3,food stock only which empty please dont panic ...,Positive
4,ready supermarket during covid19 outbreak beca...,Negative


In [38]:
train_data['tuples'] = train_data.apply(lambda row: (row['OriginalTweet'], row['Sentiment']), axis=1)
train_spacy = train_data.tuples.tolist()

test_data['tuples'] = test_data.apply(lambda row: (row['OriginalTweet'], row['Sentiment']), axis=1)
test_spacy = test_data.tuples.tolist()

In [59]:
train_spacy[10]

('month there hasnt been crowding supermarkets restaurants however reducing hours closing malls means everyone using same entrance dependent single supermarket manila lockdown covid2019 philippines',
 'Neutral')

In [92]:
def make_docs(data):
    docs = []
    for doc, label in nlp.pipe(data, as_tuples=True):
#         print("label is ",label,"doc is", doc)
        if label == 'Positive':
            doc.cats['Positive'] =  1
            doc.cats['Negative'] =  0
            doc.cats['Neutral']  =  0
        elif label == 'Negative':
            doc.cats['Positive'] =  0
            doc.cats['Negative'] =  1
            doc.cats['Neutral']  =  0
        else:
            doc.cats['Positive'] =  0
            doc.cats['Negative'] =  0
            doc.cats['Neutral']  =  1
#         print(doc.cats)
        docs.append(doc)
    return (docs)

In [94]:
nlp = spacy.load("en_core_web_sm")
num_texts = 41157 #size of train dataset


train_docs = make_docs(train_spacy[:num_texts])
doc_bin = DocBin(docs=train_docs)
doc_bin.to_disk("E:\\Projects\\Coronavirus_tweets_NLP-Text_Classification\\Dataset\\train.spacy")

test_docs = make_docs(test_spacy[:num_texts])
doc_bin = DocBin(docs=test_docs)
doc_bin.to_disk("E:\\Projects\\Coronavirus_tweets_NLP-Text_Classification\\Dataset\\test.spacy")

In [4]:
model = spacy.load("output/model-last")

In [47]:
test_pred = model("Government is not helping us in this  situation")

In [48]:
test_pred.cats

{'Positive': 0.9365926384925842,
 'Negative': 0.014406421221792698,
 'Neutral': 0.04900091886520386}