### Loading Data

In [1]:
import pandas as pd

In [2]:
# Read the train and test splits from CSV files in the working directory.
Train, Test = (pd.read_csv(csv_path) for csv_path in ("train.csv", "test.csv"))

In [3]:
# Preview the first five rows of the training frame.
Train.head(n=5)

Unnamed: 0,OriginalTweet,Sentiment
0,@MeNyrbie @Phil_Gahan @Chrisitv https://t.co/i...,Neutral
1,advice Talk to your neighbours family to excha...,Positive
2,Coronavirus Australia: Woolworths to give elde...,Positive
3,My food stock is not the only one which is emp...,Positive
4,"Me, ready to go at supermarket during the #COV...",Extremely Negative


In [4]:
# Preview the first five rows of the test frame.
Test.head(n=5)

Unnamed: 0,OriginalTweet,Sentiment
0,TRENDING: New Yorkers encounter empty supermar...,Extremely Negative
1,When I couldn't find hand sanitizer at Fred Me...,Positive
2,Find out how you can protect yourself and love...,Extremely Positive
3,#Panic buying hits #NewYork City as anxious sh...,Negative
4,#toiletpaper #dunnypaper #coronavirus #coronav...,Neutral


In [5]:
# Drop every row that has a missing value in any column.
# (axis=0 / how="any" are the dropna defaults, spelled out for clarity.)
Train = Train.dropna(axis=0, how="any")
Test = Test.dropna(axis=0, how="any")

In [6]:
# Distinct sentiment labels that occur in the training frame.
Train.loc[:, "Sentiment"].unique()

array(['Neutral', 'Positive', 'Extremely Negative', 'Negative',
       'Extremely Positive'], dtype=object)

In [7]:
# (row count, column count) of the training frame.
len(Train.index), len(Train.columns)

(41155, 2)

In [8]:
# Show the raw text of the tweet stored under row label 0.
Train.loc[0, "OriginalTweet"]

'@MeNyrbie @Phil_Gahan @Chrisitv https://t.co/iFz9FAn2Pa and https://t.co/xX6ghGFzCC and https://t.co/I2NlzdxNo8'

### Pre-processing Text

#### Lower-casing

In [None]:
# Lower-case every training tweet in one vectorized pass.
# Assigning the whole column back avoids element-wise chained indexing
# (Train["OriginalTweet"][i] = ...), which raises SettingWithCopyWarning and
# does not update the frame at all when pandas Copy-on-Write is enabled.
Train["OriginalTweet"] = Train["OriginalTweet"].str.lower()

In [None]:
# Lower-case every test tweet in one vectorized pass.
# Assigning the whole column back avoids element-wise chained indexing
# (Test["OriginalTweet"][i] = ...), which raises SettingWithCopyWarning and
# does not update the frame at all when pandas Copy-on-Write is enabled.
Test["OriginalTweet"] = Test["OriginalTweet"].str.lower()

#### Expand contractions

In [None]:
from pycontractions import Contractions

In [None]:
# Create the contraction expander from the embedding file named below.
# NOTE(review): presumably this is the GoogleNews word2vec binary and must be
# present in the working directory — confirm.
embedding_file = 'GoogleNews-vectors-negative300.bin'
cont = Contractions(embedding_file)

In [None]:
# Load the models used by `cont` before any text is expanded.
# NOTE(review): presumably this only front-loads work that would otherwise
# happen on the first expand_texts call — confirm against the pycontractions docs.
cont.load_models()

In [None]:
# Expand contractions in all training tweets with a single batched call.
# expand_texts() takes a list of texts and yields the expanded texts in the
# same order (per the pycontractions README), so the results can be assigned
# straight back to the column.
# The earlier str(list(...)) wrapper stored the *repr* of a one-element list
# ("['...']"): it injected brackets and quotes and turned control characters
# such as newlines into literal backslash escapes inside the tweet text.
# It also wrote through chained indexing, which is unreliable in pandas.
Train["OriginalTweet"] = list(
    cont.expand_texts(Train["OriginalTweet"].tolist(), precise=True)
)

In [None]:
# Expand contractions in all test tweets with a single batched call.
# expand_texts() takes a list of texts and yields the expanded texts in the
# same order (per the pycontractions README), so the results can be assigned
# straight back to the column.
# The earlier str(list(...)) wrapper stored the *repr* of a one-element list
# ("['...']"): it injected brackets and quotes and turned control characters
# such as newlines into literal backslash escapes inside the tweet text.
# It also wrote through chained indexing, which is unreliable in pandas.
Test["OriginalTweet"] = list(
    cont.expand_texts(Test["OriginalTweet"].tolist(), precise=True)
)

#### Removing special characters (including double quotes)

In [None]:
import string

In [None]:
# Set of ASCII punctuation characters to strip from the tweets.
special = {mark for mark in string.punctuation}

In [None]:
# Remove every character listed in `special` from the training tweets.
# A translation table deletes the characters in one vectorized pass instead of
# rebuilding each tweet character by character, and assigning the whole column
# avoids chained indexing (Train["OriginalTweet"][i] = ...), which raises
# SettingWithCopyWarning and is a no-op under pandas Copy-on-Write.
punctuation_table = str.maketrans("", "", "".join(special))
Train["OriginalTweet"] = Train["OriginalTweet"].str.translate(punctuation_table)

In [None]:
# Remove every character listed in `special` from the test tweets.
# A translation table deletes the characters in one vectorized pass instead of
# rebuilding each tweet character by character, and assigning the whole column
# avoids chained indexing (Test["OriginalTweet"][i] = ...), which raises
# SettingWithCopyWarning and is a no-op under pandas Copy-on-Write.
punctuation_table = str.maketrans("", "", "".join(special))
Test["OriginalTweet"] = Test["OriginalTweet"].str.translate(punctuation_table)

#### Removing Numbers

In [None]:
from string import digits

In [None]:
# Translation table mapping each ASCII digit to None, so str.translate deletes it.
remove_digits_english = {ord(numeral): None for numeral in digits}

In [None]:
# Delete ASCII digits from every training tweet using the prepared table.
# Whole-column assignment replaces the element-wise chained indexing
# (Train["OriginalTweet"][i] = ...), which raises SettingWithCopyWarning and
# does not modify the frame under pandas Copy-on-Write.
Train["OriginalTweet"] = Train["OriginalTweet"].str.translate(remove_digits_english)

In [None]:
# Delete ASCII digits from every test tweet using the prepared table.
# Whole-column assignment replaces the element-wise chained indexing
# (Test["OriginalTweet"][i] = ...), which raises SettingWithCopyWarning and
# does not modify the frame under pandas Copy-on-Write.
Test["OriginalTweet"] = Test["OriginalTweet"].str.translate(remove_digits_english)

#### Removing Extra spaces

In [None]:
# Collapse runs of whitespace in each training tweet to a single space and
# drop leading/trailing whitespace. Splitting on whitespace already discards
# the edges, so no separate strip() is needed.
# Whole-column assignment replaces the element-wise chained indexing
# (Train["OriginalTweet"][i] = ...), which raises SettingWithCopyWarning and
# does not modify the frame under pandas Copy-on-Write.
Train["OriginalTweet"] = Train["OriginalTweet"].str.split().str.join(" ")

In [None]:
# Collapse runs of whitespace in each test tweet to a single space and
# drop leading/trailing whitespace. Splitting on whitespace already discards
# the edges, so no separate strip() is needed.
# Whole-column assignment replaces the element-wise chained indexing
# (Test["OriginalTweet"][i] = ...), which raises SettingWithCopyWarning and
# does not modify the frame under pandas Copy-on-Write.
Test["OriginalTweet"] = Test["OriginalTweet"].str.split().str.join(" ")

### Saving File

In [None]:
# Write both processed frames to CSV without the index column.
for processed_frame, destination in (
    (Train, 'train_processed.csv'),
    (Test, 'test_processed.csv'),
):
    processed_frame.to_csv(destination, index=False)