# FastText Model for classification of tweet sentiment

---

# 1. Installs and imports

## 1.1. Install all required libraries

In [1]:
# Uncomment line below to install all required libraries
# !pip3 install -r ../requirements.txt -q

# Uncomment lines below to install and initiate all required libraries

# !cd SavedModels && wget https://github.com/facebookresearch/fastText/archive/v0.1.0.zip
# !cd SavedModels && unzip v0.1.0.zip
# !cd SavedModels && cd fastText-0.1.0 && make
# !cd SavedModels && cd fastText-0.1.0 && ./fasttext
# !pip3 install fastText

## 1.2. Import required libraries

In [2]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from fasttext import load_model

---

# 2. Load cleaned tweets dataset

In [3]:
df = pd.read_csv('./data/cleaned_tweets.csv')

In [4]:
df.head()

Unnamed: 0,sentiment,text,cleaned_tweet,Porter_Stem,Snowball_Stem
0,0,"@switchfoot http://twitpic.com/2y1zl - Awww, t...",aww bummer shoulda got david carr third day,aww bummer shoulda got david carr third day,aww bummer shoulda got david carr third day
1,0,is upset that he can't update his Facebook by ...,upset cannot update facebook texting might cry...,upset can not updat facebook text might cri re...,upset can not updat facebook text might cri re...
2,0,@Kenichan I dived many times for the ball. Man...,dived many times ball managed save rest go bounds,dive mani time ball manag save rest go bound,dive mani time ball manag save rest go bound
3,0,my whole body feels itchy and like its on fire,whole body feels itchy like fire,whole bodi feel itchi like fire,whole bodi feel itchi like fire
4,0,"@nationwideclass no, it's not behaving at all....",behaving im mad cannot see,behav im mad can not see,behav im mad can not see


---

# 3. Drop text

In [5]:
df = df[['sentiment', 'Snowball_Stem']]

In [6]:
df.head()

Unnamed: 0,sentiment,Snowball_Stem
0,0,aww bummer shoulda got david carr third day
1,0,upset can not updat facebook text might cri re...
2,0,dive mani time ball manag save rest go bound
3,0,whole bodi feel itchi like fire
4,0,behav im mad can not see


---

# 4. Drop rows with NaN

In [7]:
df.isna().sum()

sentiment           0
Snowball_Stem    8046
dtype: int64

In [8]:
df = df.dropna()

In [9]:
df.isna().sum()

sentiment        0
Snowball_Stem    0
dtype: int64

---

# 5. Split dataset into training and test data

In [10]:
X = df['Snowball_Stem']

In [11]:
y = df['sentiment']

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8)

- 80% training data
- 20% test data

In [13]:
X_train.shape

(1273563,)

In [14]:
y_train.shape

(1273563,)

In [15]:
X_test.shape

(318391,)

In [16]:
y_test.shape

(318391,)

---

# 6. Write to text file

In [17]:
train = open('./data/fasttext_data_tweets.train', 'w')
test = open('./data/fasttext_data_tweets.test', 'w')

In [18]:
for i in range(1600000):
    try:
        # print(f'__label__{y_test[i]} {X_test[i]}', file=test)
        print(f'__label__{y_train[i]} {X_train[i]}', file=train)
    except:
        pass

for i in range(1600000):
    try:
        print(f'__label__{y_test[i]} {X_test[i]}', file=test)
        # print(f'__label__{y_train[i]} {X_train[i]}', file=train)
    except:
        pass


In [19]:
train.close()
test.close()

---

# 7. Train Classifier

In [20]:
!./SavedModels/fastText-0.1.0/fasttext supervised -input ./data/fasttext_data_tweets.train -output ./SavedModels/fasttext_model_tweet -epoch 30

Read 11M words
Number of words:  187004
Number of labels: 2


Progress: 78.2%  words/sec/thread: 1848277  lr: 0.021819  loss: 0.445058  eta: 0h0m 0m 0.3%  words/sec/thread: 1821872  lr: 0.099672  loss: 0.361759  eta: 0h0m 0.099474  loss: 0.386868  eta: 0h0m 1820357  lr: 0.098907  loss: 0.511651  eta: 0h0m   loss: 0.538886  eta: 0h0m   words/sec/thread: 1836124  lr: 0.098534  loss: 0.536472  eta: 0h0m 0.527659  eta: 0h0m 0.097381  loss: 0.527292  eta: 0h0m m   eta: 0h0m   lr: 0.096433  loss: 0.514226  eta: 0h0m 4.4%  words/sec/thread: 1842611  lr: 0.095628  loss: 0.525342  eta: 0h0m 4.8%  words/sec/thread: 1842018  lr: 0.095203  loss: 0.526181  eta: 0h0m 5.4%  words/sec/thread: 1841465  lr: 0.094627  loss: 0.545132  eta: 0h0m 5.6%  words/sec/thread: 1841111  lr: 0.094426  loss: 0.551009  eta: 0h0m 0.551656  eta: 0h0m 0.540203  eta: 0h0m 0.092956  loss: 0.543214  eta: 0h0m   words/sec/thread: 1843581  lr: 0.092754  loss: 0.542587  eta: 0h0m   words/sec/thread: 1844688  lr: 0.092538  loss: 0.541594  eta: 0h0m 0.537758  eta: 0h0m 0.091941  loss: 0.53

ress: 78.2%  words/sec/thread: 1848147  lr: 0.021796  loss: 0.445060  eta: 0h0m Progress: 78.2%  words/sec/thread: 1848154  lr: 0.021796  loss: 0.445060  eta: 0h0m Progress: 78.2%  words/sec/thread: 1848162  lr: 0.021795  loss: 0.445065  eta: 0h0m Progress: 78.2%  words/sec/thread: 1848171  lr: 0.021795  loss: 0.445064  eta: 0h0m Progress: 78.2%  words/sec/thread: 1848178  lr: 0.021794  loss: 0.445063  eta: 0h0m Progress: 78.2%  words/sec/thread: 1848187  lr: 0.021794  loss: 0.445064  eta: 0h0m Progress: 78.2%  words/sec/thread: 1848174  lr: 0.021794  loss: 0.445068  eta: 0h0m Progress: 78.2%  words/sec/thread: 1848183  lr: 0.021793  loss: 0.445069  eta: 0h0m Progress: 78.2%  words/sec/thread: 1848191  lr: 0.021793  loss: 0.445071  eta: 0h0m Progress: 78.2%  words/sec/thread: 1848198  lr: 0.021793  loss: 0.445068  eta: 0h0m Progress: 78.2%  words/sec/thread: 1848207  lr: 0.021792  loss: 0.445068  eta: 0h0m Progress: 78.2%  words/sec/thread: 1848214  lr: 0.021792  loss: 0.445

Progress: 100.0%  words/sec/thread: 1857160  lr: 0.000000  loss: 0.430704  eta: 0h0m 9.0%  words/sec/thread: 1848805  lr: 0.020955  loss: 0.445093  eta: 0h0m   words/sec/thread: 1848681  lr: 0.020897  loss: 0.445105  eta: 0h0m ress: 79.5%  words/sec/thread: 1849187  lr: 0.020463  loss: 0.445476  eta: 0h0m ress: 79.9%  words/sec/thread: 1849248  lr: 0.020059  loss: 0.446205  eta: 0h0m ress: 80.4%  words/sec/thread: 1849550  lr: 0.019630  loss: 0.446593  eta: 0h0m 80.7%  words/sec/thread: 1849901  lr: 0.019265  loss: 0.446748  eta: 0h0m   words/sec/thread: 1849879  lr: 0.019208  loss: 0.446835  eta: 0h0m   loss: 0.446812  eta: 0h0m s: 0.446706  eta: 0h0m m ogress: 81.7%  words/sec/thread: 1850404  lr: 0.018287  loss: 0.445980  eta: 0h0m ress: 82.1%  words/sec/thread: 1850522  lr: 0.017889  loss: 0.445007  eta: 0h0m %  words/sec/thread: 1850953  lr: 0.017500  loss: 0.444151  eta: 0h0m rds/sec/thread: 1850919  lr: 0.017466  loss: 0.444060  eta: 0h0m   lr: 0.017099  loss: 0.443495  eta: 0h0

---

# 8. Test classifier

In [21]:
!./SavedModels/fastText-0.1.0/fasttext test ./SavedModels/fasttext_model_tweet.bin ./data/fasttext_data_tweets.test

N	318391
P@1	0.753
R@1	0.753
Number of examples: 318391


---

# 9. Predictions


In [22]:
classifier = load_model("./SavedModels/fasttext_model_tweet.bin")
texts = list(X_test)

labels = classifier.predict(texts)
# print (labels)



In [23]:
predictions = []
for i in range(len(labels[0])):
    predictions.append(int(labels[0][i][0][-1]))

In [24]:
accuracy_score(y_test, predictions)

0.7534321007817432