# Load the libraries

In [1]:
import pandas as pd 
from sklearn.model_selection import train_test_split
import joblib
from sklearn.feature_extraction.text import CountVectorizer

# Read the dataset

In [2]:
# Read the survey responses from the CSV that sits next to the notebook.
csv_path = 'sentiments_v3.csv'
data = pd.read_csv(csv_path)
data

Unnamed: 0,Student No.,Perception,Sentiment
0,1,I love traditional learning,1
1,2,I hate online class/learning,0
2,3,I can hardly focus during online learning,0
3,4,Traditional learning is more efficient,1
4,5,Online learning is good for mental health,1
...,...,...,...
210,211,Traditional learning is effective because it i...,1
211,212,Using too much of laptop is one the disadvanta...,0
212,213,There is an active particapation in traditiona...,1
213,214,Traditional learning is more organized,1


# Dataset Pre-processing

In [3]:
def preprocess_data(data):
    """Return a cleaned copy of the survey frame, ready for vectorizing.

    Two things happen:

    * the 'Student No.' column is dropped, since it carries no signal for
      sentiment;
    * every 'Perception' string has surrounding whitespace stripped and is
      lower-cased.

    The input frame is never modified. The function is also safe to call on
    a frame that has already been cleaned (no 'Student No.' column), so the
    notebook cell that does ``data = preprocess_data(data)`` can be re-run
    without raising ``KeyError``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a 'Perception' column of strings; 'Student No.' is
        optional.

    Returns
    -------
    pandas.DataFrame
        A new frame without 'Student No.' and with normalised 'Perception'.

    Raises
    ------
    KeyError
        If the 'Perception' column is missing.
    """
    # errors='ignore' makes the drop a no-op when the column is already gone.
    cleaned = data.drop(columns='Student No.', errors='ignore')

    # assign() builds a fresh frame, so the caller's data is left untouched.
    return cleaned.assign(
        Perception=cleaned['Perception'].str.strip().str.lower()
    )

In [4]:
# Rebind `data` to the cleaned frame: 'Student No.' removed, 'Perception' stripped and lower-cased.
data = preprocess_data(data)

In [5]:
# Inspect the cleaned frame: only 'Perception' (now lower-case) and 'Sentiment' remain.
data

Unnamed: 0,Perception,Sentiment
0,i love traditional learning,1
1,i hate online class/learning,0
2,i can hardly focus during online learning,0
3,traditional learning is more efficient,1
4,online learning is good for mental health,1
...,...,...
210,traditional learning is effective because it i...,1
211,using too much of laptop is one the disadvanta...,0
212,there is an active particapation in traditiona...,1
213,traditional learning is more organized,1


# Splitting the data

In [6]:
# Split into training and testing data.
# The inputs are sliced straight from `data` instead of going through `x`/`y`:
# this cell rebinds `x` and `y` to the training portion, so using them as inputs
# would split the already-split data again whenever the cell is re-run.
x, x_test, y, y_test = train_test_split(
    data['Perception'],
    data['Sentiment'],
    stratify=data['Sentiment'],  # keep the 0/1 label ratio the same in both parts
    test_size=0.25,              # hold out 25% of the rows for evaluation
    random_state=42,             # fixed seed so the split is reproducible
)

In [7]:
# Vectorize text perceptions to numbers.
# `x` and `x_test` are overwritten below with count matrices, so the raw text is
# looked up again from `data` through the label indices (the split keeps each
# row's original index label). This keeps the cell re-runnable: feeding the
# already-vectorized `x` back into the vectorizer would raise.
# Assumes `data` has a unique index (the default one produced by read_csv).
train_text = data.loc[y.index, 'Perception']
test_text = data.loc[y_test.index, 'Perception']

vec = CountVectorizer(stop_words='english')
x = vec.fit_transform(train_text).toarray()  # vocabulary is learned from the training text only
x_test = vec.transform(test_text).toarray()  # test text is encoded with that same vocabulary
x_test

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

# Training the Naive Bayes model

In [8]:
from sklearn.naive_bayes import MultinomialNB

# Build the classifier and train it on the count features in a single step;
# `fit` hands back the estimator itself, so `model` is the fitted classifier.
model = MultinomialNB().fit(x, y)
model

MultinomialNB()

# Evaluating the NB Model

In [9]:
# Score the fitted classifier on the held-out 25% split.
model.score(X=x_test, y=y_test)

0.7777777777777778

# Testing

In [10]:
# Prediction key: array([1]) means positive sentiment
#                 array([0]) means negative sentiment

In [11]:
# Probe the classifier with a favourable remark about traditional learning.
sample_text = ['i am happy with traditional learning']
model.predict(vec.transform(sample_text))

array([1], dtype=int64)

In [12]:
# Probe the classifier with an unfavourable remark about online learning.
sample_text = ['the quality of online learning is poor']
model.predict(vec.transform(sample_text))

array([0], dtype=int64)

In [13]:
# Probe the classifier with a favourable remark about online classes.
sample_text = ['online class is fun']
model.predict(vec.transform(sample_text))

array([1], dtype=int64)

In [16]:
# Probe the classifier with an unfavourable remark about online classes.
sample_text = ['online class is tiring']
model.predict(vec.transform(sample_text))

array([0], dtype=int64)

In [17]:
# Probe the classifier with another favourable remark about traditional learning.
sample_text = ['i learned more in traditional learning']
model.predict(vec.transform(sample_text))

array([1], dtype=int64)