<a href="https://colab.research.google.com/github/risetdito/sentiment-analysis/blob/master/003_Sentiment_Analysis_Grab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

*Muhammad Apriandito Arya Saputra*


---




# **Sentiment Analysis - Grab**

## **Import Library**

In [None]:
# Import Library
import pandas as pd 

In [None]:
# Import Modules
from sklearn.feature_extraction.text import CountVectorizer # to create Bag of words
from sklearn.model_selection import train_test_split  # for splitting data
from sklearn.naive_bayes import GaussianNB # to bulid classifier model
from sklearn.preprocessing import LabelEncoder # to convert classes to number 
import sklearn.metrics as metrics # to calculate accuracy

## **Import Data**

In [None]:
# Load the Grab tweet dataset used for training (the file is read as semicolon-delimited)
df_grab = pd.read_csv(
    'https://raw.githubusercontent.com/risetdito/dataset-sa/master/grab/tweet_grab.csv',
    sep=';',
)

# Preview the first rows
df_grab.head()

In [None]:
# Number of rows per sentiment class
df_grab['sentiment'].value_counts()

## **Vectorization**

In [None]:
# Bag-of-words feature extraction: learn the vocabulary from the tweet text
# and count token occurrences per row
count_vector = CountVectorizer()
grab_counts = count_vector.fit_transform(df_grab['text'])

# Convert the sparse count matrix into a dense array
grab_feature = grab_counts.toarray()
grab_feature

In [None]:
# Create a DataFrame view of the count matrix, one column per vocabulary term.
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2,
# so prefer get_feature_names_out() and only fall back on older installations.
if hasattr(count_vector, 'get_feature_names_out'):
    feature_names = count_vector.get_feature_names_out()
else:
    feature_names = count_vector.get_feature_names()

grab_feature_matrix = pd.DataFrame(grab_feature, columns=feature_names)
grab_feature_matrix.head()

In [None]:
# Map each sentiment label to an integer class id
encoder = LabelEncoder()
sentiment_column = df_grab['sentiment']
grab_label = encoder.fit_transform(sentiment_column)
grab_label

## **Split Data**

In [None]:
# Hold out 30% of the rows for testing; the fixed random_state makes the
# shuffled split reproducible across runs
feature_train, feature_test, target_train, target_test = train_test_split(
    grab_feature,
    grab_label,
    test_size=0.3,
    shuffle=True,
    random_state=1,
)

# Report the shape of each of the four partitions
for partition in (feature_train, feature_test, target_train, target_test):
    print(partition.shape)

## **Modelling**

In [None]:
# Fit a Gaussian Naive Bayes classifier on the training split
nb = GaussianNB()
nb.fit(feature_train, target_train)

# Predict encoded class ids for the held-out test features
target_predicted = nb.predict(feature_test)
target_predicted

## **Validation**

In [None]:
# Confusion matrix: rows are the true classes, columns are the predicted classes
cm = metrics.confusion_matrix(y_true=target_test, y_pred=target_predicted)
cm

In [None]:
# Score the test-set predictions with each metric in turn.
# NOTE(review): precision/recall/F1 are called with their defaults, which in
# scikit-learn mean average='binary' and pos_label=1 - confirm the target has
# exactly two classes and that encoded class 1 is the intended positive class.
score_table = [
    ('Test model accuracy: ', metrics.accuracy_score),
    ('Test model precision: ', metrics.precision_score),
    ('Test model recall: ', metrics.recall_score),
    ('Test model F1 Score: ', metrics.f1_score),
]
for score_label, score_fn in score_table:
    print(score_label, score_fn(target_test, target_predicted))

## **Use the Model to Predict New Data**

In [None]:
# Import New Dataset
df_predict = pd.read_csv("https://raw.githubusercontent.com/risetdito/dataset-sa/master/grab/pred_grab.csv")
df_predict

In [None]:
# Reuse the already-fitted vectorizer (transform only, no refit) so the new
# text is encoded against the same vocabulary as the training features
pred_counts = count_vector.transform(df_predict['text'])
pred_feature = pred_counts.toarray()
pred_feature

In [None]:
# Classify the new feature matrix with the trained model; the result holds
# encoded class ids, not the original sentiment labels
prediction_result = nb.predict(X=pred_feature)
prediction_result

In [None]:
# Build the prediction column with human-readable labels: the classifier emits
# the integer ids produced by the LabelEncoder, so decode them back to the
# original sentiment strings. The index is taken from df_predict so that a
# column-wise concat lines up row-for-row even if df_predict's index is not
# the default 0..n-1 range.
df_predicted = pd.DataFrame(
    {"Predicted Sentiment": encoder.inverse_transform(prediction_result)},
    index=df_predict.index,
)

In [None]:
# Place the predictions next to the input rows (column-wise concatenation)
df_result = pd.concat(
    objs=[df_predict, df_predicted],
    axis='columns',
)
df_result