# Vader Count Vectorizer

In [None]:
import pandas as pd
import numpy as np

# Read the sentiment results file into a DataFrame. The bare expression at the
# end of the cell makes the notebook render the frame as the cell output.
df1 = pd.read_csv(filepath_or_buffer="sentiment results")

df1

## Splitting the Data

In [None]:
# X is the input text for each row; y is the label the model will predict.
# Empty text cells are read back from CSV as NaN, which the scikit-learn text
# vectorizers reject, so they are replaced with empty strings. Filling (rather
# than dropping) keeps X row-aligned with y.
X = df1["Text Filtering"].fillna("")
y = df1["Vader Sentiment"]

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed random_state keeps the split
# the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)
print("Train Data:", X_train.shape[0])
print("Test Data:", X_test.shape[0])

## Implementation Count Vectorizer

In [None]:
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer

# Build a bag-of-words vectorizer that drops NLTK's English stop words, ignores
# terms seen in fewer than 7 documents or in more than 80% of documents, and
# keeps at most 2500 features. It is fitted on the training text only.
english_stopwords = stopwords.words('english')
count_vectorizer1 = CountVectorizer(
    stop_words=english_stopwords,
    max_df=0.8,
    min_df=7,
    max_features=2500,
)
response1 = count_vectorizer1.fit_transform(X_train)

In [None]:
# Preview the document-term matrix as a DataFrame with one column per term.
# get_feature_names() was removed in scikit-learn 1.2; use its replacement
# get_feature_names_out() and fall back only on older releases that lack it.
if hasattr(count_vectorizer1, "get_feature_names_out"):
    feature_names1 = count_vectorizer1.get_feature_names_out()
else:
    feature_names1 = count_vectorizer1.get_feature_names()

df_countvectorizer1 = pd.DataFrame(response1.toarray(), columns=feature_names1)
df_countvectorizer1.head()

## Transforming the Dataset

In [None]:
# Encode both splits with the already-fitted count vectorizer. The raw text in
# X_train / X_test is replaced by the resulting matrices, so this cell must not
# be run a second time without re-running the split first.
X_train, X_test = (
    count_vectorizer1.transform(split_text) for split_text in (X_train, X_test)
)
print(f'Vader Count Data Transformed.')

## Training the Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Train a 200-tree Random Forest (entropy split criterion, fixed seed) on the
# count-vector features. The fit() call is the last expression, so the notebook
# displays the fitted estimator.
text_classifier1 = RandomForestClassifier(
    criterion='entropy',
    n_estimators=200,
    random_state=0,
)
text_classifier1.fit(X_train, y_train)

In [None]:
# Predict a sentiment label for every row of the transformed test split.
count_predictions1 = text_classifier1.predict(X=X_test)

## Evaluating the Random Forest Model

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

# Evaluate the count-vector Random Forest on the held-out test split.
# The label order comes from the fitted classifier, so the confusion matrix
# always has one row/column per trained class and the heatmap tick labels
# cannot drift out of step with the matrix rows.
class_labels1 = text_classifier1.classes_
cm1 = confusion_matrix(y_test, count_predictions1, labels=class_labels1)
cr1 = classification_report(y_test, count_predictions1)
rf_cv1 = accuracy_score(y_test, count_predictions1)
print('Confusion Matrix Vader: \n', cm1)
print('Classification report: \n', cr1)
print(f'Vader Sentimen Random Forest Classifier on Count Vectors: {rf_cv1}')

# plot confusion matrix (rows = actual class, columns = predicted class)
plt.figure(figsize=(6, 4))
sentiment_classes = [str(class_label) for class_label in class_labels1]
sns.heatmap(cm1, cmap=plt.cm.Reds, annot=True, fmt='d',
            xticklabels=sentiment_classes,
            yticklabels=sentiment_classes)
plt.title('Confusion Matrix', fontsize=16)
plt.ylabel('Aktual Label', fontsize=12)
plt.xlabel('Predicted Label', fontsize=12)
plt.show()

# Vader TF-IDF

In [None]:
import pandas as pd
import numpy as np

# Read the sentiment results file into a DataFrame for the TF-IDF pipeline.
# The bare expression at the end of the cell makes the notebook render it.
df2 = pd.read_csv(filepath_or_buffer="sentiment results")

df2

## Splitting the Data

In [None]:
# X is the input text for each row; y is the label the model will predict.
# Empty text cells are read back from CSV as NaN, which the scikit-learn text
# vectorizers reject, so they are replaced with empty strings. Filling (rather
# than dropping) keeps X row-aligned with y.
X = df2["Text Filtering"].fillna("")
y = df2["Vader Sentiment"]

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed random_state keeps the split
# the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)
print("Train Data:", X_train.shape[0])
print("Test Data:", X_test.shape[0])

## Implementation TF-IDF

In [None]:
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer

# Build a TF-IDF vectorizer that drops NLTK's English stop words, ignores terms
# seen in fewer than 7 documents or in more than 80% of documents, and keeps at
# most 2500 features. It is fitted on the training text only.
english_stopwords = stopwords.words('english')
tfidf_vectorizer2 = TfidfVectorizer(
    stop_words=english_stopwords,
    max_df=0.8,
    min_df=7,
    max_features=2500,
)
response2 = tfidf_vectorizer2.fit_transform(X_train)

In [None]:
# Preview the TF-IDF matrix as a DataFrame with one column per term.
# get_feature_names() was removed in scikit-learn 1.2; use its replacement
# get_feature_names_out() and fall back only on older releases that lack it.
if hasattr(tfidf_vectorizer2, "get_feature_names_out"):
    feature_names2 = tfidf_vectorizer2.get_feature_names_out()
else:
    feature_names2 = tfidf_vectorizer2.get_feature_names()

df_tfidf_vectorizer2 = pd.DataFrame(response2.toarray(), columns=feature_names2)
df_tfidf_vectorizer2.head()

## Transforming the Dataset

In [None]:
# Encode both splits with the already-fitted TF-IDF vectorizer. The raw text in
# X_train / X_test is replaced by the resulting matrices, so this cell must not
# be run a second time without re-running the split first.
X_train, X_test = (
    tfidf_vectorizer2.transform(split_text) for split_text in (X_train, X_test)
)
print(f'Vader TF IDF Data Transformed.')

## Training the Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Train a 200-tree Random Forest (entropy split criterion, fixed seed) on the
# TF-IDF features. The fit() call is the last expression, so the notebook
# displays the fitted estimator.
text_classifier2 = RandomForestClassifier(
    criterion='entropy',
    n_estimators=200,
    random_state=0,
)
text_classifier2.fit(X_train, y_train)

In [None]:
# Predict a sentiment label for every row of the transformed test split.
tfidf_predictions2 = text_classifier2.predict(X=X_test)

## Evaluating the Random Forest Model

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

# Evaluate the TF-IDF Random Forest on the held-out test split.
# The label order comes from the fitted classifier, so the confusion matrix
# always has one row/column per trained class and the heatmap tick labels
# cannot drift out of step with the matrix rows.
class_labels2 = text_classifier2.classes_
cm2 = confusion_matrix(y_test, tfidf_predictions2, labels=class_labels2)
cr2 = classification_report(y_test, tfidf_predictions2)
rf_tfidf2 = accuracy_score(y_test, tfidf_predictions2)
print('Confusion matrix: \n', cm2)
print('Classification report: \n', cr2)
print(f'Vader Sentimen Random Forest Classifier on TF-IDF Vectors: {rf_tfidf2}')

# plot confusion matrix (rows = actual class, columns = predicted class)
plt.figure(figsize=(6, 4))
sentiment_classes = [str(class_label) for class_label in class_labels2]
sns.heatmap(cm2, cmap=plt.cm.Reds, annot=True, fmt='d',
            xticklabels=sentiment_classes,
            yticklabels=sentiment_classes)
plt.title('Confusion Matrix', fontsize=16)
plt.ylabel('Aktual Label', fontsize=12)
plt.xlabel('Predicted Label', fontsize=12)
plt.show()

## Comparison of Count Vectorizer and TF-IDF Results

In [None]:
import pandas as pd
# Put the two accuracy scores side by side in a small summary table, one row
# per feature-extraction method.
model_names = [
    'Vader Random Forest Classifier - Count Vectors',
    'Vader Random Forest Classifier - TFIDF Vectors',
]
accuracy_scores = [rf_cv1, rf_tfidf2]
model = {'Model': model_names, 'Accuracy Score': accuracy_scores}
model_df = pd.DataFrame(data=model)
model_df