### Model Naiye Bayes Percobaan

In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn import metrics

df=pd.read_csv('pre_content.csv')
df.head(10)

Unnamed: 0,content,sentimen,url_berita
0,presiden joko widodo memimpin upacara peringat...,0,https://20.detik.com/detikupdate/20231001-2310...
1,kpk mengungkap oknum menghilangkan barang bukt...,0,https://20.detik.com/detikupdate/20231001-2310...
2,remaja palestina umur 14 tahun ditembak mati p...,0,https://20.detik.com/detikupdate/20230831-2308...
3,presiden joko widodo memimpin upacara peringat...,0,https://20.detik.com/detikupdate/20231001-2310...
4,20detik merangkum kejadian heboh menarik sepek...,0,https://20.detik.com/detikupdate/20231001-2310...
5,sekjen pdip hasto kristiyanto berbicara terkai...,0,https://20.detik.com/detikupdate/20231001-2310...
6,sekjen pdip hasto kristiyanto membenarkan ganj...,0,https://20.detik.com/detikupdate/20231001-2310...
7,menko polhukam mahfud md buka suara temuan 12 ...,0,https://20.detik.com/detikupdate/20231001-2310...
8,saksi sidang korupsi bt 4g kominfo menyebut me...,0,https://20.detik.com/detikupdate/20231001-2310...
9,menko polhukam mahfud md buka suara oknum menc...,0,https://20.detik.com/detikupdate/20231001-2310...


In [19]:
def map_sentiment(sentiment):
    if sentiment == 1:
        return 'positif'
    elif sentiment == 0:
        return 'netral'
    else:
        return 'negatif'

In [20]:
df['sentimen'] = df['sentimen'].map(map_sentiment)

In [21]:
X = df['content']
y = df['sentimen']

In [22]:
def prepare_datasets(corpus, labels, test_data_proportion=0.3):
    train_X, test_X, train_Y, test_Y = train_test_split(corpus, labels,
    test_size=0.33,random_state=42)
    return train_X, test_X, train_Y, test_Y

norm_train_corpus, norm_test_corpus, train_labels, test_labels = prepare_datasets(X,y,test_data_proportion=0.3)

In [23]:
#Fungsi untuk mengekstraksi feature menggunakan TF-IDF Model
def tfidf_transformer(bow_matrix):
    transformer = TfidfTransformer(norm='l2',
                                    smooth_idf=True,
                                    use_idf=True)
    tfidf_matrix = transformer.fit_transform(bow_matrix)
    return transformer, tfidf_matrix
def tfidf_extractor(corpus, ngram_range=(1,2)):
    vectorizer = TfidfVectorizer(min_df=1,
                                norm='l2',
                                smooth_idf=True,
                                use_idf=True,
                                ngram_range=ngram_range)
    features = vectorizer.fit_transform(corpus)
    return vectorizer, features

In [24]:
#mengunakan model TF-IDF untuk mengekstraksi feature
tfidf_vectorizer, tfidf_train_features = tfidf_extractor(norm_train_corpus)
tfidf_test_features = tfidf_vectorizer.transform(norm_test_corpus)

In [25]:
# Inisialisasi dan latih model Naive Bayes
nb_classifier = MultinomialNB()
nb_classifier.fit(tfidf_train_features, train_labels)


In [26]:
predicted_labels = nb_classifier.predict(tfidf_test_features)

In [27]:
accuracy = accuracy_score(test_labels, predicted_labels)
print("Akurasi Model:", accuracy)

Akurasi Model: 0.5943434343434344


In [28]:
from sklearn.metrics import classification_report
print(classification_report(test_labels, predicted_labels))

              precision    recall  f1-score   support

     negatif       0.59      0.56      0.57      1646
      netral       0.61      0.55      0.58      1670
     positif       0.59      0.68      0.63      1634

    accuracy                           0.59      4950
   macro avg       0.60      0.59      0.59      4950
weighted avg       0.60      0.59      0.59      4950



### Tambahan jika GridSearch

In [29]:
from sklearn.model_selection import GridSearchCV

In [30]:
nb_classifier = MultinomialNB()
param_grid = {
    'alpha': [0.1, 0.5, 1.0, 2.0, 5.0, 10.0]
    }
grid_search = GridSearchCV(estimator=nb_classifier, param_grid=param_grid, cv=5, scoring='accuracy')

In [31]:
grid_search.fit(tfidf_train_features, train_labels)

In [32]:
# Cetak hyperparameter terbaik yang ditemukan
print("Hyperparameter Terbaik:", grid_search.best_params_)

# Cetak akurasi model terbaik
print("Akurasi Model Terbaik:", grid_search.best_score_)

Hyperparameter Terbaik: {'alpha': 0.1}
Akurasi Model Terbaik: 0.5977114427860697


In [33]:
# Prediksi label pada data pengujian menggunakan model terbaik
best_nb_classifier = grid_search.best_estimator_
predicted_labels = best_nb_classifier.predict(tfidf_test_features)

# Evaluasi akurasi pada data pengujian
accuracy = accuracy_score(test_labels, predicted_labels)
print("Akurasi pada Data Pengujian:", accuracy)

Akurasi pada Data Pengujian: 0.6068686868686869


### Random Search CV

In [34]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform

In [35]:
# Inisialisasi model Naive Bayes
nb_classifier = MultinomialNB()

# Daftar hyperparameter yang ingin disetel beserta distribusi nilai acaknya
param_dist = {'alpha': uniform(0, 10)}

In [36]:
# Inisialisasi RandomizedSearchCV
random_search = RandomizedSearchCV(estimator=nb_classifier, param_distributions=param_dist, n_iter=100, cv=5, scoring='accuracy', random_state=42)

# Lakukan pencarian random
random_search.fit(tfidf_train_features, train_labels)

In [37]:
# Cetak hyperparameter terbaik yang ditemukan
print("Hyperparameter Terbaik:", random_search.best_params_)

# Cetak akurasi model terbaik
print("Akurasi Model Terbaik:", random_search.best_score_)


Hyperparameter Terbaik: {'alpha': 0.05522117123602399}
Akurasi Model Terbaik: 0.5980099502487563


In [38]:
# Prediksi label pada data pengujian menggunakan model terbaik
best_nb_classifier = random_search.best_estimator_
predicted_labels = best_nb_classifier.predict(tfidf_test_features)

# Evaluasi akurasi pada data pengujian
accuracy = accuracy_score(test_labels, predicted_labels)
print("Akurasi pada Data Pengujian:", accuracy)

Akurasi pada Data Pengujian: 0.6082828282828283
