### Model Sentimen

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import numpy as np

In [2]:
def map_sentiment(sentiment):
    """Translate a sentiment code into its text label.

    Args:
        sentiment: Value to translate; it is compared with ``==`` against
            1 and then 0.

    Returns:
        'positif' when the value equals 1, 'netral' when it equals 0, and
        'negatif' for every other value (the fallback is not limited to a
        specific code).
    """
    # Codes are checked in order; anything matching neither falls through
    # to the default label.
    for code, label in ((1, 'positif'), (0, 'netral')):
        if sentiment == code:
            return label
    return 'negatif'

In [3]:
def prepare_datasets(corpus, labels, test_data_proportion=0.3, random_state=42,
                     stratify=None):
    """Split a corpus and its labels into train and test partitions.

    Args:
        corpus: Documents to split.
        labels: Labels aligned with ``corpus``.
        test_data_proportion: Forwarded to ``train_test_split`` as
            ``test_size``.
        random_state: Seed forwarded to ``train_test_split`` so the split is
            reproducible across runs.
        stratify: Optional array-like forwarded to ``train_test_split``.
            Pass ``labels`` to keep the class proportions the same in both
            partitions. The default ``None`` performs the plain random split.

    Returns:
        A tuple ``(train_X, test_X, train_Y, test_Y)``.
    """
    train_X, test_X, train_Y, test_Y = train_test_split(
        corpus,
        labels,
        test_size=test_data_proportion,
        random_state=random_state,
        stratify=stratify,  # None is also the library default
    )
    return train_X, test_X, train_Y, test_Y

In [4]:
def tfidf_extractor(corpus, ngram_range=(1,2)):
    """Fit a TF-IDF vectorizer on ``corpus`` and return it with the features.

    Args:
        corpus: Iterable of documents handed to ``fit_transform``.
        ngram_range: ``(min_n, max_n)`` tuple forwarded to
            ``TfidfVectorizer``; the default covers unigrams and bigrams.

    Returns:
        A ``(vectorizer, features)`` pair: the fitted ``TfidfVectorizer`` and
        the result of ``fit_transform`` on ``corpus``.
    """
    # All vectorizer settings in one place so they are easy to scan.
    tfidf_options = {
        'min_df': 1,
        'norm': 'l2',
        'smooth_idf': True,
        'use_idf': True,
        'ngram_range': ngram_range,
    }
    tfidf = TfidfVectorizer(**tfidf_options)
    tfidf_features = tfidf.fit_transform(corpus)
    return tfidf, tfidf_features

In [5]:
# Load the headline dataset and replace the 'sentimen' column with the text
# labels produced by map_sentiment, in one chained expression.
df = (
    pd.read_csv('pre_title.csv')
    .assign(sentimen=lambda frame: frame['sentimen'].map(map_sentiment))
)
# Preview the first ten rows.
df.head(10)

Unnamed: 0,title,url_berita,sentimen
0,strategi sleman dorong ekonomi pakai batik int...,https://20.detik.com/blak-blakan/20231204-2312...,positif
1,greysia polii ajak anak muda terus melangkah r...,https://20.detik.com/demi-indonesia/20231027-2...,positif
2,pan x jakcloth pesta anak nongkrong sambangi b...,https://20.detik.com/advertorial/20231109-2311...,positif
3,arahan megawati rakernas iv pdip turun rakyatm...,https://20.detik.com/detikupdate/20231001-2310...,positif
4,erick thohir ajak muda tak mager indonesia leb...,https://20.detik.com/demi-indonesia/20231027-2...,positif
5,pdip sebut ganjar muliakan petani nelayan jadi...,https://20.detik.com/detikupdate/20231001-2310...,positif
6,bertemu habib rizieq cak imin bantah bahas duk...,https://20.detik.com/detikupdate/20231001-2310...,positif
7,kades bandung mundur dukung amin anies salut,https://20.detik.com/detikupdate/20231001-2310...,positif
8,gestur prabowo ditanya soal kans berduet ganjar,https://20.detik.com/detikupdate/20231001-2310...,positif
9,anies bertemu alumni itb bahas metode ilmiah m...,https://20.detik.com/detikupdate/20231001-2310...,positif


In [6]:
# Split the titles and their sentiment labels into train and test sets,
# holding out 30% for testing. random_state is left at prepare_datasets'
# default.
x_train, x_test, y_train, y_test = prepare_datasets(
    df['title'],
    df['sentimen'],
    test_data_proportion=0.3,
)