In [1]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, roc_auc_score, f1_score

import pickle

Modelling

In [2]:
# Load the cleaned review dataset. This cell reads the 'stemmed_text' column
# as the model input and the 'label' column as the target.
df_gopay = pd.read_csv('gopay_app_after_full_cleaning.csv')

# Map string labels to integers (positive -> 1, negative -> 0).
# Series.map turns any value missing from the dict into NaN.
df_gopay['label'] = df_gopay['label'].map({'positive': 1, 'negative': 0})

# Features and target as NumPy arrays
X = df_gopay['stemmed_text'].to_numpy()
y = df_gopay['label'].to_numpy()

# 80/20 split, stratified on the label so both parts keep the class ratio;
# random_state fixes the shuffle for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, stratify=y, random_state=42)

# TF-IDF vectorization: fit the vocabulary on the training texts only, then
# reuse the fitted vectorizer to transform the test texts.
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Logistic Regression model
model = LogisticRegression(C=1.2, solver='liblinear', random_state=42)
model.fit(X_train_tfidf, y_train)

# Persist the fitted model and vectorizer with pickle. The context managers
# guarantee each file is flushed and closed, even if pickling raises.
with open('logreg.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)
with open('tfidf_vectorizer.pkl', 'wb') as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)

# Predictions on the held-out test set
y_pred = model.predict(X_test_tfidf)

In [3]:
# Show only the rows whose mapped label equals 0.
df_gopay.loc[df_gopay['label'] == 0]

Unnamed: 0,content,score,label,month,clean_text,token_text,stemmed_text
11,Apk maling baru download trs top up tiba2 sald...,1,0,November,apk maling baru download trs top up tiba saldo...,"[apk, maling, download, top, up, saldo, hilang...",apk maling download top saldo bangkrut
12,Saldo belum masuk udah laporan malah di persul...,1,0,November,saldo belum masuk udah laporan malah di persul...,"[saldo, masuk, laporan, persulit, bilang, leng...",saldo masuk lapor sulit bilang lengkap isi ses...
14,Kalau gak niat ngasih pinjaman kok di tawarin ...,1,0,November,kalau gak niat ngasih pinjaman kok di tawarin ...,"[niat, mengasih, pinjaman, tawarin, iklan, nya...",niat asih pinjam tawarin iklan isi tolak
21,"Transfer bank harus d tingkatkan lagi, karna m...",1,0,November,transfer bank harus d tingkatkan lagi karna ma...,"[transfer, bank, tingkatkan, masuk, nya, realt...",transfer bank tingkat masuk realtime
28,Tiap main slot pakai rek gopay .gak pernah men...,1,0,November,tiap main slot pakai rek gopay gak pernah mena...,"[main, slot, pakai, rek, gopay, menang, slotnya]",main slot pakai rek gopay menang
...,...,...,...,...,...,...,...
22847,Tidak bisa di buka aplikasinya parah bnget gopay,1,0,April,tidak bisa di buka aplikasinya parah bnget gopay,"[buka, aplikasi, nya, parah, banget, gopay]",buka aplikasi parah gopay
22848,Ribet asw mau buka akun aja susah baru kli ini...,1,0,April,ribet asw mau buka akun aja susah baru kli ini...,"[ribet, asw, buka, akun, susah, kali, gue, dow...",ribet buka akun susah download aplikasi langsu...
22850,Susah masuknya malah anti trus,1,0,April,susah masuknya malah anti trus,"[susah, masuk, nya, anti]",susah masuk anti
22851,Terima kasih,2,0,April,terima kasih,"[terima, kasih]",terima kasih
