<a href="https://colab.research.google.com/github/divasepta24/skripsi_analisis_diva/blob/main/skripsi_analisis_diva.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# =====================================================
# 1. IMPORT & SETUP
# =====================================================

# Pustaka Dasar untuk Manipulasi Data dan Matematika
import pandas as pd
import numpy as np
import re # Regular Expression

# Pustaka NLP
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Sastrawi (Indonesian stemming) is optional: try to import it and record in
# `sastrawi_available` whether the import succeeded.
try:
    from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
except ImportError:
    sastrawi_available = False
    print("Pustaka Sastrawi tidak ditemukan. Stemming Bahasa Indonesia akan dilewati.")
else:
    sastrawi_available = True

# Pustaka Pembelajaran Mesin dan Evaluasi
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_recall_fscore_support
from sklearn.feature_extraction.text import CountVectorizer

# Pustaka Deep Learning (PyTorch dan HuggingFace Transformers)
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import (
    BertTokenizer,
    BertForSequenceClassification,
    TrainingArguments,
    Trainer,
    pipeline
)

# Install/Upgrade the missing 'evaluate' and 'transformers' libraries
!pip install evaluate transformers --upgrade

import evaluate # Pustaka evaluasi HuggingFace

# Pustaka Visualisasi
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
import plotly.express as px
import plotly.graph_objects as go
from collections import Counter
import itertools
import os

# --- Pengaturan Awal ---

# Download the required NLTK resources.
# nltk.download() normally signals a failed download by returning False rather
# than raising, so the return value is checked for every resource in addition to
# catching exceptions. Each resource is handled on its own so that one failure
# does not skip the remaining downloads.
for _nltk_resource in ('stopwords', 'punkt', 'wordnet', 'vader_lexicon', 'punkt_tab'):
    try:
        _nltk_ok = nltk.download(_nltk_resource, quiet=True)
    except Exception as e:
        print(f"Gagal mengunduh NLTK resources: {e}")
        continue
    if not _nltk_ok:
        print(f"Gagal mengunduh NLTK resources: {_nltk_resource}")

# Pick the PyTorch device: CUDA when torch reports it as available, otherwise CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Menggunakan device: {device}")

# Directory used for saving the model; created here if missing
# (exist_ok=True means an already existing directory is not an error).
model_dir = "bert_sentiment_model"
os.makedirs(name=model_dir, exist_ok=True)
print(f"Direktori model dibuat di: {model_dir}")

# Global plot colours: three colours taken from seaborn's "viridis" palette.
PALETTE = sns.color_palette(palette="viridis", n_colors=3)