In [None]:
import pandas as pd
import subprocess
import os

# The Twitter/X auth token is read from the environment so the credential is
# never stored in (or shared with) the notebook itself.
# NOTE: a token that was ever committed in a notebook should be treated as
# leaked and revoked.
tokenTwitter = os.environ.get('TWITTER_AUTH_TOKEN', '').strip()
if not tokenTwitter:
    print("Peringatan: variabel lingkungan TWITTER_AUTH_TOKEN belum diatur.")

# Scraping parameters: output file name, search query, number of tweets to
# fetch and which search tab to read.
config = {
    'namaFile': 'pendakian.csv',
    'kataKunci': 'pendakian since:2018-01-01 until:2025-12-01 lang:id',
    'limitTweets': 500,
    'tab': 'LATEST'
}

def installPkg():
    """Refresh the apt package index, then install the system packages one by one.

    Every ``apt-get`` invocation uses ``check=False``: a failing command does
    not raise, so the remaining packages are still attempted.
    """
    def aptGet(*argumen):
        # Thin wrapper so each call site only spells out the apt sub-command.
        subprocess.run(['apt-get', *argumen], check=False)

    aptGet('update')

    # Presumably the shared libraries a headless Chromium needs — TODO confirm.
    paketSistem = (
        'ca-certificates',
        'curl',
        'gnupg',
        'libatk1.0-0',
        'libatk-bridge2.0-0',
        'libcups2',
        'libxcomposite1',
        'libxdamage1',
        'libxfixes3',
        'libxrandr2',
        'libgbm1',
        'libpango-1.0-0',
        'libcairo2',
        'libasound2',
    )

    for namaPaket in paketSistem:
        aptGet('install', '-y', namaPaket)

def installNode():
    """Install Node.js 20.x from the NodeSource apt repository and report the result.

    Steps: create the keyring directory, download and de-armor the NodeSource
    signing key, register the apt source, refresh the index and install
    ``nodejs``. Each step runs with ``check=False`` (best effort); the final
    ``node -v`` call is what decides whether success or failure is printed.

    Returns:
        None. Progress and errors are printed; no exception escapes.
    """
    perintah = [
        ['mkdir', '-p', '/etc/apt/keyrings'],
        # --yes lets gpg overwrite an existing keyring, so re-running the cell
        # does not stop on an "overwrite?" question.
        ['bash', '-c', 'curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --yes --dearmor -o /etc/apt/keyrings/nodesource.gpg'],
        # signed-by must point at the keyring written by the previous step.
        ['bash', '-c', 'echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list'],
        ['apt-get', 'update'],
        ['apt-get', 'install', 'nodejs', '-y']
    ]

    try:
        for cmd in perintah:
            subprocess.run(cmd, check=False)

        hasil = subprocess.run(['node', '-v'], capture_output=True, text=True)
        versi = (hasil.stdout or '').strip()

        # Only claim success when node actually ran and printed a version.
        if hasil.returncode == 0 and versi:
            print(f"Node.js Terinstall: {versi}")
        else:
            detail = (hasil.stderr or '').strip() or f"exit code {hasil.returncode}"
            print(f"Error Instal Node.js: node -v gagal ({detail})")

    except Exception as e:
        # e.g. FileNotFoundError when a binary (bash, apt-get, node) is missing.
        print(f"Error Instal Node.js: {e}")

def installPlaywright():
    """Reinstall the global ``playwright`` npm package and download Chromium.

    The three commands run in order with ``check=False``, so a non-zero exit
    status is ignored. If launching a command raises (for example the binary
    does not exist), the remaining commands are skipped and the error is
    printed.
    """
    langkah = (
        ['npm', 'uninstall', '-g', 'playwright'],
        ['npm', 'install', '-g', 'playwright'],
        ['playwright', 'install', 'chromium'],
    )

    try:
        for argv in langkah:
            subprocess.run(argv, check=False)
    except Exception as galat:
        print(f"Error Instal Playwright: {galat}")

def ambilTweet():
    """Scrape tweets by running ``tweet-harvest`` through ``npx``.

    Reads the module-level ``config`` (output file, search query, tab, limit)
    and ``tokenTwitter``. The tool's output is captured rather than streamed;
    only a success or failure line is printed.

    Returns:
        None. The outcome is reported on stdout; no exception escapes.
    """
    print("Mulai Ambil Tweet...")
    print(f"Kata Kunci: {config['kataKunci']}")
    print(f"Jumlah: {config['limitTweets']}")
    print(f"File Output: {config['namaFile']}")

    # Without a token the scraper cannot authenticate; fail fast with a clear
    # message instead of launching npx with an empty --token value.
    if not tokenTwitter:
        print("Gagal Ambil Tweet: token Twitter kosong.")
        return

    perintah = [
        'npx', '-y', 'tweet-harvest@2.6.1',
        '-o', config['namaFile'],
        '-s', config['kataKunci'],
        '--tab', config['tab'],
        '-l', str(config['limitTweets']),
        '--token', tokenTwitter
    ]

    try:
        hasil = subprocess.run(perintah, capture_output=True, text=True)
    except Exception as e:
        # e.g. FileNotFoundError when npx is not installed.
        print(f"Error Tweet-Harvest: {e}")
        return

    if hasil.returncode == 0:
        print("Ambil Tweet Selesai.")
    else:
        # Fall back to stdout (then the exit code) when stderr is empty, so
        # the failure message is never blank.
        detail = (
            (hasil.stderr or '').strip()
            or (hasil.stdout or '').strip()
            or f"exit code {hasil.returncode}"
        )
        print(f"Gagal Ambil Tweet: {detail}")

def main():
    """Run the scraping pipeline end to end.

    Order: system packages, Node.js, Playwright, then the tweet harvest. A
    banner is printed before the first step and a completion line after the
    last one.
    """
    print("PROSES AMBIL TWEET", "=" * 50, sep="\n")

    # Each step depends on the tooling installed by the one before it.
    installPkg()
    installNode()
    installPlaywright()
    ambilTweet()

    print()
    print("SELESAI!")

# Start the pipeline only when this code is executed directly (as a script or
# a notebook cell, where __name__ is "__main__"), not when it is imported.
if __name__ == "__main__":
    main()

In [None]:
import pandas as pd

# Load the scraped tweets from an absolute path (looks like a Colab upload
# location — adjust when running elsewhere).
df = pd.read_csv('/content/temp_group_1.csv')

# Expose the tweet body as `text` and give every row the label 1.
# NOTE(review): label 1 appears to be the positive class of the classifier
# trained later in this notebook — confirm.
df = df.assign(text=df['full_text'], label=1)

# Keep only the two columns needed for training and write them out.
new_df = df.loc[:, ['text', 'label']]
new_df.to_csv('dataset.csv', index=False)

In [None]:
import pandas as pd
import re

def clean_text(text):
    """Normalise a raw tweet for modelling.

    Removes, in order: URLs, @mentions, #hashtags and the retweet marker
    ``RT``; then collapses every run of whitespace (including newlines) into
    one space and trims both ends.

    Args:
        text: Raw tweet value. Missing values (``None``/NaN) are accepted and
            non-string values are converted with ``str``.

    Returns:
        The cleaned string, or ``""`` for a missing value.
    """
    if pd.isna(text):
        return ""

    text = str(text)
    text = re.sub(r'http\S+', '', text)
    text = re.sub(r'@\w+', '', text)
    text = re.sub(r'#\w+', '', text)
    # \b keeps the match on the standalone marker "RT"; without it the pattern
    # also ate the tail of words ending in "RT" (e.g. "SMART phone").
    text = re.sub(r'\bRT\s+', '', text)
    # \s+ already matches newlines, so no separate "\n" replacement is needed.
    text = re.sub(r'\s+', ' ', text)

    return text.strip()

# Read the labelled tweets, clean the `text` column with clean_text() and
# write the result to a new file; all other columns pass through unchanged.
df = pd.read_csv('new_file.csv')
df = df.assign(text=df['text'].apply(clean_text))
df.to_csv('cleaned_file.csv', index=False)

In [None]:
import pandas as pd
import numpy as np
import joblib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

# Module-level handles to the most recently trained objects; load_data()
# rebinds both during a training run.
classifier = None
vectorizer = None

def load_data(filename):
    """Train a TF-IDF + logistic-regression classifier from a labelled CSV and save it.

    Despite its name this function performs the whole training run: it reads
    the CSV, splits it 80/20 (stratified on the label), fits the vectorizer
    and the classifier, prints accuracy plus a classification report, and
    dumps both objects to 'tweet_classifier_model.pkl'. The module-level
    ``classifier`` and ``vectorizer`` are rebound as a side effect.

    Args:
        filename: Path to a CSV file containing ``text`` and ``label`` columns.

    Returns:
        The fitted classifier, or ``None`` when a required column is missing.

    Raises:
        FileNotFoundError: from ``pd.read_csv`` when the file does not exist.
        Exception: errors raised by the split or the fit (for example when too
            few rows remain) are not handled here.
    """
    global classifier, vectorizer

    df = pd.read_csv(filename)

    required_cols = ['text', 'label']
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        print("Error: CSV harus punya kolom 'text' dan 'label'")
        return None

    # An empty string written to CSV is read back as NaN, and the vectorizer
    # rejects NaN documents; drop incomplete rows before training.
    total_rows = len(df)
    df = df.dropna(subset=required_cols)
    dropped_rows = total_rows - len(df)
    if dropped_rows:
        print(f"{dropped_rows} baris tanpa 'text' atau 'label' dilewati")

    # Cast to str so purely numeric tweets are not treated as numbers.
    X = df['text'].astype(str)
    y = df['label']

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # NOTE(review): English stop words are applied here, while the scraping
    # query in this notebook requests Indonesian tweets (lang:id) — confirm
    # this is intended.
    vectorizer = TfidfVectorizer(max_features=3000, stop_words='english')
    # Fit on the training split only, so the test split stays unseen.
    X_train_vec = vectorizer.fit_transform(X_train)
    X_test_vec = vectorizer.transform(X_test)

    classifier = LogisticRegression(max_iter=1000, random_state=42)
    classifier.fit(X_train_vec, y_train)

    y_pred = classifier.predict(X_test_vec)
    accuracy = accuracy_score(y_test, y_pred)

    print(f"Akurasi model: {accuracy:.4f}")
    print(classification_report(y_test, y_pred))

    # Persist model and vectorizer together so prediction code can restore
    # both from a single file.
    model_data = {
        'model': classifier,
        'vectorizer': vectorizer
    }
    joblib.dump(model_data, 'tweet_classifier_model.pkl')
    print("Model disimpan sebagai 'tweet_classifier_model.pkl'")

    return classifier

def main():
    """Ask for a dataset CSV, train the model on it and print the outcome.

    The file name is read from standard input and stripped. A missing file
    and any other training error are reported on stdout instead of raised.
    """
    filename = input("Masukkan nama file CSV dataset: ").strip()

    try:
        # load_data() returns None when the CSV lacks the required columns;
        # it has already printed the reason in that case.
        if load_data(filename) is not None:
            print("Model berhasil dibuat dan disimpan.")
    except FileNotFoundError:
        print(f"File '{filename}' tidak ditemukan")
    except Exception as galat:
        print(f"Terjadi error: {galat!s}")

# Start the interactive training prompt only when this code is executed
# directly (script or notebook cell), not when it is imported.
if __name__ == "__main__":
    main()

In [None]:
import pandas as pd
import numpy as np
import joblib
import warnings
warnings.filterwarnings('ignore')

def load_model():
    """Load the saved classifier and vectorizer from 'tweet_classifier_model.pkl'.

    The file is expected to hold a dict with the keys ``'model'`` and
    ``'vectorizer'``.

    Returns:
        ``(classifier, vectorizer)`` on success, or ``(None, None)`` when the
        file is missing or cannot be loaded. The reason is printed.
    """
    # NOTE: joblib.load unpickles the file, which can execute arbitrary code;
    # only load model files from a trusted source.
    try:
        model_data = joblib.load('tweet_classifier_model.pkl')
        classifier = model_data['model']
        vectorizer = model_data['vectorizer']
    except FileNotFoundError:
        print("Model 'tweet_classifier_model.pkl' tidak ditemukan")
        print("   Silakan train model terlebih dahulu")
        return None, None
    except Exception as e:
        # A corrupt file or an unexpected structure is a different problem
        # from a missing file; report it as such. Catching Exception (not a
        # bare except) lets KeyboardInterrupt/SystemExit propagate.
        print(f"Gagal memuat model 'tweet_classifier_model.pkl': {e}")
        return None, None

    print("Model berhasil dimuat")
    return classifier, vectorizer

def predict_single_tweet(classifier, vectorizer, tweet):
    """Classify one tweet and print a short report.

    Args:
        classifier: Object providing ``predict`` and ``predict_proba``.
        vectorizer: Object providing ``transform`` for a list of texts.
        tweet: The tweet text to classify.

    Returns:
        A ``(prediction, probability)`` pair, where ``probability`` is the
        value at index 1 of the predicted probabilities.
    """
    features = vectorizer.transform([tweet])
    prediction = classifier.predict(features)[0]
    probability = classifier.predict_proba(features)[0]

    label = "GUNUNG MELETUS" if prediction == 1 else "BUKAN gunung meletus"

    # Index 1 is reported as the "gunung meletus" probability and index 0 as
    # its complement — assumes the classes are ordered [0, 1]; TODO confirm.
    prob_volcano = probability[1]
    prob_not = probability[0]

    print("\nHASIL PREDIKSI:")
    print(f"   Tweet: {tweet}")
    print(f"   Kategori: {label}")
    print(f"   Probabilitas gunung meletus: {prob_volcano:.2%}")
    print(f"   Probabilitas bukan: {prob_not:.2%}")

    return prediction, prob_volcano

def predict_from_csv(classifier, vectorizer, csv_file):
    """Classify every row of a CSV file, print a report and save the results.

    Args:
        classifier: Object providing ``predict`` and ``predict_proba``.
        vectorizer: Object providing ``transform`` for a list of texts.
        csv_file: Path to a CSV file with a ``text`` column.

    Returns:
        A DataFrame with the columns ``text``, ``prediction``, ``category``,
        ``prob_gunung_meletus`` and ``prob_bukan`` (also written to
        'hasil_prediksi.csv'), or ``None`` when the file is missing, lacks a
        ``text`` column, or any other error occurs (the error is printed).
    """
    try:
        df = pd.read_csv(csv_file)

        if 'text' not in df.columns:
            print("File CSV harus memiliki kolom 'text'")
            return

        # Empty cells are parsed as NaN and numeric-looking tweets as numbers;
        # normalise everything to str so vectorising and the slicing below
        # cannot fail on a single bad row. Rows are kept (as "") so the output
        # stays aligned with the input file.
        tweets = ['' if pd.isna(t) else str(t) for t in df['text']]
        tweets_vec = vectorizer.transform(tweets)

        predictions = classifier.predict(tweets_vec)
        probabilities = classifier.predict_proba(tweets_vec)

        results = pd.DataFrame({
            'text': tweets,
            'prediction': predictions,
            'category': ['GUNUNG MELETUS' if p == 1 else 'BUKAN' for p in predictions],
            'prob_gunung_meletus': probabilities[:, 1],
            'prob_bukan': probabilities[:, 0]
        })

        print("\nHASIL PREDIKSI DARI FILE:", csv_file)
        print("=" * 80)

        for nomor, row in enumerate(results.itertuples(index=False), start=1):
            # Only the first 100 characters are shown to keep the report short.
            print(f"\nTweet {nomor}: {row.text[:100]}...")
            print("  Kategori:", row.category)
            print("  Probabilitas gunung meletus:", f"{row.prob_gunung_meletus:.2%}")

        print("\n" + "=" * 80)
        print("STATISTIK:")
        volcano_count = int((results['prediction'] == 1).sum())
        total_count = len(results)
        print("   Jumlah tweet:", total_count)
        print("   Gunung meletus:", volcano_count, "tweet")
        print("   Bukan:", total_count - volcano_count, "tweet")

        results.to_csv('hasil_prediksi.csv', index=False)
        print("\nHasil prediksi disimpan sebagai 'hasil_prediksi.csv'")

        return results

    except FileNotFoundError:
        print("File", csv_file, "tidak ditemukan")
    except Exception as e:
        print("Error:", str(e))

def main():
    """Run the interactive prediction menu.

    Loads the saved model first and returns immediately if that fails. Then
    loops over a three-item menu: classify one typed tweet, classify a CSV
    file, or quit. Any other input prints a hint and shows the menu again.
    """
    judul_garis = "=" * 60
    print(judul_garis)
    print("SISTEM PREDIKSI TWEET GUNUNG MELETUS")
    print(judul_garis)

    classifier, vectorizer = load_model()

    # Nothing can be predicted without both objects.
    if classifier is None or vectorizer is None:
        return

    garis_menu = "-" * 40
    garis_bagian = "=" * 40

    while True:
        print("\n" + garis_menu)
        print("MENU PREDIKSI:")
        print("1. Input tweet manual")
        print("2. Upload file CSV")
        print("3. Keluar")
        print(garis_menu)

        choice = input("Pilih menu (1/2/3): ").strip()

        if choice == '3':
            print("\nProgram selesai")
            break

        if choice == '1':
            print("\nINPUT TWEET MANUAL")
            print(garis_bagian)
            tweet = input("Masukkan teks tweet: ").strip()
            if not tweet:
                print("Tweet tidak boleh kosong")
            else:
                predict_single_tweet(classifier, vectorizer, tweet)
            continue

        if choice == '2':
            print("\nUPLOAD FILE CSV")
            print(garis_bagian)
            print("Format file CSV harus memiliki kolom 'text'")
            print("Contoh struktur file:")
            print("text")
            print("\"gunung merapi erupsi malam ini\"")
            print("\"hari ini cuaca cerah\"")
            print(garis_bagian)

            csv_file = input("Masukkan nama file CSV: ").strip()
            if not csv_file:
                print("Nama file tidak boleh kosong")
            else:
                predict_from_csv(classifier, vectorizer, csv_file)
            continue

        print("Pilihan tidak valid. Silakan pilih 1, 2, atau 3.")

# Start the interactive prediction menu only when this code is executed
# directly (script or notebook cell), not when it is imported.
if __name__ == "__main__":
    main()

SISTEM PREDIKSI TWEET GUNUNG MELETUS
Model berhasil dimuat

----------------------------------------
MENU PREDIKSI:
1. Input tweet manual
2. Upload file CSV
3. Keluar
----------------------------------------
Pilih menu (1/2/3): 1

INPUT TWEET MANUAL
Masukkan teks tweet: Brandon Stanton jadi sosok di balik Humans of New York, sebuah proyek fotografi yang membagikan kisah-kisah menarik dari ribuan orang yang ia temui.   Kepada Mata Najwa, Brandon bercerita soal asal mula Humans of New York, impak-impak yang terbangun karenanya, hingga karya-karyanya yang lain seperti film dokumenter, buku, hingga pameran.   Sudah tayang! Sosok di Balik Humans of New York: Pemburu Ribuan Wajah tayang di YouTube Najwa Shihab dan http://narasi.tv  | Mata Najwa  #MataNajwa #HumansofNewYork #Narasi #JadiPaham

HASIL PREDIKSI:
   Tweet: Brandon Stanton jadi sosok di balik Humans of New York, sebuah proyek fotografi yang membagikan kisah-kisah menarik dari ribuan orang yang ia temui.   Kepada Mata Najwa, Brando