In [None]:
!pip install streamlit
!pip install pyngrok
!pip install scikit-learn

Collecting streamlit
  Downloading streamlit-1.51.0-py3-none-any.whl.metadata (9.5 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.51.0-py3-none-any.whl (10.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/10.2 MB[0m [31m54.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m43.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydeck, streamlit
Successfully installed pydeck-0.9.1 streamlit-1.51.0
Collecting pyngrok
  Downloading pyngrok-7.4.1-py3-none-any.whl.metadata (8.1 kB)
Downloading pyngrok-7.4.1-py3-none-any.whl (25 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.4.1


In [None]:
import pandas as pd
import re
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report
import pickle

def clean_text(text):
    """Normalize a raw SMS message before bag-of-words vectorization.

    The value is coerced to ``str`` and lowercased, tokens starting with
    ``http`` or ``www`` are removed, every character that is not ``a-z`` or
    whitespace is replaced by a space, and runs of whitespace are collapsed.

    Args:
        text: Raw message; any object is accepted and passed through ``str()``.

    Returns:
        The cleaned, lowercase, single-spaced message (possibly empty).
    """
    text = str(text).lower()
    text = re.sub(r"http\S+|www\S+", " ", text)
    text = re.sub(r"[^a-z\s]", " ", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text


print("1. Memuat data dan membersihkan teks...")
try:
    df = pd.read_csv('spam.csv', encoding='latin-1')

    # Only the first two columns are used: the class label and the message text.
    df = df.iloc[:, [0, 1]]
    df.columns = ['label', 'message']

    df['message'] = df['message'].apply(clean_text)
    df['label_num'] = df['label'].map({'ham': 0, 'spam': 1})

    # .map() yields NaN for any label other than 'ham'/'spam'. Fail here with a
    # clear message instead of letting NaN targets reach the split/training step.
    unknown_labels = df.loc[df['label_num'].isna(), 'label'].unique().tolist()
    if unknown_labels:
        raise ValueError(f"Label tidak dikenal (harus 'ham' atau 'spam'): {unknown_labels}")

    X = df.message
    y = df.label_num

    print(f"   - Data berhasil dimuat. Total baris: {len(df)}")

except Exception as e:
    # Best-effort handling: report the problem and leave X/y as None so the
    # guarded training step is skipped. exit() is deliberately not called here,
    # because inside a notebook it terminates the kernel and discards all state.
    print(f"Error fatal: Gagal memuat atau memproses data. Detail: {e}")
    X, y = None, None

# Train, evaluate and persist the classifier; skipped entirely when the data
# loading step left X/y as None.
if X is not None and y is not None:
    # stratify=y keeps the ham/spam ratio the same in both parts; the fixed
    # random_state makes the split reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )
    print(f"\n2. Data dibagi: Train={len(X_train)}, Test={len(X_test)}")

    print("\n3. Vektorisasi menggunakan CountVectorizer (Bag-of-Words)...")

    # The vocabulary is fitted on the training split only; the test split is
    # merely transformed with it.
    vectorizer = CountVectorizer(stop_words='english')
    X_train_vec = vectorizer.fit_transform(X_train.astype(str))
    X_test_vec = vectorizer.transform(X_test.astype(str))
    print("   - CountVectorizer berhasil dihitung.")

    # Step header added so the progress log no longer jumps from step 3 to step 5.
    print("\n4. Melatih model Multinomial Naive Bayes (alpha=0.01)...")
    model = MultinomialNB(alpha=0.01)
    model.fit(X_train_vec, y_train)
    print("   - Pelatihan model selesai.")

    y_pred = model.predict(X_test_vec)
    print("\n5. Laporan Klasifikasi pada Data Test (CountVectorizer & Alpha 0.01):")
    print(classification_report(y_test, y_pred, target_names=['Ham', 'Spam'], zero_division=0))

    print("\n6. Menyimpan model (CountVectorizer) baru untuk Streamlit...")
    with open('nb_spam_model.pkl', 'wb') as f:
        pickle.dump(model, f)

    # NOTE: the file holds a CountVectorizer despite its name; the name is kept
    # because app.py loads the vectorizer from exactly this path.
    with open('tfidf_vectorizer.pkl', 'wb') as f:
        pickle.dump(vectorizer, f)

1. Memuat data dan membersihkan teks...
   - Data berhasil dimuat. Total baris: 5572

2. Data dibagi: Train=4457, Test=1115

3. Vektorisasi menggunakan CountVectorizer (Bag-of-Words)...
   - CountVectorizer berhasil dihitung.
   - Pelatihan model selesai.

5. Laporan Klasifikasi pada Data Test (CountVectorizer & Alpha 0.01):
              precision    recall  f1-score   support

         Ham       0.99      0.99      0.99       966
        Spam       0.94      0.91      0.93       149

    accuracy                           0.98      1115
   macro avg       0.96      0.95      0.96      1115
weighted avg       0.98      0.98      0.98      1115


6. Menyimpan model (CountVectorizer) baru untuk Streamlit...


In [None]:
!pip install streamlit pyngrok -q

In [None]:
%%writefile app.py
import streamlit as st
import pickle
import pandas as pd
import numpy as np
import re

# Load the trained classifier and its fitted vectorizer from disk.
# SECURITY: pickle.load can execute arbitrary code embedded in the file, so
# only load artifacts that were produced by a trusted training run.
try:
    # Context managers close both file handles even if unpickling fails.
    with open('nb_spam_model.pkl', 'rb') as model_file:
        model = pickle.load(model_file)
    with open('tfidf_vectorizer.pkl', 'rb') as vectorizer_file:
        vectorizer = pickle.load(vectorizer_file)
except FileNotFoundError:
    st.error("Error: Pastikan file 'nb_spam_model.pkl' dan 'tfidf_vectorizer.pkl' sudah ada.")
    st.stop()
except Exception as e:
    st.error(f"Error saat memuat model: {e}")
    st.stop()

def clean_text(text):
    """Return a normalized copy of ``text`` for the vectorizer.

    The input is stringified and lowercased; then, in order, URL-like tokens,
    characters outside ``a-z``/whitespace, and whitespace runs are each
    replaced by a single space. Leading and trailing spaces are trimmed.
    """
    cleaned = str(text).lower()
    # Order matters: URLs must be dropped before punctuation is stripped.
    for pattern in (r"http\S+|www\S+", r"[^a-z\s]", r"\s+"):
        cleaned = re.sub(pattern, " ", cleaned)
    return cleaned.strip()

def predict_message(text_message):
    """Classify a single message and return ``'spam'`` or ``'ham'``.

    The message is cleaned with ``clean_text``, vectorized with the loaded
    ``vectorizer`` and passed to the loaded ``model``; a predicted class of 1
    is reported as spam, anything else as ham.
    """
    features = vectorizer.transform([clean_text(text_message)])
    predicted_class = model.predict(features)[0]
    if predicted_class == 1:
        return 'spam'
    return 'ham'


# ---- Streamlit page: a text box, a button, and the classification result ----
st.set_page_config(page_title="SMS Spam Classifier", layout="centered")

st.title("Aplikasi Klasifikasi SMS SPAM/HAM")
st.markdown("Deteksi spam menggunakan **Multinomial Naïve Bayes (CountVectorizer + Alpha 0.01)**.")
text = st.text_area("Masukkan teks SMS di sini:",
                    "Contoh: you are the winner, claim your new iphone")

if st.button("Deteksi Pesan"):
    if not text.strip():
        st.warning("Mohon masukkan pesan sebelum melakukan prediksi.")
    else:
        pred_label = predict_message(text)

        # The probabilities must come from the same cleaned text that
        # predict_message() classifies; vectorizing the raw input could yield
        # different tokens and a percentage that contradicts the shown label.
        X_prob = vectorizer.transform([clean_text(text)])
        probabilities = model.predict_proba(X_prob)[0]

        if pred_label == 'spam':
            st.error("Hasil Klasifikasi: Pesan ini adalah **SPAM**.")
            st.write(f"Probabilitas Spam: **{probabilities[1]*100:.2f}%**")
        else:
            st.success("Hasil Klasifikasi: Pesan ini adalah **HAM (Bukan Spam)**.")
            st.write(f"Probabilitas Ham: **{probabilities[0]*100:.2f}%**")

st.markdown("---")


Writing app.py


In [None]:
!kill $(lsof -t -i:8501)


kill: usage: kill [-s sigspec | -n signum | -sigspec] pid | jobspec ... or kill -l [sigspec]


In [None]:
import os
from getpass import getpass

from pyngrok import ngrok

# Secrets must not live in the notebook: read the authtoken from the
# NGROK_AUTHTOKEN environment variable, or prompt for it without echoing.
# The token that used to be hardcoded in this cell has been exposed and should
# be revoked in the ngrok dashboard.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(ngrok_token)




In [None]:
!streamlit run app.py &>/content/logs.txt &

from pyngrok import ngrok
public_url = ngrok.connect(8501)
print("Your Streamlit app is live at:", public_url.public_url)


Your Streamlit app is live at: https://mediative-nonseriously-hellen.ngrok-free.dev
