# NLP Project – Indonesian News Title Classification

## 1. Exploratory Data Analysis (EDA)

In [None]:
import pandas as pd

# Column names referenced by the later cells. Without these definitions the
# notebook stops with NameError at the first use of label_col / text_col.
# NOTE(review): 'title' / 'category' are assumed column names for this CSV —
# confirm against the df.head() output below and adjust if they differ.
text_col = 'title'
label_col = 'category'

df = pd.read_csv('indonesian-news-title.csv')

# Fail early with a readable message if the assumed column names are wrong,
# instead of a bare KeyError several cells further down.
missing_cols = [col for col in (text_col, label_col) if col not in df.columns]
if missing_cols:
    raise KeyError(
        f"Column(s) {missing_cols} not found in the CSV; available columns: "
        f"{list(df.columns)}. Update text_col / label_col accordingly."
    )

# Keep the preview as the last expression so the notebook renders it.
df.head()

In [None]:
# Print a summary of the frame (columns, dtypes, non-null counts, memory use)
# to spot missing values before any text processing.
df.info()

In [None]:
# Class distribution: number of rows per label, most frequent first.
# label_col must already hold the name of the label column when this cell runs.
df[label_col].value_counts()

## 2. Pre-processing Data

In [None]:

import re

def clean_text(text):
    """Normalise a news title for bag-of-words features.

    The text is lower-cased and every character other than ASCII ``a``-``z``
    or whitespace is removed (digits, punctuation, accented letters).

    Args:
        text: The raw title. Non-string values, such as the NaN that pandas
            uses for an empty CSV cell, are treated as an empty title.

    Returns:
        The cleaned string; ``''`` for non-string input.
    """
    # A float NaN has no .lower(), so a single missing title would otherwise
    # abort the whole Series.apply() call with AttributeError.
    if not isinstance(text, str):
        return ''
    text = text.lower()
    text = re.sub(r'[^a-z\s]', '', text)
    return text

# Store a normalised copy of every title in a new column, then preview that
# column only (kept as the last expression so the notebook renders it).
df['clean_text'] = df.loc[:, text_col].apply(clean_text)
df.loc[:, ['clean_text']].head()


## 3. Feature Engineering

In [None]:

from sklearn.feature_extraction.text import TfidfVectorizer

# Target vector: the raw label column.
y = df.loc[:, label_col]

# Feature matrix: TF-IDF weights over the cleaned titles, with the vocabulary
# capped at the 5000 most frequent terms.
tfidf = TfidfVectorizer(max_features=5000)
X = tfidf.fit_transform(df.loc[:, 'clean_text'])


## 4. Model Training & Comparison

In [None]:

from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC

# Split the cleaned text (not the pre-computed matrix X) so that the TF-IDF
# vocabulary and IDF weights are learned from the training portion only.
# Fitting the vectorizer on every row before splitting lets test-set
# statistics leak into the features and makes the reported scores optimistic.
# Same test_size / random_state as before, so the row assignment is unchanged.
train_text, test_text, y_train, y_test = train_test_split(
    df['clean_text'], y, test_size=0.2, random_state=42
)
X_train = tfidf.fit_transform(train_text)  # learn vocabulary + IDF on train
X_test = tfidf.transform(test_text)        # reuse them, never refit, on test

# Two linear baselines trained on identical features for a fair comparison.
nb_model = MultinomialNB()
svm_model = LinearSVC()

nb_model.fit(X_train, y_train)
svm_model.fit(X_train, y_train)


## 5. Evaluation & Analysis

In [None]:

from sklearn.metrics import classification_report, accuracy_score

# Predict the held-out titles once per model and reuse the predictions for
# every metric below.
nb_pred, svm_pred = nb_model.predict(X_test), svm_model.predict(X_test)

# Headline metric: overall accuracy of each model.
nb_accuracy = accuracy_score(y_test, nb_pred)
svm_accuracy = accuracy_score(y_test, svm_pred)
print("Naive Bayes Accuracy:", nb_accuracy)
print("SVM Accuracy:", svm_accuracy)

# Per-class precision / recall / F1 for each model.
nb_report = classification_report(y_test, nb_pred)
svm_report = classification_report(y_test, svm_pred)
print("\nNaive Bayes Report:\n", nb_report)
print("\nSVM Report:\n", svm_report)
