In [19]:
import numpy as np
import pandas as pd
import joblib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from imblearn.over_sampling import RandomOverSampler

# Load the dataset
data = pd.read_csv("DATASET_BERITA_ASLI_PALSU - Sheet1.csv")

# Create binary labels
data['PALSU'] = (data['LABEL'] == "ASLI").astype(int)

# Drop unnecessary columns
data = data.drop(["LABEL"], axis=1)

# Split the data into training and testing sets
X, y = data['TEKS'], data['PALSU']

# Vectorize the text data
vectorizer = TfidfVectorizer(stop_words="english", max_df=0.7)
X_vectorized = vectorizer.fit_transform(X)

# Address class imbalance using oversampling
oversampler = RandomOverSampler(random_state=42)
X_balanced, y_balanced = oversampler.fit_resample(X_vectorized, y)

# Train a LinearSVC classifier
clf = LinearSVC(C=1.0)  # You can adjust the C parameter
clf.fit(X_balanced, y_balanced)

# Take user input text
user_input_text = input("Enter a text to predict: ")

# Vectorize the user input text
user_input_vectorized = vectorizer.transform([user_input_text])

# Make predictions
prediction = clf.predict(user_input_vectorized)

# Display the prediction
if prediction[0] == 0:
    print("Prediction: ASLI")
else:
    print("Prediction: PALSU")

# Save the model and vectorizer
joblib.dump(clf, 'your_model_filename.pkl')
joblib.dump(vectorizer, 'your_vectorizer_filename.pkl')




Prediction: ASLI


['your_vectorizer_filename.pkl']