In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# End-to-end KNN pipeline: load the engagement dataset, derive a binary
# "high engagement" label from the numeric columns, train a KNN classifier,
# report its metrics, and classify one sample row.

# 1. Load the dataset CSV
df = pd.read_csv('social_media_engagement_dataset.csv')

print("===== Kolom Dataset =====")
print(df.columns)

# 2. Keep only the numeric columns.
# Take an explicit copy so that the column assignments below operate on an
# independent frame (avoids SettingWithCopyWarning on older pandas versions).
numeric_df = df.select_dtypes(include=[np.number]).copy()

# 3. Build the total engagement score as the row-wise sum of every numeric
# column.
# NOTE(review): this sums *all* numeric columns, whatever their meaning or
# scale -- confirm that every one of them should count towards engagement.
numeric_df['total_engagement'] = numeric_df.sum(axis=1)

# 4. Build the classification target: 1 when a row's total is above the
# dataset-wide mean, 0 otherwise.
mean_engagement = numeric_df['total_engagement'].mean()
numeric_df['engagement_category'] = (
    numeric_df['total_engagement'] > mean_engagement
).astype(int)

# 5. Separate features and target.
# NOTE(review): X holds exactly the columns whose sum defines the label, so
# the target is a deterministic function of the features (target leakage).
# The reported accuracy therefore measures how well KNN approximates that
# threshold rule, not real predictive power. The mean threshold is also
# computed before the train/test split. Left unchanged here because fixing
# it redefines the task.
X = numeric_df.drop(['total_engagement', 'engagement_category'], axis=1)
y = numeric_df['engagement_category']

# 6. Split the data (80% train / 20% test, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 7. Standardise the features; the scaler is fitted on the training split
# only and then applied to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 8. KNN model
model = KNeighborsClassifier(n_neighbors=5)
model.fit(X_train, y_train)

# 9. Predict on the held-out split
y_pred = model.predict(X_test)

# 10. Evaluate
print("\nAkurasi:", accuracy_score(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# 11. Predict a single sample.
# The sample is kept as a one-row DataFrame (not `.values`) so the scaler
# receives the same feature names it was fitted with; passing a bare array
# makes scikit-learn >= 1.0 warn about missing feature names.
# NOTE(review): this row is taken from the dataset itself, so it may have
# been part of the training split rather than being genuinely new data.
new_data = X.iloc[0:1]
new_data_scaled = scaler.transform(new_data)
prediction = model.predict(new_data_scaled)

if prediction[0] == 1:
    print("\nKonten diprediksi ENGAGEMENT TINGGI")
else:
    print("\nKonten diprediksi ENGAGEMENT RENDAH")


===== Kolom Dataset =====
Index(['platform', 'post_type', 'post_length', 'views', 'likes', 'comments',
       'shares', 'engagement_rate'],
      dtype='object')

Akurasi: 0.9825

Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.98      0.98      1036
           1       0.98      0.98      0.98       964

    accuracy                           0.98      2000
   macro avg       0.98      0.98      0.98      2000
weighted avg       0.98      0.98      0.98      2000


Confusion Matrix:
[[1020   16]
 [  19  945]]

Konten diprediksi ENGAGEMENT RENDAH


