<a href="https://colab.research.google.com/github/Rahulappu2004/Fake-Account-Detection-in-Twitter-X/blob/main/Naive_bayes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Step 1: Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [2]:
# Read the bot-detection CSV into the working DataFrame `df`.
# The path is absolute, so the file has to exist at exactly this location.
DATA_PATH = '/content/bot_detection_data.csv'
df = pd.read_csv(DATA_PATH)

In [3]:
# Preprocessing: fill missing values, encode categoricals, build X and y.

# Keep a missing Location categorical. The blanket fillna(0) below would
# otherwise place the integer 0 inside a column of strings, and LabelEncoder
# cannot sort mixed str/int values (it raises TypeError). The guard leaves an
# already-numeric Location column untouched.
if not pd.api.types.is_numeric_dtype(df['Location']):
    df = df.fillna({'Location': 'Unknown'})

# Every remaining missing value becomes 0.
df = df.fillna(0)

# Encode each categorical column with its OWN fitted encoder. Refitting one
# shared encoder discards the earlier mappings, which makes it impossible to
# translate a raw value (e.g. a location name) into its code later on, or to
# recover the original value with inverse_transform.
# Usage: encoders['Location'].transform(['<location name>'])
encoders = {}
for column in ('Verified', 'Location', 'Bot Label'):
    encoders[column] = LabelEncoder()
    df[column] = encoders[column].fit_transform(df[column])

# Backward-compatible alias: `label_encoder` ends up fitted on 'Bot Label',
# exactly as it did when a single encoder was reused for all three columns.
label_encoder = encoders['Bot Label']

# Remove columns that are not used as model features.
df = df.drop(columns=['User ID', 'Username', 'Created At'])

# Replace the Hashtags text with a count of its comma-separated pieces.
# Non-string entries (including the 0 written by fillna above) count as 0.
# NOTE(review): assumes hashtags are comma-delimited in the CSV; if they are
# separated some other way every non-empty entry counts as 1 - confirm.
df['Hashtags'] = df['Hashtags'].apply(
    lambda x: len(x.split(',')) if isinstance(x, str) else 0
)

# Features (X) are every remaining column except the target; y is the target.
X = df.drop(columns='Bot Label')
y = df['Bot Label']

In [5]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# The 'Tweet' column is removed from both partitions before scaling.
X_train, X_test = (part.drop(columns='Tweet') for part in (X_train, X_test))

# Learn the scaling statistics from the training partition only, then apply
# the same fitted transformation to both partitions.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [6]:
# Step 3: Gaussian Naive Bayes - train, predict, evaluate
# fit() hands back the estimator, so construction and training are chained.
nb_model = GaussianNB().fit(X_train, y_train)

# Predicted labels for the held-out test partition
y_pred_nb = nb_model.predict(X_test)

# Score the predictions against the true test labels. Each metric is called
# as metric(y_true, y_pred) with its default settings.
accuracy_nb, precision_nb, recall_nb, f1_nb = (
    metric(y_test, y_pred_nb)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

# Report all four scores to four decimal places
print("Naive Bayes:")
print(f"Accuracy: {accuracy_nb:.4f}")
print(f"Precision: {precision_nb:.4f}")
print(f"Recall: {recall_nb:.4f}")
print(f"F1-Score: {f1_nb:.4f}")

Naive Bayes:
Accuracy: 0.5021
Precision: 0.5062
Recall: 0.4308
F1-Score: 0.4655


In [9]:
# Step 4: Classify a single hand-written example
# One numeric value per feature column.
# NOTE(review): 'Verified' and 'Location' are presumably label-encoded codes
# from the preprocessing cell - confirm which raw values 1 and 3 stand for.
custom_input = {
    'Retweet Count': 1000,
    'Mention Count': 5,
    'Follower Count': 100,
    'Verified': 1,
    'Location': 3,
    'Hashtags': 2
}

# Build a one-row frame (columns follow the dict's key order) and pass it
# through the already-fitted scaler; `custom_df` ends up holding the scaled
# values returned by transform().
custom_df = scaler.transform(pd.DataFrame(custom_input, index=[0]))

# Classify the scaled row. A predicted label of 1 is reported as 'Fake',
# anything else as 'Real'.
# NOTE(review): assumes encoded 'Bot Label' 1 means a bot account - confirm.
nb_pred = nb_model.predict(custom_df)
print(f"Naive Bayes Prediction: {'Fake' if nb_pred[0] == 1 else 'Real'}")

Naive Bayes Prediction: Fake
