<a href="https://colab.research.google.com/github/Rahulappu2004/Fake-Account-Detection-in-Twitter-X/blob/main/RandomForestClassifier_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
# Step 1: Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [8]:
# Read the raw bot-detection CSV into a DataFrame (path is the Colab upload location)
df = pd.read_csv(filepath_or_buffer='/content/bot_detection_data.csv')

In [9]:
# Handle missing values: every NaN, in every column, becomes the integer 0.
df = df.fillna(0)

# Encode categorical variables as integer codes.
# Each column gets its OWN LabelEncoder: fit_transform() refits the encoder,
# so sharing one instance would overwrite the Verified/Location mappings and
# make it impossible to translate raw values to codes (or back) later on,
# e.g. when a user supplies a Location at prediction time.
verified_encoder = LabelEncoder()
location_encoder = LabelEncoder()
# `label_encoder` keeps its original name and ends up fitted on 'Bot Label',
# so any later cell that relies on it still sees the same state.
label_encoder = LabelEncoder()

df['Verified'] = verified_encoder.fit_transform(df['Verified'])
df['Location'] = location_encoder.fit_transform(df['Location'])
df['Bot Label'] = label_encoder.fit_transform(df['Bot Label'])

# Drop identifier / timestamp columns that are not used as features.
# This raises KeyError if the cell is re-run on an already-processed `df`.
df = df.drop(columns=['User ID', 'Username', 'Created At'])

# Convert Hashtags to a numerical feature: the number of comma-separated
# entries for string values, 0 for anything else (including the 0 written by
# fillna above for missing values).
# NOTE(review): assumes hashtags are comma-delimited in the CSV; confirm.
df['Hashtags'] = df['Hashtags'].apply(lambda x: len(x.split(',')) if isinstance(x, str) else 0)

# Define features (X) and target (y)
X = df.drop(columns='Bot Label')
y = df['Bot Label']

In [12]:
# Hold out 20% of the rows for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# The free-text 'Tweet' column is not fed to the model; remove it from both
# partitions before scaling.
X_train, X_test = (part.drop(columns='Tweet') for part in (X_train, X_test))

# Standardise features: statistics are learned on the training partition only
# and then applied to both partitions.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [13]:
# Random forest with default hyper-parameters and a fixed seed
rf_model = RandomForestClassifier(random_state=42)

# Fit on the scaled training partition
rf_model.fit(X=X_train, y=y_train)

# Predicted labels for the held-out partition
y_pred_rf = rf_model.predict(X=X_test)

In [14]:
# Score the held-out predictions with the four imported classification metrics
accuracy_rf, precision_rf, recall_rf, f1_rf = (
    metric(y_test, y_pred_rf)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

# Report each score to four decimal places
print("Random Forest Classifier:")
print(f"Accuracy: {accuracy_rf:.4f}")
print(f"Precision: {precision_rf:.4f}")
print(f"Recall: {recall_rf:.4f}")
print(f"F1-Score: {f1_rf:.4f}")

Random Forest Classifier:
Accuracy: 0.5054
Precision: 0.5089
Recall: 0.4881
F1-Score: 0.4983


In [17]:
from sklearn.preprocessing import StandardScaler # Assuming scaler is already defined

# Function to get custom input from the user
def get_custom_input(features=None):
    """Prompt on stdin for one integer per feature and return them as a dict.

    Each prompt is repeated until the entered text parses with ``int()``.

    Args:
        features: Optional iterable of feature names to prompt for, in order.
            When omitted, the six names used by this notebook are asked for:
            'Retweet Count', 'Mention Count', 'Follower Count', 'Verified',
            'Location', 'Hashtags'.

    Returns:
        dict: feature name -> entered integer, in prompt order.
    """
    if features is None:
        features = ['Retweet Count', 'Mention Count', 'Follower Count', 'Verified', 'Location', 'Hashtags']

    custom_input = {}
    print("Enter values for the following features:")
    for feature in features:
        while True:
            try:
                value = int(input(f"{feature}: "))
            except ValueError:
                # Non-integer text: report and ask for the same feature again.
                print("Invalid input. Please enter an integer.")
            else:
                custom_input[feature] = value
                break
    return custom_input

# Collect one value per feature interactively.
# Verified and Location are entered as integers, i.e. as the codes produced
# by the label-encoding step, not as raw values.
custom_input = get_custom_input()

# Build a single-row frame from the answers and apply the scaler that was
# fitted on the training partition.
custom_df = scaler.transform(pd.DataFrame([custom_input]))

# Classify the scaled row; label 1 is reported as 'Fake', anything else as 'Real'
rf_pred = rf_model.predict(custom_df)
print(f"Random Forest Prediction: {'Fake' if rf_pred[0] == 1 else 'Real'}")

Enter values for the following features:
Retweet Count: 3
Mention Count: 1
Follower Count: 3
Verified: 0
Location: 1
Hashtags: 1
Random Forest Prediction: Real
