In [5]:
# TRAINING CELL

import pandas as pd
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense

# Load datasets
df_users = pd.read_csv("users.csv")
df_fakeusers = pd.read_csv("fakeusers.csv")

# Add isFake label
df_users["isFake"] = 0
df_fakeusers["isFake"] = 1

# Combine and shuffle
df_allUsers = pd.concat([df_users, df_fakeusers], ignore_index=True)
df_allUsers = df_allUsers.sample(frac=1).reset_index(drop=True)

# Prepare features and encode 'lang'
features = [
    "statuses_count", "followers_count", "friends_count",
    "favourites_count", "listed_count", "geo_enabled",
    "profile_use_background_image", "lang"
]
df_allUsers["lang"] = df_allUsers["lang"].astype("category")
df_allUsers["lang_code"] = df_allUsers["lang"].cat.codes

X = df_allUsers[[
    "statuses_count", "followers_count", "friends_count",
    "favourites_count", "listed_count", "geo_enabled",
    "profile_use_background_image", "lang_code"
]].fillna(0)

Y = df_allUsers["isFake"]

# Save language mapping
lang_mapping = dict(enumerate(df_allUsers["lang"].cat.categories))
pd.Series(lang_mapping).to_csv("lang_mapping.csv")

# Split data
train_X, test_X, train_y, test_y = train_test_split(X, Y, test_size=0.2, random_state=0)
train_X, val_X, train_y, val_y = train_test_split(train_X, train_y, test_size=0.2, random_state=0)

# Build model
model = Sequential([
    Dense(32, activation='relu', input_dim=train_X.shape[1]),
    Dense(64, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train and save model
model.fit(train_X, train_y, validation_data=(val_X, val_y), epochs=50, batch_size=32, verbose=2)
model.save("fake_real_profile_model.h5")
print("✅ Model trained and saved successfully!")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
137/137 - 4s - 27ms/step - accuracy: 0.9588 - loss: 4.3264 - val_accuracy: 0.9799 - val_loss: 0.3571
Epoch 2/50
137/137 - 2s - 15ms/step - accuracy: 0.9783 - loss: 0.9163 - val_accuracy: 0.9808 - val_loss: 0.1313
Epoch 3/50
137/137 - 1s - 6ms/step - accuracy: 0.9744 - loss: 0.7656 - val_accuracy: 0.9762 - val_loss: 2.1528
Epoch 4/50
137/137 - 1s - 5ms/step - accuracy: 0.9792 - loss: 1.3324 - val_accuracy: 0.9881 - val_loss: 0.1216
Epoch 5/50
137/137 - 1s - 4ms/step - accuracy: 0.9689 - loss: 1.3073 - val_accuracy: 0.9734 - val_loss: 1.2812
Epoch 6/50
137/137 - 1s - 4ms/step - accuracy: 0.9677 - loss: 2.4185 - val_accuracy: 0.9698 - val_loss: 2.4701
Epoch 7/50
137/137 - 1s - 4ms/step - accuracy: 0.9757 - loss: 1.0059 - val_accuracy: 0.9835 - val_loss: 1.1984
Epoch 8/50
137/137 - 1s - 4ms/step - accuracy: 0.9757 - loss: 1.2074 - val_accuracy: 0.9808 - val_loss: 0.8091
Epoch 9/50
137/137 - 1s - 4ms/step - accuracy: 0.9810 - loss: 0.7380 - val_accuracy: 0.9725 - val_loss: 0.1607



✅ Model trained and saved successfully!


In [8]:
# PREDICTION CELL

import pandas as pd
from keras.models import load_model

# Load model and language mapping
model = load_model("fake_real_profile_model.h5")
lang_mapping = pd.read_csv("lang_mapping.csv", index_col=0, header=None).squeeze("columns").to_dict()
lang_inverse_mapping = {v: k for k, v in lang_mapping.items()}

def predict_user_profile(user_dict):
    lang = user_dict["lang"]
    user_dict["lang_code"] = lang_inverse_mapping.get(lang, 0)
    del user_dict["lang"]

    feature_order = [
        "statuses_count", "followers_count", "friends_count",
        "favourites_count", "listed_count", "geo_enabled",
        "profile_use_background_image", "lang_code"
    ]

    user_df = pd.DataFrame([user_dict])[feature_order].fillna(0)
    prediction = model.predict(user_df)[0][0]
    result = "Fake Profile" if prediction >= 0.5 else "Real Profile"
    print(f"🧠 Prediction: {result} (Confidence: {prediction:.2f})")

# 🧪 Try with an example user:
new_user = {
    "statuses_count": 100,
    "followers_count": 50,
    "friends_count": 200,
    "favourites_count": 20,
    "listed_count": 0,
    "geo_enabled": 1,
    "profile_use_background_image": 1,
    "lang": "en"
}

predict_user_profile(new_user)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 134ms/step
🧠 Prediction: Real Profile (Confidence: 0.28)
