<a href="https://colab.research.google.com/github/devyanighildiyal/buildspace/blob/main/User_Behaviour_Analysis_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np

# Size of the synthetic dataset and the probability that a session is labelled fraud.
n_sessions = 10000
fraud_ratio = 0.05

# Seed NumPy's global generator so the dataset is reproducible.
# All draws below consume the same stream, so their order must not change.
np.random.seed(42)

session_ids = np.arange(1, n_sessions + 1)
user_ids = np.random.randint(low=1, high=1000, size=n_sessions)
click_intervals = np.random.exponential(2, n_sessions)
mouse_speeds = np.random.normal(1, 0.5, n_sessions)
pages_visited = np.random.poisson(5, n_sessions)
form_interactions = np.random.poisson(3, n_sessions)
session_durations = np.random.exponential(300, n_sessions)
# NOTE: the label is sampled on its own, independently of every feature above,
# so the features carry no information about `is_fraud` by construction.
is_fraud = np.random.binomial(n=1, p=fraud_ratio, size=n_sessions)

# Pair each column name with its array; the order here is the CSV column order.
column_names = (
    'session_id',
    'user_id',
    'click_interval',
    'mouse_speed',
    'pages_visited',
    'form_interactions',
    'session_duration',
    'is_fraud',
)
column_values = (
    session_ids,
    user_ids,
    click_intervals,
    mouse_speeds,
    pages_visited,
    form_interactions,
    session_durations,
    is_fraud,
)
data = pd.DataFrame(dict(zip(column_names, column_values)))

# Persist without the positional index so the file holds only the eight columns.
output_path = 'synthetic_user_behavior.csv'
data.to_csv(output_path, index=False)

print(f"Synthetic dataset created and saved to '{output_path}'")


Synthetic dataset created and saved to 'synthetic_user_behavior.csv'


In [2]:
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
import joblib

In [3]:
# Reload the dataset from disk so the modelling steps start from the saved CSV.
dataset_path = 'synthetic_user_behavior.csv'
data = pd.read_csv(dataset_path)

In [4]:
# Plain-text preview of the first five rows.
preview = data.head(n=5)
print(preview)

   session_id  user_id  click_interval  mouse_speed  pages_visited  \
0           1      103        0.372079     0.828577              5   
1           2      436        0.201057     0.394206             11   
2           3      861        1.086478     0.926267              6   
3           4      271        0.240496     0.514372              4   
4           5      107        0.377273     0.394617              4   

   form_interactions  session_duration  is_fraud  
0                  7        502.664894         0  
1                  2        633.662077         0  
2                  1        927.819519         0  
3                  1        290.725112         0  
4                  3         18.933742         0  


In [5]:
# Behavioural columns used as model inputs; session_id and user_id are not included.
features = [
    'click_interval',
    'mouse_speed',
    'pages_visited',
    'form_interactions',
    'session_duration',
]
X = data.loc[:, features]  # feature matrix
y = data.loc[:, 'is_fraud']  # ground-truth label, used only for evaluation

In [6]:
# Hold out 20% of the sessions for evaluation; the fixed seed keeps the split repeatable.
split_parts = train_test_split(X, y, random_state=42, test_size=0.2)
X_train, X_test, y_train, y_test = split_parts

In [7]:
# Unsupervised anomaly detector: only the features are passed to fit(), never the labels.
isolation_forest_params = {
    'contamination': 0.05,
    'random_state': 42,
}
model = IsolationForest(**isolation_forest_params)
model.fit(X_train)



In [8]:
# Raw model output for each split, in the model's own label encoding.
y_train_pred, y_test_pred = (
    model.predict(split) for split in (X_train, X_test)
)

In [9]:
# Convert the model's output to the dataset's label convention:
# a prediction of -1 becomes 1 (fraud), anything else becomes 0.
#
# The labels are derived from model.predict(...) directly rather than by
# re-mapping y_train_pred / y_test_pred onto themselves. The self-referential
# form was not idempotent: running the cell a second time compared the
# already-converted 0/1 values against -1 and silently produced all zeros.
y_train_pred = np.where(model.predict(X_train) == -1, 1, 0)
y_test_pred = np.where(model.predict(X_test) == -1, 1, 0)

In [10]:
# Compare predicted labels against the ground truth on the training split.
train_report = classification_report(y_train, y_train_pred)
train_confusion = confusion_matrix(y_train, y_train_pred)
print("Training set evaluation:", train_report, train_confusion, sep="\n")

Training set evaluation:
              precision    recall  f1-score   support

           0       0.95      0.95      0.95      7643
           1       0.04      0.04      0.04       357

    accuracy                           0.91      8000
   macro avg       0.50      0.50      0.50      8000
weighted avg       0.91      0.91      0.91      8000

[[7258  385]
 [ 342   15]]


In [11]:
# Compare predicted labels against the ground truth on the held-out split.
test_report = classification_report(y_test, y_test_pred)
test_confusion = confusion_matrix(y_test, y_test_pred)
print("Test set evaluation:", test_report, test_confusion, sep="\n")

Test set evaluation:
              precision    recall  f1-score   support

           0       0.95      0.95      0.95      1908
           1       0.07      0.07      0.07        92

    accuracy                           0.91      2000
   macro avg       0.51      0.51      0.51      2000
weighted avg       0.91      0.91      0.91      2000

[[1822   86]
 [  86    6]]


In [12]:
# Serialize the fitted model to disk with joblib.
model_path = 'user_behavior_model.pkl'
joblib.dump(model, model_path)
print(f"Model saved to '{model_path}'")

Model saved to 'user_behavior_model.pkl'
