In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Load the dataset
df_user_behavior = pd.read_csv(r'D:\ChildTiming\data\user_behavior_dataset.csv')  # replace with actual path

# Split into features (X) and target (y)
X = df_user_behavior.drop(columns=['User Behavior Class', 'User ID'])
y = df_user_behavior['User Behavior Class']

# Identify columns with non-numeric data
non_numeric_columns = X.select_dtypes(include=['object']).columns
print("Non-numeric columns:", non_numeric_columns)

# Perform One-Hot Encoding for categorical columns
X_encoded = pd.get_dummies(X, columns=non_numeric_columns, drop_first=True)

# Feature scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_encoded)

# Split the data into training and testing sets after encoding and scaling
X_train_scaled, X_test_scaled, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Define the model
svm_model = SVC(random_state=42)

# Set up the parameter grid for GridSearchCV
param_grid = {
    'C': [0.1, 1, 10, 100],  # Regularization parameter
    'kernel': ['linear', 'rbf', 'poly'],  # Kernel type
    'gamma': ['scale', 'auto', 0.1, 1, 10],  # Kernel coefficient
    'degree': [3, 5, 7]  # Only relevant for 'poly' kernel
}

# Perform GridSearchCV to find the best parameters
grid_search = GridSearchCV(svm_model, param_grid, cv=5, n_jobs=-1, verbose=2)
grid_search.fit(X_train_scaled, y_train)

# Get the best parameters and model
print(f"Best Parameters: {grid_search.best_params_}")
best_svm_model = grid_search.best_estimator_

# Evaluate the model
y_pred = best_svm_model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Classification report for detailed metrics
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Save the best model and scaler
joblib.dump(best_svm_model, 'user_behavior_svm_model_with_hyperparameters.pkl')
joblib.dump(scaler, 'user_behavior_scaler.pkl')



Non-numeric columns: Index(['Device Model', 'Operating System', 'Gender'], dtype='object')
Fitting 5 folds for each of 180 candidates, totalling 900 fits
Best Parameters: {'C': 0.1, 'degree': 3, 'gamma': 'scale', 'kernel': 'linear'}
Accuracy: 100.00%
Classification Report:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00        27
           2       1.00      1.00      1.00        29
           3       1.00      1.00      1.00        34
           4       1.00      1.00      1.00        27
           5       1.00      1.00      1.00        23

    accuracy                           1.00       140
   macro avg       1.00      1.00      1.00       140
weighted avg       1.00      1.00      1.00       140



['user_behavior_scaler.pkl']

In [11]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.svm import SVC
import joblib

# Load the pre-trained model and scaler
svm_model_user_behavior = joblib.load(r'D:\ChildTiming\notebook\user_behavior_svm_model_with_hyperparameters.pkl')  # Adjust path
scaler = joblib.load('user_behavior_scaler.pkl')  # Adjust path

# Define the columns the model expects
expected_columns = [
    'Device Model_Samsung Galaxy S21', 'Device Model_Xiaomi Mi 11', 'Device Model_iPhone 12',
    'Operating System_Android', 'Operating System_iOS', 'Operating System_Windows',
    'App Usage Time (min/day)', 'Screen On Time (hours/day)', 'Battery Drain (mAh/day)',
    'Number of Apps Installed', 'Data Usage (MB/day)', 'Age', 'Gender_Female', 'Gender_Male'
]

# Function to handle user input
def get_user_input():
    # Prompt user for input
    device_model = input("Device Model (e.g., Xiaomi Mi 11): ")
    operating_system = input("Operating System (e.g., Android): ")
    app_usage_time = float(input("App Usage Time (min/day): "))
    screen_on_time = float(input("Screen On Time (hours/day): "))
    battery_drain = float(input("Battery Drain (mAh/day): "))
    num_apps = int(input("Number of Apps Installed: "))
    data_usage = float(input("Data Usage (MB/day): "))
    age = int(input("Age: "))
    gender = input("Gender (e.g., Male/Female): ")

    # Create a DataFrame with the input values
    user_input_df = pd.DataFrame({
        'Device Model': [device_model],
        'Operating System': [operating_system],
        'App Usage Time (min/day)': [app_usage_time],
        'Screen On Time (hours/day)': [screen_on_time],
        'Battery Drain (mAh/day)': [battery_drain],
        'Number of Apps Installed': [num_apps],
        'Data Usage (MB/day)': [data_usage],
        'Age': [age],
        'Gender': [gender]
    })

    # One-hot encode categorical features (Device Model, Operating System, and Gender)
    encoder = OneHotEncoder(drop='first', sparse=False)
    categorical_columns = ['Device Model', 'Operating System', 'Gender']
    categorical_data = user_input_df[categorical_columns]
    categorical_encoded = encoder.fit_transform(categorical_data)

    # Convert encoded data back into a DataFrame and concatenate with the rest of the input
    categorical_encoded_df = pd.DataFrame(categorical_encoded, columns=encoder.get_feature_names_out(categorical_columns))
    user_input_df = user_input_df.drop(columns=categorical_columns)
    user_input_encoded = pd.concat([user_input_df, categorical_encoded_df], axis=1)

    # Ensure the columns match the training set
    user_input_encoded = user_input_encoded.reindex(columns=expected_columns, fill_value=0)

    # Scale the user input using the same scaler used during training
    user_input_scaled = scaler.transform(user_input_encoded)

    return user_input_scaled

# Test the model with user input
user_input_scaled = get_user_input()
prediction = svm_model_user_behavior.predict(user_input_scaled)

# Output the prediction and the reward/suggestion based on the predicted behavior
print("Predicted User Behavior:", prediction[0])

if prediction[0] == 5:
    print("Inform parents: High engagement detected. Please monitor screen time.")
elif prediction[0] == 4:
    print("Warning: High engagement. Please consider limiting screen time.")
elif prediction[0] == 3:
    print("Warning: Moderate engagement. Ensure balanced usage.")
else:
    print("Reward: Low engagement. Consider rewarding with more screen time.")




ValueError: The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- Gender_Female
- Operating System_Android
- Operating System_Windows
Feature names seen at fit time, yet now missing:
- Device Model_OnePlus 9
