ML Project
Vanshika 229311041 AIML-6C

Synthetic Dataset (Risk Prediction using Random Forest and Logistic Regression)

In [None]:
import pandas as pd
import numpy as np


# Number of synthetic student records to generate. Kept in one place so the
# dataset size can be changed without editing every random draw below.
N_SAMPLES = 50

# Seed NumPy's legacy global RNG so the dataset is identical on every run.
np.random.seed(42)

# NOTE: np.random.randint excludes its upper bound, so randint(1, 11, ...) yields
# 1..10 and a noise draw such as randint(-30, 30, ...) yields -30..29.
# The draws below must stay in this order: reordering them changes the data.

# Mood is the driver variable; attendance and task completion are noisy linear
# functions of it, clipped to [50, 100] and [30, 100] respectively.
mood_score = np.random.randint(1, 11, N_SAMPLES)
attendance = np.clip(mood_score * 10 + np.random.randint(-30, 30, N_SAMPLES), 50, 100)
task_completion = np.clip(mood_score * 8 + np.random.randint(-20, 20, N_SAMPLES), 30, 100)

# Social media usage is drawn independently (0..5); interaction level falls as
# usage rises, with a little noise, and is clipped to [30, 100].
social_media_usage = np.random.randint(0, 6, N_SAMPLES)
interaction_level = np.clip(
    100 - social_media_usage * 10 + np.random.randint(-10, 10, N_SAMPLES), 30, 100
)

# Sleep duration: 7 minus a tenth of the mood score, plus integer noise in
# -2..1, clipped to [5, 8].
sleep_duration = np.clip(7 - mood_score / 10 + np.random.randint(-2, 2, N_SAMPLES), 5, 8)

# Physical activity: the same noisy function of mood as attendance, but clipped
# to [20, 120].
physical_activity = np.clip(mood_score * 10 + np.random.randint(-30, 30, N_SAMPLES), 20, 120)

# Label rule, evaluated in priority order (np.select takes the first match):
#   2 -> low mood AND low attendance AND low task completion
#   1 -> any single moderate warning sign
#   0 -> none of the above
is_high_risk = (mood_score <= 4) & (attendance < 60) & (task_completion < 40)
is_medium_risk = (mood_score <= 6) | (attendance < 70) | (task_completion < 50)
risk_level = np.select([is_high_risk, is_medium_risk], [2, 1], default=0)

# Assemble the seven features and the target into one frame; the column order
# here is the feature order every later cell relies on.
df = pd.DataFrame({
    'Mood Score': mood_score,
    'Attendance': attendance,
    'Task Completion': task_completion,
    'Interaction Level': interaction_level,
    'Sleep Duration': sleep_duration,
    'Social Media Usage': social_media_usage,
    'Physical Activity': physical_activity,
    'Risk Level': risk_level
})

# Preview the first rows as a sanity check.
print(df.head())


   Mood Score  Attendance  Task Completion  Interaction Level  Sleep Duration  \
0           7          86               42                 58             7.3   
1           4          50               30                 69             6.6   
2           8          63               67                 86             5.2   
3           5          50               30                 66             7.5   
4           7          75               43                 79             5.0   

   Social Media Usage  Physical Activity  Risk Level  
0                   4                 47           1  
1                   4                 45           2  
2                   2                 87           1  
3                   4                 59           1  
4                   3                 59           1  


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler

# Random Forest

Features and Target

In [None]:
# Separate the label column (y) from the predictor columns (X).
y = df['Risk Level']
X = df.drop(columns='Risk Level')

Splitting Data

In [None]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

Scaling

In [None]:
# Learn the scaling statistics from the training split only, then apply that
# same transform to both splits so the test rows never influence the fit.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

Training the Model

In [None]:
# A 100-tree random forest with a fixed seed so the fitted trees are reproducible.
rf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
# Train on the scaled training features.
rf_model.fit(X_train_scaled, y_train)

Making Predictions

In [None]:
# Predict a risk class for every row of the scaled held-out split using the
# fitted random forest.
y_pred = rf_model.predict(X_test_scaled)

Evaluating the Model

In [None]:
# Fraction of held-out rows whose predicted class equals the true class,
# reported as a percentage with two decimals.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 86.67%


In [None]:
# Per-class precision / recall / F1 for the random forest on the held-out split.
report = classification_report(y_test, y_pred)
print("\nClassification Report:")
print(report)


Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       0.89      0.89      0.89         9
           2       0.67      0.67      0.67         3

    accuracy                           0.87        15
   macro avg       0.85      0.85      0.85        15
weighted avg       0.87      0.87      0.87        15



In [None]:
# Rows are true classes, columns are predicted classes (classes 0, 1, 2).
matrix = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix")
print(matrix)


Confusion Matrix
[[3 0 0]
 [0 8 1]
 [0 1 2]]


Using model with new data

In [None]:
# Build the unseen sample as a one-row DataFrame whose columns match the
# training features by name and order. The scaler was fitted on a DataFrame, so
# passing a bare NumPy array makes scikit-learn warn that the input "does not
# have valid feature names", and it hides which number belongs to which feature.
# NOTE(review): every value except Mood Score lies outside the range the
# synthetic training data was clipped to (e.g. Attendance is 50-100 there), so
# the model is extrapolating here -- confirm these inputs are intentional.
new_data = pd.DataFrame([{
    'Mood Score': 4,
    'Attendance': 7,
    'Task Completion': 6,
    'Interaction Level': 1,
    'Sleep Duration': 2,
    'Social Media Usage': 9,
    'Physical Activity': 8,
}])

In [None]:
# Apply the scaler fitted on the training split to the new sample so it is on
# the same scale as the data the model was trained on.
new_data_scaled = scaler.transform(new_data)



In [None]:
# Ask the random forest for the risk class of the scaled sample; the result is
# an array with one entry per input row (a single row here).
predicted_risk = rf_model.predict(new_data_scaled)

In [None]:
# Show the raw integer class code (0, 1 or 2) predicted for the single sample.
print("Predicted Risk Level: ", predicted_risk[0])

Predicted Risk Level:  1


In [None]:
# Human-readable names for the class codes; list position equals the code
# (0 -> Low, 1 -> Medium, 2 -> High).
risk_levels = ['Low', 'Medium', 'High']
predicted_label = risk_levels[predicted_risk[0]]
print("Predicted Risk Level:", predicted_label)

Predicted Risk Level: Medium


# Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

Features and Target

In [None]:
# Rebuild the label (y) and predictor (X) views of the dataset for the
# logistic-regression run.
y = df['Risk Level']
X = df.drop(columns='Risk Level')

Train-Test

In [None]:
# Same 70/30 split and seed as the random-forest section, so both models are
# evaluated on the same held-out rows.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

Feature Scaling

In [None]:
# Fit a fresh scaler on the training split only, then transform both splits
# with it.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

Training Model

In [None]:
# Logistic regression with the solver's iteration cap set to 200.
log_reg_model = LogisticRegression(
    max_iter=200,
)
# Train on the scaled training features.
log_reg_model.fit(X=X_train_scaled, y=y_train)

Predictions

In [None]:
# Predict a risk class for every row of the scaled held-out split using the
# fitted logistic-regression model (this rebinds y_pred from the earlier section).
y_pred = log_reg_model.predict(X_test_scaled)

Evaluation

In [None]:
# Share of held-out rows the logistic-regression model classified correctly,
# reported as a percentage with two decimals.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 93.33%


In [None]:
# Per-class precision / recall / F1 for logistic regression on the held-out split.
report = classification_report(y_test, y_pred)
print("\nClassification Report:")
print(report)


Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      0.89      0.94         9
           2       0.75      1.00      0.86         3

    accuracy                           0.93        15
   macro avg       0.92      0.96      0.93        15
weighted avg       0.95      0.93      0.94        15



In [None]:
# Rows are true classes, columns are predicted classes (classes 0, 1, 2).
matrix = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:")
print(matrix)


Confusion Matrix:
[[3 0 0]
 [0 8 1]
 [0 0 3]]


New data

In [None]:
# Build the unseen sample as a one-row DataFrame whose columns match the
# training features by name and order. The scaler was fitted on a DataFrame, so
# passing a bare NumPy array makes scikit-learn warn that the input "does not
# have valid feature names", and it hides which number belongs to which feature.
# NOTE(review): every value except Mood Score lies outside the range the
# synthetic training data was clipped to (e.g. Attendance is 50-100 there), so
# the model is extrapolating here -- confirm these inputs are intentional.
new_data = pd.DataFrame([{
    'Mood Score': 4,
    'Attendance': 7,
    'Task Completion': 6,
    'Interaction Level': 1,
    'Sleep Duration': 2,
    'Social Media Usage': 9,
    'Physical Activity': 8,
}])

In [None]:
# Apply the scaler fitted on this section's training split to the new sample so
# it is on the same scale as the data the model was trained on.
new_data_scaled = scaler.transform(new_data)



In [None]:
# Ask the logistic-regression model for the risk class of the scaled sample;
# the result is an array with one entry per input row (a single row here).
predicted_risk = log_reg_model.predict(new_data_scaled)

In [None]:
# Show the raw integer class code (0, 1 or 2) predicted for the single sample.
print("Predicted Risk Level: ", predicted_risk[0])

Predicted Risk Level:  1


In [None]:
# Human-readable names for the class codes; list position equals the code
# (0 -> Low, 1 -> Medium, 2 -> High).
risk_levels = ['Low', 'Medium', 'High']
predicted_label = risk_levels[predicted_risk[0]]
print("Predicted Risk Level:", predicted_label)

Predicted Risk Level: Medium
