In [5]:

# -----------------------------------------
# 📦 Step 1: Import Libraries
# -----------------------------------------
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# -----------------------------------------
# 📂 Step 2: Load Dataset (uploaded in Colab)
# -----------------------------------------
import io

from google.colab import files

# Name the dataset is expected to have; used to pick the right upload and as
# the on-disk fallback when nothing is uploaded.
DATA_FILE = "Students Social Media Addiction.csv"

uploaded = files.upload()

if uploaded:
    # Parse the bytes that were just uploaded instead of re-reading a fixed
    # path: a re-upload or a differently named file would otherwise leave
    # read_csv pointing at a stale (or missing) file.
    upload_name = DATA_FILE if DATA_FILE in uploaded else next(iter(uploaded))
    df = pd.read_csv(io.BytesIO(uploaded[upload_name]))
else:
    # Nothing was uploaded (dialog cancelled): fall back to a copy already
    # present in the working directory.
    df = pd.read_csv(DATA_FILE)

# -----------------------------------------
# 👀 Step 3: EDA - View basic info
# -----------------------------------------
print("Shape:", df.shape)
print(df.head())
# DataFrame.info() writes its own report and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
df.info()
print(df.isnull().sum())

# -----------------------------------------
# 🧼 Step 4: Clean + Create target column
# -----------------------------------------
# Create Addicted_Level from Addicted_Score
def categorize(score, low_max=3, medium_max=6):
    """Bucket a numeric addiction score into "Low", "Medium" or "High".

    Args:
        score: Numeric score to classify.
        low_max: Largest score still labelled "Low" (inclusive).
        medium_max: Largest score still labelled "Medium" (inclusive).

    Returns:
        "Low" when score <= low_max, "Medium" when score <= medium_max,
        otherwise "High".
    """
    if score <= low_max:
        return "Low"
    if score <= medium_max:
        return "Medium"
    # Anything that fails both comparisons lands here (this includes NaN,
    # for which every comparison is False).
    return "High"

# Derive the categorical target from the numeric score.
df["Addicted_Level"] = df["Addicted_Score"].apply(categorize)

# Drop ID column (not useful for prediction).
# Rebinding with errors="ignore" keeps this step re-runnable: the in-place
# drop raised KeyError the second time because the column was already gone.
df = df.drop(columns=["Student_ID"], errors="ignore")

# -----------------------------------------
# 🔠 Step 5: Encode Categorical Columns
# -----------------------------------------
# Fit a separate encoder for every text column and keep it in `encoders`
# (column name -> fitted LabelEncoder), so each integer code can be mapped
# back to its original label later. A single shared encoder is refit on every
# column and therefore only remembers the last one.
encoders = {}
le = LabelEncoder()  # keeps `le` defined even if no text column exists
for col in df.columns:
    if df[col].dtype == "object":
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col].astype(str))
        encoders[col] = le
# As before, `le` ends up bound to the encoder of the last text column.

# -----------------------------------------
# 🎯 Step 6: Define X (features) and y (target)
# -----------------------------------------
# Target: the derived level. Features: every remaining column except the
# target itself and the raw score it was computed from.
y = df["Addicted_Level"]
X = df.drop(columns=["Addicted_Score", "Addicted_Level"])

# -----------------------------------------
# 🧪 Step 7: Train/Test Split
# -----------------------------------------
# Stratify on the target so each class keeps its share in both partitions;
# a plain random split can leave a rare class under-represented in the test
# set. Stratification needs at least 2 rows per class, so fall back to the
# plain split otherwise.
stratify_labels = y if y.value_counts().min() >= 2 else None
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=stratify_labels
)

# -----------------------------------------
# 🤖 Step 8: Train Random Forest Model
# -----------------------------------------
# fit() returns the estimator itself, so construction and training chain.
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# -----------------------------------------
# 📊 Step 9: Evaluate Model
# -----------------------------------------
# Predict on the held-out split, compute each metric once, then report.
y_pred = model.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred)
report_text = classification_report(y_test, y_pred)
conf_mat = confusion_matrix(y_test, y_pred)

print("✅ Accuracy:", test_accuracy)
print("\n📋 Classification Report:\n", report_text)
print("\n📉 Confusion Matrix:\n", conf_mat)


Saving Students Social Media Addiction.csv to Students Social Media Addiction.csv
Shape: (705, 13)
   Student_ID  Age  Gender Academic_Level     Country  Avg_Daily_Usage_Hours  \
0           1   19  Female  Undergraduate  Bangladesh                    5.2   
1           2   22    Male       Graduate       India                    2.1   
2           3   20  Female  Undergraduate         USA                    6.0   
3           4   18    Male    High School          UK                    3.0   
4           5   21    Male       Graduate      Canada                    4.5   

  Most_Used_Platform Affects_Academic_Performance  Sleep_Hours_Per_Night  \
0          Instagram                          Yes                    6.5   
1            Twitter                           No                    7.5   
2             TikTok                          Yes                    5.0   
3            YouTube                           No                    7.0   
4           Facebook                    