<a href="https://colab.research.google.com/github/fufi-tuti/dataMiningprj/blob/main/feature_selection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
import numpy as np
from sklearn.feature_selection import VarianceThreshold, SelectFromModel
from sklearn.linear_model import Lasso
from sklearn.impute import SimpleImputer
import matplotlib.pyplot as plt

# Load the preprocessed student-depression dataset (first sheet of the workbook)
# from the Colab working directory.
file_path = "/content/Processed_Student_Depression_Dataset.xlsx"
df = pd.read_excel(io=file_path, sheet_name=0)

In [4]:
# Prepare the modelling inputs.
# Target vector: the 'Depression' column.
y = df['Depression']
# Feature matrix: everything except the target and the columns that are not
# useful as predictors ('id', 'Profession').
X = df.drop(
    columns=[
        'Depression',
        'id',
        'Profession',
    ]
)

In [5]:
# Handle missing values left after encoding: replace each missing entry with
# the mean of its column.
imputer = SimpleImputer(strategy='mean')
# The imputer output is wrapped back into a DataFrame. Passing index=X.index
# keeps the original row labels; without it the rebuilt frame gets a fresh
# RangeIndex, which can misalign with `y` in index-based operations such as
# the pd.concat(..., axis=1) used when exporting the selected features.
X = pd.DataFrame(
    imputer.fit_transform(X),
    columns=X.columns,
    index=X.index,
)

In [6]:
# 1. L1 regularization (Lasso) for feature selection:
#    fit the model and keep every feature whose coefficient is non-zero.
model = Lasso(alpha=0.1).fit(X, y)
selected_l1_features = [
    column for column, weight in zip(X.columns, model.coef_) if weight != 0
]
print("\nL1 Regularization Selected Features:", selected_l1_features)


L1 Regularization Selected Features: ['Have you ever had suicidal thoughts ?', 'Financial Stress']


In [7]:
# 2. Variance threshold (filter method): drop low-variance features using a
#    threshold of 0.2 and record the names of the columns that survive.
selector = VarianceThreshold(threshold=0.2)
# Reduced feature matrix returned by the selector (not used by the visible
# cells below, kept for interactive inspection).
X_new = selector.fit_transform(X)
selected_var_features = [
    column for column, kept in zip(X.columns, selector.get_support()) if kept
]
print("\nVariance Threshold Selected Features:", selected_var_features)


Variance Threshold Selected Features: ['Gender', 'Sleep Duration', 'Have you ever had suicidal thoughts ?', 'Financial Stress', 'Family History of Mental Illness', 'Dietary Habits_Moderate', 'Dietary Habits_Unhealthy']


In [8]:
# Show the features chosen by each selection method, one report line per method.
for label, features in (
    ("\nSelected Features after L1 Regularization:", selected_l1_features),
    ("\nSelected Features after Variance Threshold:", selected_var_features),
):
    print(label, features)


Selected Features after L1 Regularization: ['Have you ever had suicidal thoughts ?', 'Financial Stress']

Selected Features after Variance Threshold: ['Gender', 'Sleep Duration', 'Have you ever had suicidal thoughts ?', 'Financial Stress', 'Family History of Mental Illness', 'Dietary Habits_Moderate', 'Dietary Habits_Unhealthy']


In [9]:
# Build a copy of the data holding only the Lasso-selected features plus the
# target column, placed side by side.
l1_feature_frame = X.loc[:, selected_l1_features]
final_df = pd.concat([l1_feature_frame, y], axis="columns")

# Write the reduced dataset to a CSV file, without the row index.
final_df.to_csv('final_selected_data.csv', index=False)

# Confirmation message for the user.
print("\nتم حفظ البيانات المعدلة في ملف: final_selected_data.csv ✅")



تم حفظ البيانات المعدلة في ملف: final_selected_data.csv ✅
