# Initialization

In [2]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Load the dataset
data_path = '../Dataset/Human Stress Dataset.csv'
dataset = pd.read_csv(data_path)
# read_csv already returns a DataFrame, so no re-wrapping is needed.
# Work on an explicit copy: later cells modify `df` in place, and the copy
# keeps the values as loaded from disk available under `dataset`.
df = dataset.copy()

# Handling Outliers

In [5]:
# Define a function to cap outliers based on the IQR method
def remove_outliers(df, column, factor=1.5):
    """Clip the values of ``df[column]`` to its IQR-based fences, in place.

    Despite the name, no rows are removed: values below
    ``Q1 - factor * IQR`` are replaced by that lower fence and values above
    ``Q3 + factor * IQR`` are replaced by that upper fence, where Q1 and Q3
    are the 25th and 75th percentiles of the column and ``IQR = Q3 - Q1``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify. The column is overwritten on this object.
    column : hashable
        Label of the column to clip.
    factor : float, default 1.5
        Multiple of the IQR used to place the fences.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in, so calls can be chained.
        Callers that ignore the return value keep working unchanged.
    """
    q1 = df[column].quantile(0.25)
    q3 = df[column].quantile(0.75)
    iqr = q3 - q1
    # Define the acceptable range
    lower_bound = q1 - factor * iqr
    upper_bound = q3 + factor * iqr
    # Clip the outliers to the nearest fence instead of dropping the rows
    df[column] = df[column].clip(lower_bound, upper_bound)
    return df

# Apply this function to every feature column.
# The 'Stress Levels' target is excluded by name rather than by position, so the
# loop does not depend on the target being the last column of the CSV.
for col in df.columns.drop("Stress Levels"):
    remove_outliers(df, col)

# Save the cleaned dataset
df.to_csv('../Dataset/Cleaned Human Stress Dataset.csv', index=False)

**What it does:**

It limits (or "clips") the values in df[column] so that any value below lower_bound is set to lower_bound, and any value above upper_bound is set to upper_bound.\
This keeps all values within the specified range, replacing extreme outliers with the closest boundary value instead of removing them.

# Standardize Features

In [None]:
# Using StandardScaler
scaler = StandardScaler()
# Select the feature columns once, by name, so the scaled values and the
# column labels below are guaranteed to come from the same set of columns.
features = df.drop(columns="Stress Levels")
scaled_features = scaler.fit_transform(features)

# Convert back to a DataFrame (same labels and index as the inputs) and add the target column
df_scaled = pd.DataFrame(scaled_features, columns=features.columns, index=features.index)
df_scaled["Stress Levels"] = df["Stress Levels"].values

print(df_scaled.head())

   Snoring Rate  Respiratory Rate  Body Temperature  Limb Movement  \
0      0.887273          0.691429          0.488571       0.840000   
1      0.848000          0.650286          0.468000       0.792000   
2      0.272727          0.285714          0.785714       0.400000   
3      0.741091          0.538286          0.412000       0.661333   
4      0.056727          0.089143          0.919429       0.166400   

   Blood Oxygen  Eye Movement  Sleep Hours  Heart Rate  Stress Levels  
0      0.522667      0.880000     0.204444    0.691429              1  
1      0.503467      0.864000     0.172444    0.650286              1  
2      0.866667      0.555556     0.777778    0.285714              0  
3      0.451200      0.820444     0.085333    0.538286              1  
4      0.949867      0.277333     0.916444    0.089143              0  


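This standardizes each feature to have a mean of 0 and a standard deviation of 1. Note: the output shown above lies entirely within the range [0, 1] and contains no negative values, which is what min-max scaling would produce rather than standardization; since the cell is marked as not executed (`In [None]`), the output appears to be stale and the cell should be re-run to refresh it.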