In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from google.colab import files

# Step 1: load the dataset.
# files.upload() opens the Colab upload widget; the CSV is then read by a
# fixed relative file name.
uploaded = files.upload()
df = pd.read_csv("human_vital_signs_dataset_2024.csv")

# Step 2: initial exploration (optional but recommended).
print("Premières lignes du DataFrame:")
print(df.head())
print("\nInformations sur le DataFrame:")
df.info()  # info() prints directly, so it is not wrapped in print()
print("\nStatistiques descriptives du DataFrame:")
print(df.describe())

# Step 3: shuffle the dataset.
# A fixed seed (the same value, 42, that the later shuffle and splits use)
# makes this preview reproducible from one run to the next.
df_shuffled = df.sample(frac=1, random_state=42).reset_index(drop=True)
print("\nPremières lignes du DataFrame mélangé:")
print(df_shuffled.head())


In [None]:
# Drop the 'Patient ID' and 'Timestamp' columns.
# Rebinding `df` (instead of inplace=True) together with errors='ignore'
# keeps the cell re-runnable: a second execution is a no-op rather than a
# KeyError on the already-removed columns.
df = df.drop(columns=['Patient ID', 'Timestamp'], errors='ignore')
df.info()
# Only the last expression of a cell is displayed, so the summary statistics
# must be printed explicitly to be visible.
print(df.describe())
df.head()

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the cells below read and write
# files under /content/drive/MyDrive.
drive.mount('/content/drive')

In [None]:
# Write the current `df` to a CSV file on the mounted Drive, omitting the index column.
df.to_csv('/content/drive/MyDrive/human_vital_signs_dataset_2024.csv', index=False)

In [None]:
# Reload the copy stored on Drive, shuffle every row with a fixed seed, and
# write the result back to the same path.
# NOTE: the input and output file are the same, so running this cell again
# on its own shuffles the already-shuffled rows.
df_unshuffled = pd.read_csv('/content/drive/MyDrive/human_vital_signs_dataset_2024.csv')
shuffled_rows = df_unshuffled.sample(frac=1, random_state=42)
df = shuffled_rows.reset_index(drop=True)
df.to_csv('/content/drive/MyDrive/human_vital_signs_dataset_2024.csv', index=False)


In [None]:
from sklearn.model_selection import train_test_split
# First cut: 80% of the rows for training, 20% held back for the next split.
train_df, temp_df = train_test_split(df, random_state=42, test_size=0.2)
# Second cut: halve the held-back rows into validation and test (10% each overall).
val_df, test_df = train_test_split(temp_df, random_state=42, test_size=0.5)

# Persist each split to Drive without the index column.
for split_frame, split_path in (
    (train_df, '/content/drive/MyDrive/train.csv'),
    (val_df, '/content/drive/MyDrive/val.csv'),
    (test_df, '/content/drive/MyDrive/test.csv'),
):
    split_frame.to_csv(split_path, index=False)

In [None]:
from google.colab import drive
import pandas as pd
from sklearn.preprocessing import StandardScaler
import joblib

# Mount Google Drive so the split CSVs written earlier are reachable.
drive.mount('/content/drive')

# Integer codes for the two categorical columns.
GENDER_CODES = {'Male': 1, 'Female': 0}
RISK_CODES = {'Low Risk': 0, 'High Risk': 1}


def _encode(series, codes):
    """Map a categorical column to its integer codes.

    Series.map() turns any label missing from `codes` into NaN without
    warning; this helper raises instead so a typo or an unexpected category
    cannot leak NaN into the features or the target.

    Args:
        series: the pandas Series to encode.
        codes: dict mapping each expected label to its integer code.

    Returns:
        The mapped Series (values already missing in `series` stay missing).

    Raises:
        ValueError: if `series` holds a non-null value that is not a key of `codes`.
    """
    encoded = series.map(codes)
    unknown = series[series.notna() & encoded.isna()].unique()
    if len(unknown) > 0:
        raise ValueError(
            f"Unexpected values in column {series.name!r}: {list(unknown)}"
        )
    return encoded


def _load_split(path):
    """Read one split CSV and return (frame, features, target).

    `frame` is the loaded data with Gender replaced by its integer code,
    `features` is `frame` without the Risk_Category column, and `target` is
    the integer-coded Risk_Category.
    """
    frame = pd.read_csv(path)
    frame['Gender'] = _encode(frame['Gender'], GENDER_CODES)
    features = frame.drop(columns=['Risk_Category'])
    target = _encode(frame['Risk_Category'], RISK_CODES)
    return frame, features, target


def _rebuild(scaled, features, target, frame):
    """Reassemble a split from its scaled feature array.

    The result has the feature columns of `features` filled with `scaled`,
    a Risk_Category column holding `target`, and the Gender column
    overwritten with the unscaled 0/1 codes from `frame`.
    """
    normalized = pd.DataFrame(scaled, columns=features.columns)
    normalized['Risk_Category'] = target
    normalized['Gender'] = frame['Gender']
    return normalized


# Load the three splits, encode Gender, and separate features from the target.
df_train, X_train, y_train = _load_split('/content/drive/MyDrive/train.csv')
df_val, X_val, y_val = _load_split('/content/drive/MyDrive/val.csv')
df_test, X_test, y_test = _load_split('/content/drive/MyDrive/test.csv')

# Fit the scaler on the training features only, then apply the same fitted
# scaler to the validation and test features.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Save the fitted scaler to a .pkl file.
# NOTE(review): the scaler is fit on every feature column, Gender included,
# while the saved CSVs below carry the unscaled 0/1 Gender. Code that applies
# scaler.pkl to a full feature row will standardize Gender, unlike the saved
# training data - confirm the downstream consumers account for this.
joblib.dump(scaler, '/content/drive/MyDrive/scaler.pkl')

# Rebuild the normalized dataframes.
df_train_normalized = _rebuild(X_train_scaled, X_train, y_train, df_train)
df_val_normalized = _rebuild(X_val_scaled, X_val, y_val, df_val)
df_test_normalized = _rebuild(X_test_scaled, X_test, y_test, df_test)

# Save the normalized versions of the splits.
df_train_normalized.to_csv('/content/drive/MyDrive/train_normalized.csv', index=False)
df_val_normalized.to_csv('/content/drive/MyDrive/val_normalized.csv', index=False)
df_test_normalized.to_csv('/content/drive/MyDrive/test_normalized.csv', index=False)

# Optional: show the first rows as a sanity check.
print(df_train_normalized.head())

