In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate, Dropout
from dotenv import load_dotenv
import os

# === Connexion PostgreSQL ===
from sqlalchemy import create_engine, text
host = os.getenv("DB5_HOST")
port = os.getenv("DB5_PORT")
database = os.getenv("DB5_NAME")
username = os.getenv("DB5_USER")
password = os.getenv("DB5_PASSWORD")
db_url = f"postgresql+psycopg2://{username}:{password}@{host}:{port}/{database}"
engine = create_engine(db_url)

# === Requête des données ===
query = """
SELECT 
    c.sexe,
    c.pays,
    c."situationEconomique",
    f."moy_bac_et",
    d."Section",
    d."Type",
    f."score_final",
    c.resultat,
    c.orientation
FROM "factadmission" f
JOIN "DIM_CANDIDAT" c ON f."fkCandidat" = c."pkcandidat"
JOIN "dim_diplome" d ON f."fkDiplome" = d."PkDiplome"
WHERE d."Type" = 'bac';
"""
df = pd.read_sql(query, engine)

# === Encodage des colonnes ===
categorical_cols = ['sexe', 'pays', 'situationEconomique', 'Section', 'orientation']
label_encoders = {}

for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# === Features et Target ===
X = df[['sexe', 'pays', 'situationEconomique', 'moy_bac_et', 'Section']]
y = df['orientation']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# === Conversion des types pour éviter les erreurs avec TensorFlow ===
X_train = X_train.astype({
    'sexe': 'int32',
    'pays': 'int32',
    'situationEconomique': 'int32',
    'Section': 'int32',
    'moy_bac_et': 'float32'
})
X_test = X_test.astype({
    'sexe': 'int32',
    'pays': 'int32',
    'situationEconomique': 'int32',
    'Section': 'int32',
    'moy_bac_et': 'float32'
})
y_train = y_train.astype('int32')
y_test = y_test.astype('int32')

# === Embedding Info ===
embedding_info = {col: df[col].nunique() for col in ['sexe', 'pays', 'situationEconomique', 'Section']}

# === Inputs ===
input_sexe = Input(shape=(1,))
input_pays = Input(shape=(1,))
input_situation = Input(shape=(1,))
input_section = Input(shape=(1,))
input_moyenne = Input(shape=(1,))

# === Embeddings ===
embed_sexe = Embedding(input_dim=embedding_info['sexe']+1, output_dim=2)(input_sexe)
embed_pays = Embedding(input_dim=embedding_info['pays']+1, output_dim=4)(input_pays)
embed_situation = Embedding(input_dim=embedding_info['situationEconomique']+1, output_dim=4)(input_situation)
embed_section = Embedding(input_dim=embedding_info['Section']+1, output_dim=4)(input_section)

# === Flatten ===
flat_sexe = Flatten()(embed_sexe)
flat_pays = Flatten()(embed_pays)
flat_situation = Flatten()(embed_situation)
flat_section = Flatten()(embed_section)

# === Concatenation ===
concat = Concatenate()([flat_sexe, flat_pays, flat_situation, flat_section, input_moyenne])

# === Réseau de neurones ===
x = Dense(128, activation='relu')(concat)
x = Dropout(0.3)(x)
x = Dense(64, activation='relu')(x)
x = Dropout(0.2)(x)
output = Dense(len(df['orientation'].unique()), activation='softmax')(x)

# === Compilation ===
model = Model(inputs=[input_sexe, input_pays, input_situation, input_section, input_moyenne], outputs=output)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# === Entraînement ===
model.fit([ 
    X_train['sexe'], 
    X_train['pays'], 
    X_train['situationEconomique'], 
    X_train['Section'], 
    X_train['moy_bac_et']
], y_train, epochs=10, verbose=1)

# === Prédiction avec nouvelles données (y compris valeur inconnue gérée) ===
nouveau_candidat = {
    'sexe': 'F',
    'pays': 'MA',  # Nouvelle valeur
    'situationEconomique': 'Riche',  # Nouvelle valeur
    'Section': 'science',  # Nouvelle valeur
    'moy_bac_et': 15.7
}

def safe_transform(le, value):
    if value in le.classes_:
        return le.transform([value])[0]
    else:
        le.classes_ = np.append(le.classes_, value)
        return le.transform([value])[0]

encoded = {
    'sexe': safe_transform(label_encoders['sexe'], nouveau_candidat['sexe']),
    'pays': safe_transform(label_encoders['pays'], nouveau_candidat['pays']),
    'situationEconomique': safe_transform(label_encoders['situationEconomique'], nouveau_candidat['situationEconomique']),
    'Section': safe_transform(label_encoders['Section'], nouveau_candidat['Section']),
    'moy_bac_et': nouveau_candidat['moy_bac_et']
}

pred = model.predict([
    np.array([encoded['sexe']]),
    np.array([encoded['pays']]),
    np.array([encoded['situationEconomique']]),
    np.array([encoded['Section']]),
    np.array([encoded['moy_bac_et']])
], verbose=0)

top_k = pred[0].argsort()[-3:][::-1]
orientations = label_encoders['orientation'].inverse_transform(top_k)

print("\n🎓 Top orientations recommandées :")
for i, (o, prob) in enumerate(zip(orientations, pred[0][top_k])):
    print(f"{i+1}. {o} — {round(prob * 100, 2)}%")


Epoch 1/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - accuracy: 0.4532 - loss: 1.5049
Epoch 2/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.6182 - loss: 1.1247
Epoch 3/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.5776 - loss: 1.0931 
Epoch 4/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.5844 - loss: 1.2139 
Epoch 5/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.5527 - loss: 1.1724 
Epoch 6/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.6399 - loss: 1.0454 
Epoch 7/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.5736 - loss: 1.1023 
Epoch 8/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.5735 - loss: 1.1846
Epoch 9/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

In [2]:
model.save("modele_orientation.h5")
print("✅ Modèle enregistré en 'modele_orientation.h5'")




✅ Modèle enregistré en 'modele_orientation.h5'


In [3]:
import joblib

joblib.dump(label_encoders, "label_encoders.joblib")
print("✅ Encoders enregistrés en 'label_encoders.joblib'")


✅ Encoders enregistrés en 'label_encoders.joblib'
