In [1]:
import requests
import json
import mlflow
import joblib
import pandas as pd
import logging
from commons.load_data import (load_raw_data,
                                  transformed_employee_performance,
                                  feature_engineered_employee_performance)
from commons.engineer_features import handle_features

In [2]:
mlflow.set_tracking_uri("http://localhost:5000")

## Replace with the URI of your x_scaler and y_scaler
## You can get this on the experiment module > artifacts of your MLFlow UI
MLFLOW_XSCALER_URI = "mlflow-artifacts:/2/49adbc0879ac42e5b22fa70fad451c39/artifacts/x_scaler.pkl"
MLFLOW_YSCALER_URI = "mlflow-artifacts:/2/ce63e413d09940c9ae68617a36189440/artifacts/y_scaler.pkl"

In [3]:
logging.info("Downloading standard scalers...")
X_scaler_mlflow = mlflow.artifacts.download_artifacts(MLFLOW_XSCALER_URI)
y_scaler_mlflow = mlflow.artifacts.download_artifacts(MLFLOW_YSCALER_URI)

logging.info("Loading joblib scalers to variable...")
X_scaler = joblib.load(X_scaler_mlflow)
y_scaler = joblib.load(y_scaler_mlflow)
logging.info("Loading Complete.")

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

In [10]:
def get_data() -> pd.DataFrame:
    df = load_raw_data().iloc[4000:4500]
    df.drop(columns=['Employee_Satisfaction_Score'],
            inplace=True)
    df = transformed_employee_performance(data_df=df)
    df = feature_engineered_employee_performance(X_data=df,
                                                 X_scaler=X_scaler,
                                                 reset_index=True)
    return df

In [11]:
df = get_data()
df.head()

Found


Unnamed: 0,Employee_ID,Age,Years_At_Company,Performance_Score,Monthly_Salary,Work_Hours_Per_Week,Projects_Handled,Overtime_Hours,Sick_Days,Remote_Work_Frequency,...,Gender_Other,Job_Title_Consultant,Job_Title_Developer,Job_Title_Engineer,Job_Title_Manager,Job_Title_Specialist,Job_Title_Technician,Education_Level_High School,Education_Level_Master,Education_Level_PhD
0,4001,-0.707199,-0.905767,-1.406227,3850.0,-0.54808,1.35252,-0.410226,0.023399,-0.012669,...,False,False,False,False,False,False,True,True,False,False
1,4002,1.030166,0.506185,-0.689861,4200.0,0.919942,0.790098,1.318984,-1.129268,-0.012669,...,False,False,False,False,False,False,True,True,False,False
2,4003,-0.89008,-0.552779,-1.406227,6050.0,-0.435156,-0.967472,-0.064384,-1.590335,1.394988,...,False,True,False,False,False,False,False,True,False,False
3,4004,-1.347281,-0.552779,1.459238,8250.0,-1.564403,0.790098,-0.294946,0.023399,1.394988,...,False,True,False,False,False,False,False,False,True,False
4,4005,1.395927,0.859173,1.459238,5250.0,0.468243,-0.826866,-1.101911,0.023399,-1.420325,...,False,False,False,False,False,False,True,False,True,False


In [None]:
def get_predictions(df: pd.DataFrame) -> pd.DataFrame:
    """
    Send transformed employee performance data as JSON to the FastAPI 
    endpoint and receive predictions as a Pandas DataFrame.

    Returns
    -------
    pd.DataFrame
        DataFrame containing the predictions.
    """
    employee_id = df['Employee_ID']
    X_df = handle_features(df)
    json_data = X_df.to_json(orient="records")
    url = "http://localhost:8000/predict"
    headers = {"Content-Type": "application/json"}
    response = requests.post(url,
                             json=json.loads(json_data),
                             headers=headers)

    if response.status_code == 200:
        predictions = response.json()
        df = pd.DataFrame(predictions)
        scaled_df = y_scaler.inverse_transform(
            df['predicted_value'].values.reshape(-1, 1)
        )
        scaled_df = pd.DataFrame(scaled_df,
                              columns=['Predicted_Employee_Satisfaction_Score'])
        scaled_df.insert(0, 'Employee_ID', employee_id)
        return scaled_df
    else:
        print(f"Error: {response.status_code}, {response.text}")
        return None

In [13]:
get_predictions(df)

KeyError: 'Employee_ID'