In [10]:
# Load packages
import pandas as pd 
import numpy as np
import pickle
from mlflow.tracking import MlflowClient
from matplotlib import pyplot as plt
import json

In [11]:
# Notebook configuration.
# MODEL_PATH / DATA_PATH are local copies' locations; the cells shown in this part of
# the notebook read the model and data details from the MLflow run's artifacts instead.
MODEL_PATH = "../models/model/model.pkl"
# Fixed typo ("data_detaims"): the artifact this mirrors is named "data_details".
DATA_PATH = "../data/processed/data/data_details"
# MLflow run whose artifacts (model + data details) are analysed below
RUN_ID = '92ed66d742664ef78606066f1ed74a8c'
# Location of the MLflow tracking store, passed to MlflowClient
TRACKING_URI = "../mlflow"

In [12]:

# Connect to the MLflow tracking store and fetch the run of interest
client = MlflowClient(tracking_uri=TRACKING_URI)
run = client.get_run(run_id=RUN_ID)

# Build the locations of the two artifacts used by this notebook
artifacts_root = run.info.artifact_uri
data_artifact_uri = artifacts_root + "/data/data_details"
model_artifact_uri = artifacts_root + "/model/model.pkl"

# Deserialize the pickled model.
# NOTE: unpickling can execute arbitrary code -- only load artifacts from a trusted run.
# NOTE(review): open() needs a filesystem path; this presumably relies on the run's
# artifact_uri being a plain local path rather than a file:// URI -- confirm.
with open(model_artifact_uri, 'rb') as model_file:
    classifier = pickle.load(model_file)

# Display the loaded model as the cell output
classifier

In [13]:


# Load the run's data-details JSON; the code below reads its 'features_names'
# and 'targets_names' lists.
with open(data_artifact_uri, "r") as file:
    data = json.load(file)

# Single-row, all-zero frame with one integer flag column per feature name
features = pd.DataFrame(data=np.zeros((1, len(data['features_names']))), columns=data['features_names'],
                        dtype=int)

# Single-row frame with one column per target name. It is created as float because
# a later cell stores predicted percentages (predict_proba output * 100) in it;
# writing floats into integer columns relies on silent upcasting, which recent
# pandas versions warn about and are phasing out.
target = pd.DataFrame(data=np.zeros((1, len(data['targets_names']))), columns=data['targets_names'],
                      dtype=float)




In [14]:
# Skills the hypothetical candidate already has, and the job whose probability is studied
skills = ['Firebird', 'Firebase Realtime Database', 'Couch DB']
target_job = 'Data scientist or machine learning specialist'

# Fail fast on misspelled skill names: every skill must be an existing feature column,
# otherwise the row handed to the classifier would no longer match its expected features.
unknown_skills = [skill for skill in skills if skill not in features.columns]
if unknown_skills:
    raise KeyError(f"Skills not present in the feature columns: {unknown_skills}")

# Reset the row before flagging, so re-running this cell with a different skill list
# does not keep skills switched on by a previous run.
features.loc[:, :] = 0
features.loc[:, skills] = 1

In [15]:
# Look up the position of the target job's column in `target` (shown as the cell output)
target.columns.get_loc(target_job)

3

In [16]:
# Baseline: predicted probability of every target for the current skill row,
# stored in `target` as percentages.
prediction = classifier.predict_proba(features.values)
target.iloc[0] = prediction[0] * 100

target_job_index = target.columns.get_loc(target_job)
init_percent = target[target_job].values[0]
# NOTE: a baseline of exactly 0 makes the relative effects below inf/nan.

# Build one candidate row per feature: the current skill row with that single
# feature switched on. All candidates are scored with ONE predict_proba call
# instead of one call (plus a full DataFrame copy) per feature.
feature_names = data['features_names']
feature_positions = features.columns.get_indexer(feature_names)
if (feature_positions < 0).any():
    raise KeyError("Some entries of data['features_names'] are not columns of `features`")

candidate_rows = np.repeat(features.values, len(feature_names), axis=0)
candidate_rows[np.arange(len(feature_names)), feature_positions] = 1

candidate_percent = np.asarray(classifier.predict_proba(candidate_rows))[:, target_job_index] * 100

# Relative change of the target job's percentage versus the baseline, one entry per
# feature, in the order of data['features_names'].
# (The variable name, spelling included, is kept because the following cell reads it.)
featuers_effect = ((candidate_percent - init_percent) / init_percent).tolist()


In [17]:
# Minimum relative effect (change divided by the baseline) a feature must exceed to be kept
threshold = 5

# Label each effect with its feature name, rank from largest to smallest,
# then keep only the entries above the threshold.
effect_by_feature = pd.Series(data=featuers_effect, index=data['features_names'])
ranked_effects = effect_by_feature.sort_values(ascending=False)
featuers_effect = ranked_effects[ranked_effects > threshold]

# Display the surviving features as the cell output
featuers_effect

Python                       97.327610
Scikit-Learn                 92.669347
Pandas                       43.894407
Heroku                       40.971110
Pip                          16.355778
LLVM's Clang                 15.872306
R                             9.655135
Keras                         9.426155
NumPy                         8.465331
Amazon Web Services (AWS)     8.343323
FastAPI                       7.790123
Express                       5.362199
dtype: float64