In [1]:
import joblib
import pandas as pd
import numpy as np
import json
from pprint import pprint


In [2]:
# Restore the two fitted pipelines and the three feature-name lists from disk.
# NOTE: joblib.load unpickles its input, so only point these paths at files you trust.
reg_model, class_model = [
    joblib.load(model_path)
    for model_path in (
        "../predict/models/reg_model.pkl",
        "../predict/models/class_model.pkl",
    )
]
input_features, num_out_features, cat_out_features = [
    joblib.load(feature_path)
    for feature_path in (
        "../predict/features/input_features.pkl",
        "../predict/features/num_out_features.pkl",
        "../predict/features/cat_out_features.pkl",
    )
]

In [3]:
# Show the regression pipeline and its Python type, one per line.
print(reg_model, type(reg_model), sep="\n")

Pipeline(steps=[('preprocessor',
                 ColumnTransformer(transformers=[('num',
                                                  Pipeline(steps=[('scaler',
                                                                   StandardScaler())]),
                                                  Index(['Age', 'High_School_GPA', 'SAT_Score', 'University_Ranking',
       'University_GPA', 'Internships_Completed', 'Projects_Completed',
       'Certifications', 'Soft_Skills_Score', 'Networking_Score',
       'Job_Offers'],
      dtype='object')),
                                                 ('cat',
                                                  Pipeline(steps=[('onehot',
                                                                   OneHotEncoder())]),
                                                  Index(['Gender', 'Field_of_Study', 'Current_Job_Level'], dtype='object'))])),
                ('pca', PCA(n_components=0.95)),
                ('regressor',
               

In [4]:
# Show the classification pipeline and its Python type, one per line.
print(class_model, type(class_model), sep="\n")

Pipeline(steps=[('preprocessor',
                 ColumnTransformer(transformers=[('num',
                                                  Pipeline(steps=[('scaler',
                                                                   StandardScaler())]),
                                                  Index(['Age', 'High_School_GPA', 'SAT_Score', 'University_Ranking',
       'University_GPA', 'Internships_Completed', 'Projects_Completed',
       'Certifications', 'Soft_Skills_Score', 'Networking_Score',
       'Job_Offers'],
      dtype='object')),
                                                 ('cat',
                                                  Pipeline(steps=[('onehot',
                                                                   OneHotEncoder())]),
                                                  Index(['Gender', 'Field_of_Study', 'Current_Job_Level'], dtype='object'))])),
                ('pca', PCA(n_components=0.95)),
                ('classifier', RandomForestCl

In [5]:
# Show the ordered input feature names and the container type they are stored in.
print(input_features, type(input_features), sep="\n")

['Age', 'Gender', 'High_School_GPA', 'SAT_Score', 'University_Ranking', 'University_GPA', 'Field_of_Study', 'Internships_Completed', 'Projects_Completed', 'Certifications', 'Soft_Skills_Score', 'Networking_Score', 'Job_Offers', 'Current_Job_Level']
<class 'list'>


In [6]:
# Show the names of the numeric outputs and the container type they are stored in.
print(num_out_features, type(num_out_features), sep="\n")

['Starting_Salary', 'Career_Satisfaction', 'Years_to_Promotion', 'Work_Life_Balance']
<class 'list'>


In [7]:
# Show the names of the categorical outputs and the container type they are stored in.
print(cat_out_features, type(cat_out_features), sep="\n")

['Entrepreneurship']
<class 'list'>


In [8]:
# One example record to score. The casting cell looks up data[feature] for every
# name in input_features, so each of those names must be present as a key here.
data = dict(
    Age=23,
    Gender="Male",
    High_School_GPA=0.8,
    SAT_Score=0.914,
    University_Ranking=0.23,
    University_GPA=0.85,
    Field_of_Study="Computer Science",
    Internships_Completed=2,
    Projects_Completed=5,
    Certifications=3,
    Soft_Skills_Score=7.8,
    Networking_Score=6.5,
    Job_Offers=1,
    Current_Job_Level="Entry",
)

In [9]:
# Read the declared type name ("int", "float" or "str") of every input feature.
with open("../predict/features/feature_types.json") as f:
    feature_types = json.load(f)

# Translate those type names into the matching Python constructors.
type_map = {caster.__name__: caster for caster in (int, float, str)}

# Cast each raw value to its declared type, following the order of input_features.
processed_data = []
for feature in input_features:
    raw_value = data[feature]
    caster = type_map[feature_types[feature]]
    print(caster)
    processed_data.append(caster(raw_value))

# Single-row frame whose columns are the input feature names, in order.
input_df = pd.DataFrame(data=[processed_data], columns=input_features)


<class 'int'>
<class 'str'>
<class 'float'>
<class 'float'>
<class 'float'>
<class 'float'>
<class 'str'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'float'>
<class 'float'>
<class 'int'>
<class 'str'>


In [10]:
# Summarize the single-row frame: column names, non-null counts and the dtype of each column.
input_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 14 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Age                    1 non-null      int64  
 1   Gender                 1 non-null      object 
 2   High_School_GPA        1 non-null      float64
 3   SAT_Score              1 non-null      float64
 4   University_Ranking     1 non-null      float64
 5   University_GPA         1 non-null      float64
 6   Field_of_Study         1 non-null      object 
 7   Internships_Completed  1 non-null      int64  
 8   Projects_Completed     1 non-null      int64  
 9   Certifications         1 non-null      int64  
 10  Soft_Skills_Score      1 non-null      float64
 11  Networking_Score       1 non-null      float64
 12  Job_Offers             1 non-null      int64  
 13  Current_Job_Level      1 non-null      object 
dtypes: float64(6), int64(5), object(3)
memory usage: 240.0+ bytes


In [11]:
# Run the regression pipeline on the single-row frame, then echo the raw
# prediction array and its type.
reg_pred = reg_model.predict(input_df)
print(reg_pred, type(reg_pred), sep="\n")

[[5.06942208e+04 5.61075000e+00 3.01625000e+00 5.48675000e+00]]
<class 'numpy.ndarray'>


In [12]:
# Run the classification pipeline, then force the result into a single row
# (shape (1, n)) before echoing it and its type.
class_raw = class_model.predict(input_df)
class_pred = class_raw.reshape(1, -1)
print(class_pred, type(class_pred), sep="\n")

[['No']]
<class 'numpy.ndarray'>


In [13]:
# Assemble one flat {output name: prediction} mapping for the single input row.
res = {}

# Regression outputs: entry i of the only row is the i-th numeric target.
for i, feature in enumerate(num_out_features):
    res[feature] = reg_pred[0][i]

# Classification outputs: class_pred was reshaped to a single row, so the i-th
# output lives in *column* i. Indexing with class_pred[i] selects row i instead,
# which returns the whole row for i == 0 and raises IndexError as soon as there
# is a second categorical output. The column slice keeps a one-element array,
# so the stored value still exposes `.item()` for later conversion.
for i, feature in enumerate(cat_out_features):
    res[feature] = class_pred[:, i]

pprint(res)


{'Career_Satisfaction': np.float64(5.61075),
 'Entrepreneurship': array(['No'], dtype=object),
 'Starting_Salary': np.float64(50694.220813800304),
 'Work_Life_Balance': np.float64(5.48675),
 'Years_to_Promotion': np.float64(3.01625)}


In [14]:
def clean(obj, ndigits=None):
    """Convert NumPy values in ``obj`` to built-in Python values, in place.

    Every value that exposes ``.item()`` (NumPy scalars, one-element arrays)
    is unwrapped with it. Values without ``.item()`` are taken as they are,
    so calling ``clean`` again on an already cleaned dict no longer fails.
    Floats are then rounded with ``round(value, ndigits)``.

    Args:
        obj: Dict to clean. It is modified in place.
        ndigits: Decimal places to keep when rounding floats. The default
            ``None`` rounds to the nearest integer and produces ``int`` values.

    Returns:
        The same ``obj`` dict, so ``res = clean(res)`` also works.

    Raises:
        ValueError: If a value is an array holding more than one element
            (raised by ``.item()``).
    """
    # Only existing keys are reassigned, so the dict size is stable while iterating.
    for key, value in obj.items():
        # Conversion from NumPy data types to standard Python data types.
        native = value.item() if hasattr(value, "item") else value
        if isinstance(native, float):
            native = round(native, ndigits)
        obj[key] = native
    return obj

In [15]:
# Unwrap the NumPy values stored in `res` (clean() rewrites the dict in place),
# then print the converted mapping.
clean(res)
pprint(res)

{'Career_Satisfaction': 6,
 'Entrepreneurship': 'No',
 'Starting_Salary': 50694,
 'Work_Life_Balance': 5,
 'Years_to_Promotion': 3}
