In [None]:
from functools import lru_cache
from typing import Dict, List, Optional

from joblib import load
from pandas import DataFrame, read_csv
from scipy.sparse import hstack, random
from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

In [None]:
# Total number of columns in the matrix handed to the pickled model
# (text features + categorical features).
_N_FEATURES = 24627
_TRAIN_CSV = 'data/salary-train.csv'
_MODEL_PATH = 'model.pkl'
_TEXT_COL = 'FullDescription'
_CATEG_COLS = ['LocationNormalized', 'ContractTime']


def _preproc(
    df: DataFrame,
    strCols: List[str]
) -> DataFrame:
    """Return a cleaned copy of ``df``; the input frame is not modified.

    Missing values in ``strCols`` are replaced with the literal string
    ``'nan'``. Every object-dtype column is then lower-cased and each
    character that is not an ASCII letter or digit is replaced by a space.
    """
    res = df.copy()
    res.loc[:, strCols] = res.loc[:, strCols].fillna('nan')
    for col in res:
        if res[col].dtype == object:
            res[col] = res[col].str.lower()
            res[col] = res[col].replace('[^a-zA-Z0-9]',
                                        ' ', regex=True)
    return res


@lru_cache(maxsize=None)
def _getTextEncoder(max_features: int):
    """Return a ``TfidfVectorizer`` fitted on the training descriptions.

    Only the first column of the training CSV is read; it must be named
    ``FullDescription``. The fitted encoder is cached per ``max_features``
    so the CSV is parsed and the vocabulary is built once per kernel
    session instead of once per prediction. Call
    ``_getTextEncoder.cache_clear()`` if the training file changes.
    """
    text = read_csv(_TRAIN_CSV, usecols=[0])
    text = _preproc(df=text, strCols=text.columns)
    enc = TfidfVectorizer(max_features=max_features)
    # Only the fitted vocabulary/idf is needed, so `fit` is enough; the
    # transformed training matrix would be thrown away.
    enc.fit(text[_TEXT_COL])
    return enc


@lru_cache(maxsize=None)
def _getModel():
    """Load the pickled estimator once and reuse it on later calls.

    SECURITY: ``joblib.load`` unpickles the file, which can execute
    arbitrary code; only point ``_MODEL_PATH`` at a trusted file.
    """
    return load(filename=_MODEL_PATH)


def predict(
    data: Optional[Dict] = None
) -> float:
    """Run the pickled model on one record, or on a random row.

    Args:
        data: Mapping describing a single record. It must contain the keys
            ``FullDescription``, ``LocationNormalized`` and
            ``ContractTime``. When ``None``, a random sparse row with
            ``_N_FEATURES`` columns is scored instead; no seed is passed,
            so that row differs between calls.

    Returns:
        Whatever the model's ``predict`` returns for the one-row matrix,
        passed through unchanged.
        NOTE(review): the ``float`` annotation suggests a scalar, but the
        result is not converted here — presumably it is a length-1 array;
        confirm what callers expect before changing it.

    Raises:
        KeyError: if ``data`` lacks one of the required keys.
    """
    if data is not None:
        row = _preproc(df=DataFrame(data, index=[0]), strCols=_CATEG_COLS)

        # NOTE(review): the categorical encoder is fitted on this single
        # row, so its columns are only the values present in this record.
        # Presumably the model was trained with an encoder fitted on the
        # training set — confirm that the column layout matches.
        enc_categ = DictVectorizer()
        X_categ = enc_categ.fit_transform(
            row[_CATEG_COLS].to_dict('records'))

        # The text vocabulary is capped so that text + categorical columns
        # add up to at most the width the model expects.
        enc_text = _getTextEncoder(_N_FEATURES - X_categ.get_shape()[-1])
        X_text = enc_text.transform(row[_TEXT_COL])

        X = hstack([X_text, X_categ])

    else:
        # Prediction on a random example
        X = random(
            m=1,  # number of objects (rows)
            n=_N_FEATURES
        )

    return _getModel().predict(X=X)

In [None]:
from pandas import read_csv
from requests import post

# Score every record of the small test file, printing one result per row.
data_test = read_csv('data/salary-test-mini.csv')

for idx, row in data_test.iterrows():
    # Each row is handed to `predict` as a plain column -> value mapping.
    print(predict(row.to_dict()))