In [None]:
import pandas as pd
from pandas import DataFrame
import numpy as np
from dotenv import load_dotenv

# Load variables from ../.env (resolved against the working directory) into
# the process environment.
load_dotenv("../.env")

# Read the demo subset and the full dataset. low_memory=False makes pandas
# parse each file in one pass rather than in chunks, which avoids columns
# ending up with mixed inferred dtypes.
DEMO_CSV = ".tmp/dataset/dataset_demo.csv"
TOTAL_CSV = ".tmp/dataset/dataset_total.csv"
df_demo = pd.read_csv(DEMO_CSV, low_memory=False)
df_total = pd.read_csv(TOTAL_CSV, low_memory=False)

# Set NAME to the empty string on every row of the full dataset (the column
# is created if absent, overwritten otherwise).
df_total["NAME"] = ""

Features that are perturbed one at a time to build the permutation set:

-   LOAN_COUNT (number)
-   AMT_CREDIT_SUM_DEBT_SUM (number)
-   AMT_INCOME_TOTAL (number)
-   DAYS_EMPLOYED (number)


In [None]:
# Features that are swept one at a time when building the permutation set.
perm_cols = [
    "LOAN_COUNT",
    "AMT_CREDIT_SUM_DEBT_SUM",
    "AMT_INCOME_TOTAL",
    "DAYS_EMPLOYED",
]
# Per-column [min, Q1, Q3, max] computed over the full dataset.
# np.nanquantile ignores missing values; plain np.quantile returns NaN for
# every quantile as soon as a column contains a single NaN, which would turn
# the whole sweep range for that column into NaN.
quartiles = {
    c: np.nanquantile(np.asarray(df_total[c]), [0, 0.25, 0.75, 1.0])
    for c in perm_cols
}


def get_permutations(row: DataFrame, num: int = 100, columns=None, ranges=None):
    """Build a what-if grid around one record by sweeping one feature at a time.

    For every swept column the record is copied ``num`` times and that column
    is overwritten with ``num`` evenly spaced values running from the first to
    the last entry of ``ranges[column]``; all other columns keep the record's
    own values. The original record comes first, exact duplicate rows are
    dropped (keeping the first occurrence) and the index is reset.

    Args:
        row: Single-row DataFrame holding the record to perturb.
        num: Number of evenly spaced values generated per swept column.
        columns: Column names to sweep. Defaults to the module-level
            ``perm_cols``.
        ranges: Mapping from column name to a sequence whose first and last
            entries bound the sweep. Defaults to the module-level
            ``quartiles``.

    Returns:
        DataFrame with the original record at position 0, followed by one
        block of swept copies per column, with a fresh 0..n-1 index.

    Raises:
        ValueError: If ``row`` does not hold exactly one record or ``num`` is
            smaller than 1.
        KeyError: If a swept column is missing from ``row`` or from ``ranges``.
    """
    if columns is None:
        columns = perm_cols
    if ranges is None:
        ranges = quartiles

    # Explicit input checks replace the former unused `row[c].item()` call,
    # whose only effect was to raise on multi-row input or unknown columns.
    if len(row) != 1:
        raise ValueError(f"row must contain exactly one record, got {len(row)}")
    if num < 1:
        raise ValueError(f"num must be at least 1, got {num}")
    missing = [c for c in columns if c not in row.columns]
    if missing:
        raise KeyError(f"columns not present in row: {missing}")

    perm_dfs = []
    for c in columns:
        # Only the outer bounds of the stored quantiles define the sweep range.
        values = np.linspace(ranges[c][0], ranges[c][-1], num=num)
        perm_df = pd.concat([row] * num, axis=0)
        perm_df[c] = values
        perm_dfs.append(perm_df)

    result_df = pd.concat([row, *perm_dfs], axis=0)
    return result_df.drop_duplicates(keep="first").reset_index(drop=True)

In [None]:
import requests
import os
import json

# Inference service base URL and access token, read from the process
# environment. Either value is None when its variable is not set.
API_URL = os.getenv("INFERENCE_API_URL")
API_KEY = os.getenv("INFERENCE_API_KEY")


def run_permute_inference(row: DataFrame, timeout: float = 60.0):
    """Score a record and its single-feature permutations with the inference API.

    Expands ``row`` with ``get_permutations`` and POSTs the resulting records
    as ``{"data": [...]}`` to ``{API_URL}/inference``, sending ``API_KEY`` in
    the ``access_token`` header.

    Args:
        row: Single-row DataFrame to perturb and score.
        timeout: Seconds to wait for the server before aborting the request,
            so a stalled service cannot hang the kernel indefinitely.

    Returns:
        Tuple ``(result, perm_df)``: the decoded JSON response body and the
        DataFrame whose records were sent.

    Raises:
        RuntimeError: If ``INFERENCE_API_URL`` is not configured.
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    # Fail fast instead of sending a request to the literal URL "None/inference".
    if not API_URL:
        raise RuntimeError("INFERENCE_API_URL is not set; cannot reach the inference API")

    perm_df = get_permutations(row)
    body = json.dumps({"data": perm_df.to_dict(orient="records")})
    response = requests.post(
        f"{API_URL}/inference",
        data=body,
        headers={"access_token": API_KEY},
        timeout=timeout,
    )
    # Surface HTTP errors here rather than parsing an error payload as if it
    # were a list of predictions.
    response.raise_for_status()
    return response.json(), perm_df

### 결과 분석


In [None]:
# Choose the first swept feature and draw one demo record, then score that
# record together with its permutations. No random_state is passed, so a
# different record is drawn on every run.
col = perm_cols[0]
row = df_demo.sample(n=1)
result, perm_df = run_permute_inference(row)

In [None]:
# Inspect the records sent to the API: the sampled row followed by its
# single-feature permutations.
perm_df

In [None]:
# Turn the API response into a frame and drop its NAME / SK_ID_CURR columns
# (presumably because perm_df already carries them; confirm against the API
# schema), then place the remaining columns next to the permuted inputs.
# Both frames are aligned on their index.
predictions = DataFrame.from_records(result)
predictions = predictions.drop(columns=["NAME", "SK_ID_CURR"])
result_df = pd.concat([perm_df, predictions], axis=1)

In [None]:
# Inspect the permuted inputs side by side with the columns returned by the API.
result_df

In [None]:
# Presumably a probability threshold; it is not referenced in the cells
# visible here.
PROB_THR = 0.01

# Keep only the first row for each distinct value of the swept column `col`.
result_df.drop_duplicates(subset=[col], keep="first")