In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler 
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import f1_score

import mlflow

import json
import requests

In [2]:
data_path = 'creditcard.csv'
df = pd.read_csv(data_path)

In [3]:
# select 80 values from your data frame to query your model with
# Create dictionary with pandas DataFrame in the split orientation
json_data = {"dataframe_split": df.iloc[:80].drop(["Class"], axis=1).to_dict(orient="split")}

In [4]:
# run command line in terminal 
# mlflow models serve --model-uri runs:/494af47968d6411f830202bd8c21759f/log_reg_model -p 1235
response = requests.post("http://127.0.0.1:1235/invocations", json=json_data)

In [5]:
# predictions after querying without scaling
df2 = pd.DataFrame(response.json()['predictions'])

In [6]:
y_true = df.iloc[:80].Class
balanced_acc = balanced_accuracy_score(y_true, df2)

y_true.iloc[-1] = 1
f1score = f1_score(y_true, df2)

print("Balanced_acc", balanced_acc)
print("f1 score", f1score)

Balanced_acc 0.15
f1 score 0.028985507246376812


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  y_true.iloc[-1] = 1


In [7]:
# querying with scaling
scaler = StandardScaler()
scaler.fit(df.drop(columns="Class", axis=1))

In [8]:
# transform data
scaled_selection = scaler.transform(df.iloc[:80].drop(["Class"], axis=1))
json_data = {"dataframe_split": pd.DataFrame(scaled_selection).to_dict(orient="split")}
response = requests.post("http://127.0.0.1:1235/invocations", json=json_data)
preds = pd.DataFrame(response.json()['predictions'])
preds

Unnamed: 0,0
0,0
1,0
2,0
3,0
4,0
...,...
75,0
76,0
77,0
78,0


In [9]:
y_true = df.iloc[:80].Class
balanced_acc = balanced_accuracy_score(y_true, preds)

y_true.iloc[-1] = 1
f1score = f1_score(y_true, preds)

print("Balanced_acc", balanced_acc)
print("f1 score", f1score)

Balanced_acc 0.4810126582278481
f1 score 0.0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  y_true.iloc[-1] = 1


In [10]:
test = df.iloc[:8000]
true = test.Class
test = scaler.transform(test.drop(["Class"], axis=1))
preds = []

batch_size = 80
for f in range(100):
    sample = pd.DataFrame(test[f*batch_size: (f+1)*batch_size]).to_dict(orient="split")
    
    json_data = {"dataframe_split": sample}
    response = requests.post("http://127.0.0.1:1235/invocations", json=json_data)
    resp = pd.DataFrame(response.json()['predictions'])
    preds = np.concatenate((preds, resp[0]))
    
balanced_acc = balanced_accuracy_score(true, preds)
f1score = f1_score(true, preds)

print("Balanced_acc", balanced_acc)
print("f1 score", f1score)

Balanced_acc 0.9665981748374526
f1 score 0.18050541516245486
