### Run docker container

In [98]:
port = 8091
!docker pull rafikrze/harmfulness_app:0.1
!docker run -p port:8000 -d rafikrze/harmfulness_app:0.1

4b8751b1556c22f83f6a9fd20d2b7e7855f3fbe2e2c7d921520720374f29b838


### Load libraries and data

In [64]:
import requests
import json
import pandas as pd
import numpy as np
from sklearn import metrics
from tqdm.auto import tqdm

# Test texts: the file has no header row and newline is used as the separator,
# so every line ends up as one row of the single `text` column.
text_test = pd.read_csv(
    'https://storage.googleapis.com/poleval_data/test_set_only_text.txt',
    sep="\n",
    header=None,
    names=['text'],
    encoding='utf-8',
)

# Request settings shared by the cells below: a `token` header, a fixed sample
# payload, and the prediction endpoint on the host port set in the docker cell.
headers = {'token': 'test'}
data_block = {"text": "to jest pierwszy test api"}
url = 'http://localhost:{}/v1/predict/'.format(port)

## Measure time for the same POST data

In [59]:
%%timeit
requests.post(url, data=json.dumps(data_block), headers=headers)

11.2 ms ± 192 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## Sample different data with the API and measure time

In [39]:
%%timeit
data_block = {"text": text_test.sample(1).values[0][0]}
r = requests.post(url, data=json.dumps(data_block), headers=headers)

25.5 ms ± 1.33 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


## Check metrics

In [96]:
# Test set, prepared the same way as in the experiment: read the tags (one per
# line, no header) and place them next to the texts as a second column.
tag_test = pd.read_csv(
    'https://storage.googleapis.com/poleval_data/test_set_only_tags.txt',
    sep="\n",
    header=None,
    names=['tag'],
    encoding='utf-8',
)
data_test = pd.concat([text_test, tag_test], axis=1)

# Guard against a truncated or misaligned download before it is used for scoring.
expected_shape = (1000, 2)
assert data_test.shape == expected_shape
print("The shape of the test data:", data_test.shape)

The shape of the test data: (1000, 2)


In [82]:
# Send every test text to the API and collect, per successfully scored row:
#   predictions_bin  - the returned `prediction`, cast to int
#   predictions_args - the returned `prediction_proba`
#   real_value       - the gold `tag` of the row
# Rows whose response body is not parseable JSON are kept in `bad_text` and
# left out of the three lists above.
predictions_bin = []
predictions_args = []
real_value = []
bad_text = []

# iterrows() is a generator without a length, so tqdm needs an explicit total
# to render an actual progress bar.
for index, row in tqdm(data_test.iterrows(), total=len(data_test)):
    data_block = {"text": row['text']}
    r = requests.post(url, data=json.dumps(data_block), headers=headers)
    try:
        prediction = json.loads(r.content)
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass. Catching only this
        # (instead of a bare except) keeps KeyboardInterrupt and genuine bugs
        # visible. Per the original note, the API answers this way when it
        # detects an unsupported language or a tweet that is too short.
        bad_text.append(row)
        continue
    predictions_bin.append(int(prediction['prediction']))
    predictions_args.append(prediction['prediction_proba'])
    real_value.append(row['tag'])

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




In [90]:
# Consistency check: every binary prediction must equal the index of the
# largest entry in the matching `prediction_proba` row.
argmax_labels = np.argmax(predictions_args, axis=1)
assert all(predictions_bin == argmax_labels)

# Scores are computed only on the rows the API actually answered.
accuracy = metrics.accuracy_score(real_value, predictions_bin)
f1_micro = metrics.f1_score(real_value, predictions_bin, average='micro')
f1_macro = metrics.f1_score(real_value, predictions_bin, average='macro')

print("Tested cases: ", len(real_value))
print("Logistic Regression Accuracy:", accuracy)
print("Logistic Regression F1 micro:", f1_micro)
print("Logistic Regression F1 macro:", f1_macro)

Tested cases:  914
Logistic Regression Accuracy: 0.8654266958424508
Logistic Regression F1 micro: 0.8654266958424508
Logistic Regression F1 macro: 0.4676581707197771


In [94]:
# Break the rows skipped during prediction down by gold tag to see which
# classes are missing from the reported metrics.
skipped_rows = pd.DataFrame(bad_text)
skipped_rows.groupby('tag').count()

Unnamed: 0_level_0,text
tag,Unnamed: 1_level_1
0,80
2,6
