# Routine QA notebook

This notebook generates a quick table detailing why the QA operations went wrong. Just run all cells and open the generated file.

Danilo Lessa Bernardineli

## Dependencies

In [1]:
import sys
# Add the parent directory to the module search path.
# NOTE(review): presumably so that the `meta_qa` package imported in the
# next cell resolves when the notebook runs from its own folder — confirm.
sys.path.append("..")

In [2]:
import numpy as np
import pandas as pd
import time

from meta_qa.integrations import BigQueryIntegration
from meta_qa.tools import run_qa_pipeline

## QA pipeline execution

In [31]:
def dict_test(cell):
    """Tell whether a single table cell holds a dictionary.

    Used element-wise over the raw QA result table to separate the cells
    that carry an operation record (a dict) from everything else.

    Parameters
    ----------
    cell : object
        Any value found in a DataFrame cell.

    Returns
    -------
    bool
        True when ``cell`` is a ``dict`` or an instance of a ``dict``
        subclass (e.g. ``OrderedDict``), False otherwise.
    """
    # isinstance (rather than an exact type comparison) also accepts dict
    # subclasses, which would otherwise be silently treated as non-operations.
    return isinstance(cell, dict)

In [32]:
# NOTE(review): the two arguments look like a BigQuery project and dataset
# name — confirm against BigQueryIntegration's signature.
integration = BigQueryIntegration("idwall-data", "dw_idwall")

# Time the pipeline with a monotonic, high-resolution clock: unlike
# time.time(), perf_counter() cannot jump when the system clock is adjusted,
# so the difference time2 - time1 is always a valid elapsed duration.
# These values are only meaningful as a difference, not as timestamps.
time1 = time.perf_counter()
qa_output = run_qa_pipeline(integration, n_workers=512)
time2 = time.perf_counter()

New pandarallel memory created - Size: 2000 MB
Pandarallel will run on 512 workers


## Visualization

In [33]:
# Elapsed time of the pipeline run, from the two readings taken around it.
delta_t = time2 - time1

tests_data = qa_output["result"]
raw_tests_data = qa_output["raw_result"]

# Boolean frame: True wherever the raw cell holds a dict.
operation_tests = raw_tests_data.applymap(dict_test)

# Mask out the non-dict cells, flatten to one entry per remaining cell, and
# expand every dict into its own row of columns.
dict_cells = raw_tests_data[operation_tests].stack().dropna()
stacked_operations = pd.DataFrame(dict_cells)[0].apply(pd.Series)

operation_results = stacked_operations["result"]
n_total = operation_results.count()
# Despite the ``n_`` prefix, these two hold percentages, not counts.
n_ok = operation_results.sum() / n_total * 100
n_fail = 100 - n_ok

summary_template = "Ran {} operations at {:.1f}s.\n{:.0f}% OK and {:.0f}% FAILED."
text = summary_template.format(n_total, delta_t, n_ok, n_fail)
print(text)

Ran 301 operations at 20.0s.
83% OK and 17% FAILED.


In [34]:
# Keep only the operations whose ``result`` is False.
# A boolean mask is used instead of ``.where(...).dropna()``: that form also
# discarded failed rows that had a missing value in any other column, and it
# changed the column dtypes by filling non-matching rows with NaN first.
# ``== False`` is deliberate: it is an element-wise comparison, and rows whose
# result is missing (NaN) compare unequal and are therefore excluded.
failed_operations = stacked_operations[stacked_operations["result"] == False]  # noqa: E712

### Generate an HTML page to see what went wrong

In [29]:
pd.set_option('colheader_justify', 'center')   # center the table's <th> header cells

# Page skeleton; ``{table}`` is replaced by the rendered DataFrame below.
# Fixes over the previous template: the stylesheet <link> now sits inside
# <head>, the stray "." after </html> (which rendered as visible text) is
# gone, and the charset is declared to match the encoding used when writing.
# The page references a ``style.css`` expected next to the generated file.
html_string = '''
<html>
  <head>
    <meta charset="utf-8"/>
    <title>HTML Pandas Dataframe with CSS</title>
    <link rel="stylesheet" type="text/css" href="style.css"/>
  </head>
  <body>
    {table}
  </body>
</html>
'''

# Write the report of failed operations. The encoding is explicit so that
# non-ASCII cell contents do not depend on the platform's default encoding.
with open('teste.html', 'w', encoding='utf-8') as f:
    f.write(html_string.format(table=failed_operations.to_html(classes='dataframe')))

In [36]:
# Alternatively, export to an Excel file. This writes every stacked
# operation, not only the failed ones.
# NOTE(review): recent pandas releases no longer write the legacy ".xls"
# format — confirm the installed version still supports it.
stacked_operations.to_excel(excel_writer="teste.xls")