# ARCHIVE
This notebook was used for a one-off investigation when the Python PIC-SURE API was not updated. The code is kept for reference in case it comes in handy in the future for other QA notebooks.

## Quick validation script
This notebook will be used to validate that the BABYHUG metadata was loaded correctly into the integration environment. This script uses the output from running a search for "babyhug" in PIC-SURE and saving the results to a file.

In [None]:
metadata_file = "../output/babyhug_metadata.json" # Will need to change when running again
integration_file = "../input/babyhug_export.txt" # Will need to change when running again

import json

# Parse the metadata JSON file. The context manager closes the handle as soon
# as parsing finishes (the bare open() calls used before were never closed).
with open(metadata_file) as f:
    metadata = json.load(f)

# The export has a .txt extension but is parsed as JSON as well.
with open(integration_file) as f:
    export = json.load(f)

In [None]:
# Display the parsed metadata JSON to inspect its structure.
metadata

<table>
    <thead>
        <tr>
            <th>Metadata field</th>
            <th>Export field</th>
        </tr>
    </thead>
    <tbody>
        <tr>
            <td rowspan=4>variable_id</td>
            <td>derived_var_id</td>
        </tr>
        <tr>
            <td>columnmeta_name</td>
        </tr>
        <tr>
            <td>columnmeta_var_id</td>
        </tr>
        <tr>
            <td>derived_var_name</td>
        </tr>
        <tr>
            <td rowspan=2>variable_name</td>
            <td>description***</td>
        </tr>
        <tr>
            <td>derived_var_description</td>
        </tr>
        <tr>
            <td rowspan=2>form_group_description</td>
            <td>columnmeta_var_group_description</td>
        </tr>
        <tr>
            <td>derived_group_description</td>
        </tr>
        <tr>
            <td rowspan=2>study_phs_number</td>
            <td>derived_study_id</td>
        </tr>
        <tr>
            <td>columnmeta_study_id</td>
        </tr>
        <tr>
            <td>study_name</td>
            <td>derived_study_description</td>
        </tr>
        <tr>
            <td>form_group_name</td>
            <td>derived_group_name</td>
        </tr>
    </tbody>
</table>


In [None]:
# Metadata field -> list of export fields paired with it in the table above.
mapping_dict = {
    'variable_id': [
        'derived_var_id',
        'columnmeta_name',
        'columnmeta_var_id',
        'derived_var_name',
    ],
    'variable_name': [
        'description',
        'derived_var_description',
    ],
    'form_group_description': [
        'columnmeta_var_group_description',
        'derived_group_description',
    ],
    'study_phs_number': [
        'derived_study_id',
        'columnmeta_study_id',
    ],
    'study_name': ['derived_study_description'],
    'form_group_name': ['derived_group_name'],
}

In [None]:
import pandas as pd

# Flatten the nested metadata (study -> form group -> form -> variable) into
# a table with one row per variable.
metatable_columns = ['variable_id', 'variable_name', 'form_group_name',
                     'form_group_description', 'study_name', 'study_phs_number', 'form_name']

# Only the first entry of the metadata list is read.
meta_studyname = metadata[0]['study_name']
meta_studyphs = metadata[0]['study_phs_number']

# Rows are collected in a plain list and the frame is built once at the end:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and it
# copied the whole frame on every call.
meta_rows = []
for formgroup in metadata[0]['form_group']:
    meta_formgroup = formgroup['form_group_name']
    meta_formdesc = formgroup['form_group_description']
    for vargroup in formgroup['form']:
        # NOTE(review): the form name is read but never written to a row, so
        # the 'form_name' column stays empty - confirm whether that is intended.
        formname = vargroup['form_name']
        # Only the first variable group of each form is read.
        for variable in vargroup['variable_group'][0]['variable']:
            meta_varid = variable['variable_id']
            meta_varname = variable['variable_name']
            meta_rows.append({'variable_id': meta_varid,
                              'variable_name': meta_varname,
                              'form_group_name': meta_formgroup,
                              'form_group_description': meta_formdesc,
                              'study_name': meta_studyname,
                              'study_phs_number': meta_studyphs})

# Passing columns= keeps the declared column order and yields an empty,
# correctly-labelled frame when no variables were found.
metatable = pd.DataFrame(meta_rows, columns=metatable_columns)
    

In [None]:
# Display the flattened metadata table.
metatable

In [None]:
# Build one row per search result from the export, printing a diagnostic
# whenever export fields that should carry the same value disagree.
export_columns = ['variable_id', 'variable_name', 'form_group_name',
                  'form_group_description', 'study_name', 'study_phs_number', 'values']

# Rows are collected in a list and the frame is built once at the end:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
export_rows = []
for result in export['results']['searchResults']:
    result_meta = result['result']['metadata']

    # The four identifier fields should all agree.
    derived_var_id = result_meta['derived_var_id']
    columnmeta_name = result_meta['columnmeta_name']
    columnmeta_var_id = result_meta['columnmeta_var_id']
    derived_var_name = result_meta['derived_var_name']
    # The last entry used to repeat derived_var_id, so derived_var_name was
    # never actually compared.
    var = [derived_var_id, columnmeta_name, columnmeta_var_id, derived_var_name]
    if any(elem != var[0] for elem in var):
        print(derived_var_id)

    # The comparison against the plain 'description' field was already
    # disabled here; the one leftover line of it referenced the undefined
    # name `desc` and raised NameError, so it has been dropped.
    derived_var_description = result_meta['derived_var_description']

    # Both study id fields should agree.
    derived_study_id = result_meta['derived_study_id']
    columnmeta_study_id = result_meta['columnmeta_study_id']
    if derived_study_id != columnmeta_study_id:
        print(derived_study_id)

    # Both group description fields should agree.
    columnmeta_var_group_description = result_meta['columnmeta_var_group_description']
    derived_group_description = result_meta['derived_group_description']
    if columnmeta_var_group_description != derived_group_description:
        print(columnmeta_var_group_description)

    derived_study_description = result_meta['derived_study_description']
    derived_group_name = result_meta['derived_group_name']
    values = result_meta['values']

    # Rows used to be appended to `metatable` and the result assigned to
    # `exporttable`, which left exporttable holding the metadata rows plus
    # only the last export result.
    export_rows.append({'variable_id': derived_var_id,
                        'variable_name': derived_var_description,
                        'form_group_name': derived_group_name,
                        'form_group_description': columnmeta_var_group_description,
                        'study_name': derived_study_description,
                        'study_phs_number': derived_study_id,
                        'values': values})

exporttable = pd.DataFrame(export_rows, columns=export_columns)

In [None]:
# Metadata rows whose variable_id is 'OBS'.
metatable.loc[metatable['variable_id'] == 'OBS']

In [None]:
# Export rows whose variable_id is 'OTHER_EYE'.
exporttable.loc[exporttable['variable_id'] == 'OTHER_EYE']

In [None]:
# Compare the two tables ignoring row order and duplicate rows.
# The tables are declared with different column sets ('form_name' exists only
# for the metadata table, 'values' only for the export table), so comparing
# the whole frames could never report equality. Restrict the comparison to
# the columns both tables have.
shared_cols = [col for col in metatable.columns if col in exporttable.columns]
meta = metatable[shared_cols].sort_values(by=shared_cols).drop_duplicates().reset_index(drop=True)
expo = exporttable[shared_cols].sort_values(by=shared_cols).drop_duplicates().reset_index(drop=True)
print(meta.equals(expo))

In [None]:
# Strict comparison of the two tables as they are (no sorting, no de-duplication).
# NOTE(review): the tables are declared with different columns ('form_name' vs
# 'values'), so this presumably always evaluates to False - confirm.
metatable.equals(exporttable)

In [None]:
# Stack both tables, then discard every row that has an exact duplicate;
# what remains are the rows that occur only once across the two tables.
stacked = pd.concat([metatable, exporttable])
stacked.drop_duplicates(keep=False)

In [None]:
# Display the parsed export JSON.
export

In [None]:
# Display the flattened metadata table again for side-by-side inspection.
metatable

In [None]:
# Count the rows per variable_id in each table so the counts can be compared;
# the variable whose count is off is the missing one.
grouped_meta = metatable.groupby('variable_id').count()
grouped_export = exporttable.groupby('variable_id').count()

In [None]:
# Display the per-variable_id counts for the metadata table.
grouped_meta

In [None]:
# Display the per-variable_id counts for the export table.
grouped_export

In [None]:
# Variables whose variable_name count differs from their study_phs_number
# count within the metadata table.
count_mismatch = grouped_meta['variable_name'].ne(grouped_meta['study_phs_number'])
grouped_meta.loc[count_mismatch]

In [None]:
# Print every variable_id whose variable_name count differs between the
# export and the metadata.
# A variable present in only one table is exactly what this check is looking
# for, but grouped_meta.loc[i] raised KeyError for ids that exist only in the
# export, and ids that exist only in the metadata were never visited.
# Aligning both count series on the union of ids (absent -> 0) covers both.
all_ids = grouped_export.index.union(grouped_meta.index)
export_counts = grouped_export['variable_name'].reindex(all_ids, fill_value=0)
meta_counts = grouped_meta['variable_name'].reindex(all_ids, fill_value=0)
differs = export_counts.ne(meta_counts)
for i in differs[differs].index:
    print(i)

In [None]:
# Per-column counts for TRANSPLANT_RSN_SP in the metadata table.
grouped_meta.loc['TRANSPLANT_RSN_SP']

In [None]:
# Per-column counts for TRANSPLANT_RSN_SP in the export table.
grouped_export.loc['TRANSPLANT_RSN_SP']

In [None]:
# Metadata rows whose variable_id is 'TRANSPLANT_RSN_SP'.
metatable.loc[metatable['variable_id'] == 'TRANSPLANT_RSN_SP']

In [None]:
# Export rows whose variable_id is 'TRANSPLANT_RSN_SP'.
exporttable.loc[exporttable['variable_id'] == 'TRANSPLANT_RSN_SP']