# Analyze of Moles DB fixture

## Producing file summarizing structure

In [1]:
import json

In [2]:
def json_to_dict(filename='fixture.json'):
    with open(filename, 'r') as f:
        return json.load(f)

In [3]:
def dict_to_json(data, filename):
    with open(filename, 'w') as f:
        f.write(json.dumps(data))

In [4]:
def get_dict_of_attributes(list_of_items):
    data = list_of_items
    result_dict = dict()
    
    for i in data:
        for k, v in i['fields'].items():
            if k not in result_dict or result_dict[k] is None or not result_dict[k]:
                result_dict[k] = v[:50] if isinstance(v, str) else v
                
    return result_dict

In [102]:
def convert_values_to_types(data):
    original_dataset = json_to_dict('fixture2.json')
    for k, v in data.items():
        for k1, v1 in v.items():
            data[k][k1] = get_type(v1, original_dataset)

    return data   

In [108]:
def get_type(attr, data):
    if isinstance(attr, bool):
        return 'bool'
    
    if isinstance(attr, list) and attr:
        if isinstance(attr[0], int):
            list_of_models = [get_model_by_pk(i, data) for i in attr]
            return list(set(list_of_models))
        return [attr[0].__class__.__name__]

    if isinstance(attr, int):
        record = get_model_by_pk(attr, data)
        return record if record else 'int'
    
    return attr.__class__.__name__

In [93]:
def get_model_by_pk(pk, data):
    return ", ".join([i['model'].split('.')[1] for i in data if i['pk'] == pk])

In [7]:
def get_set_of_models(data):
    result = [i['model'] for i in data]
    result = set(result)
    return result

In [8]:
def get_models_with_attributes(data):
    result = dict()
    models = get_set_of_models(data)
    for m in models:
        filtered_list = [i for i in data if i['model'] == m]
        attributes = get_dict_of_attributes(filtered_list)
        result[m] = attributes

    return dict(sorted(result.items()))

In [105]:
def save_summary_to_file(data, filename='output.txt', convert_to_types = True):
    dict_to_json(data, 'fixture2.json')
    
    data = get_models_with_attributes(data)
    if convert_to_types:
        data = convert_values_to_types(data)
        
    with open(filename, 'w') as f:   
        for k, v in data.items():
            f.write(f'{k}\n')
            for k1, v2 in v.items():
                f.write(f'\t{k1}: {v2}\n')
            f.write('\n')

In [24]:
def get_by_pk(pk):
    data = json_to_dict()
    for i in data:
        if i['pk'] == pk:
            yield i

## Flattening fixture

### Refereanceable

In [12]:
def get_model_name(obj):
    return obj['model'].split('.')[1]

In [13]:
map = {
    'acq': 'acquisition',
    'cmppr': 'compositeprocess',
    'coll': 'observationcollection',
    'comp': 'compositeprocess',
    'instr': 'instrument',
    'mpop': 'mobileplatformoperation',
    'ob': 'observation',
    'plat': 'platform',
    'proj': 'project',
    'result': 'result'
    }
def map_shortcode_to_model_name(short_code, full=False):
    if full:
        return f'cedamoles_app.{map[short_code]}'
    return map[short_code]

In [14]:
def get_referenceable_dict_from_list(ref_list):
    output = dict()
    for i in ref_list:
        output[i['pk']] =  i['fields']
    return output

In [15]:
def include_referenceable(data):
    output = []
    referenceable = [i for i in data if i['model'] == 'cedamoles_app.referenceable']
    referenceable = get_referenceable_dict_from_list(referenceable)
    data = [i for i in data if i['model'] != 'cedamoles_app.referenceable']
    
    for i in data:
        pk = i['pk']
        if pk in referenceable and i['model'] == map_shortcode_to_model_name(referenceable[pk]['short_code'] , True):
            my_obj = i
            my_obj['fields']['uuid'] = referenceable[pk]['uuid']
            output.append(my_obj)
        else:
            output.append(i)
            
    return output


### Unused functions

In [52]:
def include_constraints(data):
    values = {i['pk']: i['fields'] for i in data if i['model'] == 'cedamoles_app.constraints'}
    data = [i for i in data if i['model'] != 'cedamoles_app.constraints']

    for i in data:
        if 'permission' in i['fields'] and i['fields']['permission']:
            i['fields']['permission'] = values[i['fields']['permission']]

        elif 'imageConstraints' in i['fields'] and i['fields']['imageConstraints']:
            i['fields']['imageConstraints'] = values[i['fields']['imageConstraints']]
       
    return data
    

In [114]:
def include_discoveryserviceid(data):
    values = {i['pk']: i['fields']['name'] for i in data if i['model'] == 'cedamoles_app.discoveryserviceid'}
    data = [i for i in data if i['model'] != 'cedamoles_app.discoveryserviceid']

    for i in data:
        if 'discoveryKeywords' in i['fields']:
            i['fields']['discoveryKeywords'] = [values[j] for j in i['fields']['discoveryKeywords']]

    return data


In [20]:
def include_dqconformanceresult(data):
    values = {i['pk']: i['fields'] for i in data if i['model'] == 'cedamoles_app.dqconformanceresult'}
    data = [i for i in data if i['model'] != 'cedamoles_app.dqconformanceresult']

    for i in data:
        if 'resultQuality' in i['fields'] and i['fields']['resultQuality']:
            i['fields']['resultQuality'] = values[i['fields']['resultQuality']]

    return data
    

In [80]:
def include_imagedetails(data):
    values = {i['pk']: i['fields'] for i in data if i['model'] == 'cedamoles_app.imagedetails'}
    data = [i for i in data if i['model'] != 'cedamoles_app.imagedetails']

    for i in data:
        if 'imageDetails' in i['fields'] and i['fields']['imageDetails']:
            i['fields']['imageDetails'] = [values[j] for j in i['fields']['imageDetails']]
            
    return data

### Another functions

In [142]:
def include_simple_field(data, model_name, model_and_field_pairs_to_insert_model):
    values = {i['pk']: i['fields'] for i in data if i['model'] == f'cedamoles_app.{model_name}'}
    data = [i for i in data if i['model'] != f'cedamoles_app.{model_name}']

    for i in data:
        for m, f in model_and_field_pairs_to_insert_model:
            if i['model'] == f'cedamoles_app.{m}' and f in i['fields'] and i['fields'][f]:
                if isinstance(i['fields'][f], list):
                    i['fields'][f] = [values[j] for j in i['fields'][f]]
                else:
                    i['fields'][f] = values[i['fields'][f]]
            
    return data

In [110]:
data1 = json_to_dict()

In [111]:
data = include_referenceable(data1)
data = include_discoveryserviceid(data)
data = include_dqconformanceresult(data)
data = include_constraints(data)
data = include_imagedetails(data)
save_summary_to_file(data, 'summary v3.txt', True)

In [143]:
data = json_to_dict()
data = include_referenceable(data)
data = include_simple_field(data, 'discoveryserviceid', [('observation', 'discoveryKeywords'), ('observationcollection', 'discoveryKeywords')])
data = include_simple_field(data, 'dqconformanceresult', [('observation', 'resultQuality')])
data = include_simple_field(data, 'constraints', [('imagedetails', 'imageConstraints'),('observation', 'permission')])
data = include_simple_field(data, 'imagedetails', [('acquisition', 'imageDetails'), 
                                                  ('computation', 'imageDetails'),
                                                  ('instrument', 'imageDetails'),
                                                  ('observationcollection', 'imageDetails'),
                                                  ('observation', 'imageDetails'),
                                                  ('platform', 'imageDetails'),
                                                  ('project', 'imageDetails'),
                                                  ])
data = include_simple_field(data, 'vocabularyterm', [('observation', 'vocabularyKeywords')])
data = include_simple_field(data, 'verticalextent', [('observation', 'verticalExtent')])
data = include_simple_field(data, 'timeperiod', [('mobileplatformoperation', 'operationTime'),
                                                 ('observation', 'timePeriod'),
                                                 ('observation', 'validTimePeriod')])
data = include_simple_field(data, 'party', [('responsiblepartyinfo', 'party'),
                                            ('review', 'commentator'),
                                            ('review', 'reviewer')])
data = include_simple_field(data, 'phenomenonname', [('phenomenon', 'names')])
data = include_simple_field(data, 'phenomenonterm', [('phenomenon', 'terms')])

save_summary_to_file(data, 'sv4.txt', False)

KeyError: None

In [137]:
[print(i['fields']['imageDetails']) for i in data if i['model'] == 'cedamoles_app.acquisition']

[]
[]
[]
[]
[56]
[56]
[56]
[56]
[]
[56]
[56]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[2]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[38]
[]
[69]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[38]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[69]
[]
[]
[]
[]
[]
[69]
[69]
[69]
[69]
[69]
[69]
[69]
[69]
[69]
[69]
[69]
[]
[69]
[]
[]
[69]
[69]
[69]
[69]
[69]
[69]
[]
[]
[]
[]
[50]
[50]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[97]
[9

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,