# dataPOP

The first step of this notebook is to build a JSON file that keeps the taxonomy centralized and up to date.

In [3]:
import json 

import pandas as pd

from collections import OrderedDict

In [1]:
def get_column(src, column=1):
    """
    Read a comma-separated file and return one of its columns as a list

    Args:
        src: path or file-like object accepted by pandas.read_csv
        column (int): zero-based position of the column (default 1,
            i.e. the second column)
    """
    frame = pd.read_csv(src, sep=',', header='infer')
    selected = frame.iloc[:, column]
    return list(selected)

In [4]:
# Structure CSV; the values of its second column (position 1) become NAMES.
SRC = 'n_tertiaryData_structure_20180220.csv'
NAMES = get_column(SRC)

# Fields copied from each content row into a taxonomy entry, in this order.
HEADER = [
    'nameSeq',
    'id',
    'fullEN',
    'shortEN',
    'inWritingCZ',
    'abrevCZ',
    'descriptiveCZ',
    'descriptiveEN',
]

In [5]:
def filter_taxonomy(data=dict(), names={}):
    """
    Keep only the taxonomy entries whose 'nameSeq' is one of `names`

    Args:
        data (dict): taxonomy, mapping each tag to a dict of descriptors
        names: container of accepted 'nameSeq' values

    Returns:
        dict: the matching (tag, entry) pairs, in their original order
    """
    return {
        tag: entry
        for tag, entry in data.items()
        if entry.get('nameSeq') in names
    }

def get_tags_from_taxonomy(data=dict()):
    """
    Return the tags (keys) of a taxonomy as a list, in iteration order

    Args:
        data (dict): taxonomy
    """
    return [tag for tag in data.keys()]

def get_text_from_taxonomy(data=dict(), tag=None, descriptor='id'):
    """
    Get a single descriptor of a tag from taxonomy

    Args:
        data (dict): taxonomy, mapping each tag to a dict of descriptors
        tag: key of the entry to look up
        descriptor (str): field to read from the entry (default 'id')

    Returns:
        The descriptor value, or None when the tag is not in the taxonomy
        or the entry has no such descriptor.
    """
    entry = data.get(tag)
    # An unknown tag (including the default tag=None) yields None here;
    # chaining .get() on it would raise AttributeError.
    if entry is None:
        return None
    return entry.get(descriptor)

## Taxonomy

In [6]:
# Content CSV holding the taxonomy rows.
SRC = 'n_tertiaryData_content_20180220.csv'

data = OrderedDict()

df = pd.read_csv(SRC, sep=',')

# Key every row by its 'id' and keep only the HEADER fields, in HEADER order.
for _, record in df.iterrows():
    entry = OrderedDict()
    for field in HEADER:
        entry[field] = record[field]
    data[record['id']] = entry

In [7]:
# Write the taxonomy to 'taxonomy.json' in the working directory.
# Errors from open() or json.dump() propagate to the notebook as-is.
with open('taxonomy.json', mode='w', encoding='utf-8') as f:
    json.dump(data, f)

# Report success only once the file has been flushed and closed.
print('Taxonomy successfully exported! \u2714')

Taxonomy successfully exported! ✔


## Tags

In [12]:
# Reload the taxonomy from disk. The file is read as UTF-8 explicitly rather
# than with the platform default encoding; object_pairs_hook keeps the key
# order of every JSON object.
with open('taxonomy.json', encoding='utf-8') as f:
    data = json.load(f, object_pairs_hook=OrderedDict)

In [13]:
# Map every name in NAMES to the taxonomy keys whose 'nameSeq' equals the
# name's 1-based position in NAMES.
tags = OrderedDict(
    (name, get_tags_from_taxonomy(filter_taxonomy(data, {position})))
    for position, name in enumerate(NAMES, 1)
)

In [15]:
# Serialize the name -> taxonomy keys mapping to 'tags.json' in the working directory.
with open('tags.json', mode='w+', encoding='utf-8') as tags_file:
    json.dump(tags, tags_file)

## Corrections 

In [16]:
# For every name in NAMES, index its taxonomy keys by their last four characters.
corrections = OrderedDict()

for position, name in enumerate(NAMES, 1):
    keys = get_tags_from_taxonomy(filter_taxonomy(data, {position}))
    corrections[name] = OrderedDict((key[-4:], key) for key in keys)

In [17]:
# Write the corrections mapping to 'corrections.json' in the working directory.
# Errors from open() or json.dump() propagate to the notebook as-is.
with open('corrections.json', mode='w', encoding='utf-8') as f:
    json.dump(corrections, f)

# Report success only once the file has been flushed and closed.
print('Corrections successfully exported! \u2714')

Corrections successfully exported! ✔


## Validation

In [24]:
# Arguments for the validation run below.
# NOTE(review): presumably source and destination directories; these are
# absolute paths on one specific machine, so adjust before re-running.
SRC, DST = (
    '/Users/g4brielvs/Desktop/DATA/dataPOP/',
    '/Users/g4brielvs/Desktop/DEST/',
)

In [26]:
# NOTE(review): `validation` is presumably a project-local module; its source
# is not part of this notebook.
import validation

# Run the check with the SRC/DST pair defined in the previous cell.
# NOTE(review): presumably reads from SRC and writes to DST; confirm in the
# validation module.
validation.check(SRC, DST)

All files were checked! ✔


In [27]:
# Arguments for the validation run below.
# NOTE(review): presumably source and destination directories; these are
# absolute paths on one specific machine, so adjust before re-running.
SRC, DST = (
    '/Users/g4brielvs/Desktop/DATA/lau2/',
    '/Users/g4brielvs/Desktop/DEST/',
)

In [28]:
# NOTE(review): `validation` is presumably a project-local module; its source
# is not part of this notebook.
import validation

# Run the check with the SRC/DST pair defined in the previous cell.
# NOTE(review): presumably reads from SRC and writes to DST; confirm in the
# validation module.
validation.check(SRC, DST)

All files were checked! ✔


In [3]:
# Arguments for the validation run below.
# NOTE(review): presumably source and destination directories; these are
# absolute paths on one specific machine, so adjust before re-running.
SRC, DST = (
    '/Users/g4brielvs/Desktop/test/',
    '/Users/g4brielvs/Desktop/DEST/',
)

# NOTE(review): `validation` is presumably a project-local module; its source
# is not part of this notebook.
import validation
# Run the check with the SRC/DST pair defined just above in this cell.
validation.check(SRC, DST)

All files were checked! ✔
