## Renaming to prefix tags 

A taxonomy represents the formal structure of the classes or types of objects within a domain, in this case information processing at the IT4 Big Data Lab.

It organizes knowledge by using a controlled vocabulary to make it possible to find related information.

In [2]:
import glob
import json 
import os
import shutil

import pandas as pd

from collections import OrderedDict

In [3]:
def get_column(src, column=1):
    """
    Read a comma-separated, cp1252-encoded CSV and return one column as a list.

    Args:
        src (str): path to the CSV file
        column (int): positional index of the column to return (default 1,
            i.e. the second column)

    Returns:
        list: the values of the selected column, in file order
    """
    frame = pd.read_csv(
        src,
        sep=',',
        header='infer',
        # Keep 'intID' as text so its values are not parsed as numbers.
        converters={'intID': str},
        encoding='cp1252',
    )
    selected = frame.iloc[:, column]
    return list(selected)

def get_header(src):
    """
    Return the column names of a comma-separated, cp1252-encoded CSV.

    Args:
        src (str): path to the CSV file

    Returns:
        list: column names in file order
    """
    table = pd.read_csv(src, sep=',', header='infer', encoding='cp1252')
    column_names = table.columns.values
    return [*column_names]

In [4]:
# CSV file describing the tag structure (absolute, machine-specific path).
SRC = '/Users/g4brielvs/Dropbox/Workspace/Names/n_tertiaryData_structure_20180220.csv'

# Column at index 1 of that CSV as a list; `rename` iterates over it to decide
# how many filename tags to renumber.
NAMES = get_column(SRC, 1)
# Bare expression so the notebook displays the loaded list.
NAMES

['dataSource',
 'graphType',
 'graphDetail',
 'spacialAggregation',
 'temporalAggregation',
 'userTclass',
 'trajectoryTimeDefinitionPoint',
 'fixedRoute',
 'calculationMethod',
 'statistics',
 'statisticsDetail',
 'transportMode',
 'demographyGender',
 'demographyAge',
 'dataDetail',
 'controlSum']

## New renaming

In [5]:
# Destination directory that `rename` copies into (absolute, machine-specific path).
DST = '/Users/g4brielvs/Desktop/DEST/'

def rename(src, prefix, dst=None, names=None):
    """
    Copy every CSV found under ``src`` into a destination directory, giving
    each copy a prefixed filename with renumbered tags.

    The source filename (without extension) is split on ``_`` into tags. For
    each entry in ``names`` the tag at the same position has its first two
    characters removed and is prefixed with its new ordinal, counting from 4.
    Tags beyond ``len(names)`` are ignored. The copy is named
    ``<prefix>-<tag>_<tag>...<extension>`` and the source file is left in
    place.

    Args:
        src (str): directory searched recursively for ``*.csv`` files
        prefix (str): text placed in front of the renumbered tags
        dst (str, optional): destination directory; defaults to the
            module-level ``DST``
        names (list, optional): tag names, only its length is used; defaults
            to the module-level ``NAMES``

    Raises:
        IndexError: if a filename contains fewer ``_``-separated tags than
            there are entries in ``names``
    """
    if dst is None:
        dst = DST
    if names is None:
        names = NAMES
    expected = len(names)

    for pathname in glob.iglob(os.path.join(src, '**/*.csv'), recursive=True):
        name, extension = os.path.splitext(os.path.basename(pathname))

        tags_in_filename = name.strip().split('_')
        if len(tags_in_filename) < expected:
            # Same exception type the bare index lookup used to raise, but
            # with enough context to find the offending file.
            raise IndexError(
                '{!r} has {} tag(s), expected at least {}'.format(
                    pathname, len(tags_in_filename), expected))

        # Ordinals start at 4; tag[2:] drops the first two characters of the
        # original tag before the new ordinal is attached.
        tags = [
            '{}-{}'.format(position, tag[2:])
            for position, tag in enumerate(tags_in_filename[:expected], start=4)
        ]

        # NOTE(review): prefix and tags are joined with '-' while the tags
        # themselves are joined with '_' -- confirm this separator is intended.
        new_filename = '{}-{}{}'.format(prefix, '_'.join(tags), extension)

        # Created lazily so no empty destination appears when nothing matches.
        os.makedirs(dst, exist_ok=True)
        shutil.copy(pathname, os.path.join(dst, new_filename))

## Validation

In [32]:
# Source folder for this run; the prefix passed below repeats the folder name.
SRC = '/Users/g4brielvs/Desktop/1-00-nr_2-50-COM_3-01-L1/'

rename(SRC, '1-00-nr_2-50-COM_3-01-L1')

In [33]:
# Source folder for this run; the prefix passed below repeats the folder name.
SRC = '/Users/g4brielvs/Desktop/1-33-ter_2-04-TACSU_3-01-L1/'

rename(SRC, '1-33-ter_2-04-TACSU_3-01-L1')

In [34]:
# Source folder for this run; the prefix passed below repeats the folder name.
SRC = '/Users/g4brielvs/Desktop/1-33-ter_2-04-TACSU_3-02-L2/'

rename(SRC, '1-33-ter_2-04-TACSU_3-02-L2')

## Content

In [7]:
# Input tree and output folder for the content rewrite (machine-specific paths).
SRC = '/Users/g4brielvs/Desktop/DATA/'
DST = '/Users/g4brielvs/Desktop/DEST/'

# Columns removed from every file before it is written out.
drop_columns = ['RO2ID','RO2PORADI','RELACENAZEV','DENNAZEV','ROD1PORADINAZEV','RO2NAZEV','HODINANAZEV']

# Rewrite every CSV found recursively under SRC into DST under the same base
# name. Files sharing a base name in different sub-folders overwrite each other.
for pathname in glob.iglob(os.path.join(SRC, '**/*.csv'), recursive=True):
    filename = os.path.basename(pathname)

    # The first row of the input is treated as the header.
    df = pd.read_csv(pathname, sep=',', header=0)

    # Add a 'fileName' column holding the constant 'N' in every row.
    df['fileName'] = 'N'

    # Raises KeyError if any of the listed columns is missing from the file.
    df = df.drop(drop_columns, axis=1)

    # Make sure the destination exists before writing, as `rename` does.
    os.makedirs(DST, exist_ok=True)

    # The output is written without a header row and without the index.
    df.to_csv(os.path.join(DST, filename), sep=',', header=False, index=False, encoding='utf-8')