This notebook converts the given .csv data file into JSON files that all share a single format.

The main reason for doing so is that we need the following structure for translation:
```
translation: {
    language1: "string",
    language2: "string"
}
```

Since we are doing mapping, we replace the language keys with our data fields of interest, e.g. tag_name, tag_description, thing, property.

In [3]:
import pandas as pd
import numpy as np
from os import listdir
from os.path import isfile, join
from pathlib import Path

# Get Data

## get data (old)

In [None]:
# Directory that is scanned (non-recursively) for .csv files.
data_path = "."
# Only these columns are read from every file.
fields = ['tag_name', 'tag_description', 'thing', 'property']

# Names of the regular files in data_path that end in '.csv'. os.listdir
# returns entries in arbitrary order, so sort them to make the row order of
# the combined frame reproducible between runs.
file_list = sorted(
    f for f in listdir(data_path)
    if f.endswith('.csv') and isfile(join(data_path, f))
)

# Read each file relative to data_path (not the working directory), so the
# cell keeps working when data_path points somewhere other than ".".
df_list = [
    pd.read_csv(join(data_path, f), skipinitialspace=True, usecols=fields)
    for f in file_list
]

# NOTE: pd.concat raises ValueError when no .csv file was found.
df = pd.concat(df_list, ignore_index=True)

## get data (new)

In [4]:
# Load only the columns of interest from the filtered mapping file.
fields = ['tag_name', 'tag_description', 'thing', 'property']
data_path = "../test_data/data_mapping_filtered.csv"
df = pd.read_csv(
    data_path,
    usecols=fields,
    skipinitialspace=True,
)

# create a json data for "thing"

In [5]:
import json

# One record per row of df: the tag description paired with its "thing"
# value, wrapped in the {'translation': {...}} layout.
output_list = [
    {'translation': {'tag_description': description, 'thing': thing}}
    for description, thing in zip(df['tag_description'], df['thing'])
]

# Dump all records into one JSON array, indented by 4 spaces.
file_path = 'translations_thing.json'
with open(file_path, 'w') as out:
    json.dump(output_list, out, indent=4)

# create a json data for "property"

In [6]:
import json

# One record per row of df: the tag description paired with its "property"
# value, wrapped in the {'translation': {...}} layout.
output_list = [
    {'translation': {'tag_description': description, 'property': prop}}
    for description, prop in zip(df['tag_description'], df['property'])
]

# Dump all records into one JSON array, indented by 4 spaces.
file_path = 'translations_property.json'
with open(file_path, 'w') as out:
    json.dump(output_list, out, indent=4)

# create a json data for "property" with concatenated tag fields

We will introduce a separator token

tag_name + \<SEP\> + tag_description

In [16]:
import json

# One record per row of df. The 'tag_description' entry holds the tag name
# and the tag description joined by the literal '<SEP>' token; the other
# entry is the row's "property" value.
output_list = [
    {
        'translation': {
            'tag_description': name + '<SEP>' + description,
            'property': prop,
        }
    }
    for name, description, prop in zip(
        df['tag_name'], df['tag_description'], df['property']
    )
]

# Dump all records into one JSON array, indented by 4 spaces.
file_path = 'translations_property_concat_input.json'
with open(file_path, 'w') as out:
    json.dump(output_list, out, indent=4)