In [26]:
import os
import json 

# Column names stored in the file, in the order they appear in the header.
column_names = [
    'first_name',
    'last_name',
    'department',
    'salary',
    'age',
    'email',
    'street',
    'city',
    'notes',
    'points',
    'country',
    'country_code',
    'birth_date',
    'is_new',
]

# Every column starts out empty: a zero-length range at offset 0 of the payload.
meta = {
    'columns': {name: {'start': 0, 'end': 0} for name in column_names}
}

# Mode 'w' truncates any existing file, so no explicit delete is needed first.
# The header is terminated with a newline so that it occupies exactly one line
# and the (still empty) payload starts on the line after it.
with open('demo_employees.vicol1', 'w') as f:
    f.write(json.dumps(meta) + '\n')
    

In [32]:
from faker import Faker
faker = Faker('en')

import numpy as np

def create_employee(salary):
    """Build one fake employee record as a dict.

    Every field except ``salary`` is generated by the module-level ``faker``
    instance; ``salary`` is stored exactly as passed in.

    Returns a dict with the keys first_name, last_name, department, age,
    salary, email, street, city, notes, points, country, country_code,
    birth_date (formatted as 'YYYY-MM-DD') and is_new.
    """
    # NOTE(review): 'age' (20-50) and 'birth_date' (age 15-40) are drawn
    # independently, so the two fields of a record need not agree.
    return {
        'first_name': faker.first_name(),
        'last_name': faker.last_name(),
        'department': faker.random_element(elements=['Embedded','BigData','HR']),
        'age': faker.random_int(min=20, max=50),
        'salary': salary,
        'email': faker.email(),
        'street': faker.street_address(),
        'city': faker.city(),
        'notes': faker.catch_phrase(),
        'points': faker.random_int(min=0, max=3000),
        'country': faker.country(),
        'country_code': faker.country_code(),
        'birth_date': faker.date_of_birth(minimum_age=15, maximum_age=40).strftime('%Y-%m-%d'),
        'is_new': faker.boolean(),
    }

# Seed both random sources so that Restart Kernel -> Run All regenerates the
# same data set every time.
SEED = 42
np.random.seed(SEED)
Faker.seed(SEED)

# 100000 salaries drawn from a normal distribution (mean 16000, std 6000).
# NOTE(review): a normal distribution is unbounded, so a small share of the
# draws can be negative; clip here if negative salaries are not wanted.
salaries = np.random.normal(16000, 6000, 100000)

# One record per salary; the float draw is truncated to an int.
employees = [create_employee(int(salary)) for salary in salaries]



In [33]:
import json

def decode_file(path):
    """Read a columnar file and return ``(header, columns)``.

    The file consists of a single-line JSON header followed by the payload:
    the comma-joined values of every column, concatenated back to back. The
    header maps each column name to ``{'start': ..., 'end': ...}``, which are
    character offsets into the payload.

    Args:
        path: Path of the file to read.

    Returns:
        A tuple ``(header, columns)`` where ``header`` is the parsed JSON
        header and ``columns`` is a list of ``{'name': str, 'data': list[str]}``
        dicts in header order. A column whose range is empty yields ``[]``.
    """
    with open(path, 'r') as f:
        header = json.loads(f.readline())
        # Read the payload once and slice it in memory. Seeking to
        # ``f.tell() + offset`` on a text-mode file is undefined behaviour,
        # and it mixes byte positions with character counts, which misaligns
        # every column after the first multi-byte character.
        body = f.read()

    columns = []
    for column_name, column in header['columns'].items():
        data_string = body[column['start']:column['end']]
        # ''.split(',') would give [''], so an empty range maps to no values.
        data = data_string.split(',') if data_string else []
        columns.append({'name': column_name, 'data': data})
    return (header, columns)


def encode_file(path, columns):
    """Write ``columns`` to ``path`` in the columnar file format.

    The output is a single-line JSON header followed by the payload: each
    column's values joined with ',' and the columns concatenated back to back
    with no separator between them. The header records, per column name, the
    ``start``/``end`` character offsets of that column within the payload.

    Args:
        path: Path of the file to (over)write.
        columns: Iterable of ``{'name': str, 'data': list[str]}`` dicts.
    """
    # Serialise every column exactly once. Materialising the list also means
    # a one-shot iterable (e.g. a generator) is written correctly instead of
    # being exhausted by the offset pass before the write pass.
    payloads = [(column['name'], ','.join(column['data'])) for column in columns]

    layout = {}
    current_offset = 0
    for name, payload in payloads:
        end_offset = current_offset + len(payload)
        layout[name] = {'start': current_offset, 'end': end_offset}
        current_offset = end_offset

    with open(path, 'w') as f:
        f.write(json.dumps({'columns': layout}) + '\n')
        for _, payload in payloads:
            f.write(payload)


def insert(path, employee):
    """Append one record to the columnar file at ``path``.

    The whole file is decoded, the record's value for each stored column is
    appended (formatted as a string) to that column, and the file is then
    written back in full.

    Args:
        path: Path of the columnar file to update.
        employee: Mapping that provides a value for every column name stored
            in the file; a missing name raises ``KeyError``.
    """
    _header, stored_columns = decode_file(path)
    for stored in stored_columns:
        value = employee[stored["name"]]
        stored['data'].append(f'{value}')
    encode_file(path, stored_columns)
        

In [None]:
# Load the file once, append every record in memory, then write it back once.
# Calling insert() per employee would re-read and rewrite the whole, growing
# file for each record, which is quadratic in the number of employees.
_, file_columns = decode_file('demo_employees.vicol1')
for file_column in file_columns:
    file_column['data'].extend(f'{e[file_column["name"]]}' for e in employees)
encode_file('demo_employees.vicol1', file_columns)

In [None]:
!tail -n 10 ./demo_employees.vicol1