In [1]:
import numpy as np
import pandas as pd
import random
import string
import json
import ipywidgets as iw

from IPython.display import display
from datetime import datetime as dt

In [2]:
def make_df_from_conf(df_conf):
    """
    Build a randomly generated test DataFrame from a configuration dictionary.

    Args:
        df_conf (dict): ``{'num_rows': int, 'df_confs': {feature_name: conf}}``.
            Every per-feature ``conf`` holds ``'dtype'`` and ``'nan_rate'`` plus
            the keys required by its dtype:

            * ``'str'``: ``'unique'`` selects the generator:
              ``"Random letter"`` (needs ``'digits'``),
              ``"Random number"`` (needs ``'digits'``, ``'min'``, ``'max'``),
              ``"Seaquential number"`` (needs ``'digits'``, ``'start'``) or
              ``"yourself"`` (needs ``'unique_list'``, comma separated).
            * ``'int'`` / ``'float'``: ``'min'`` and ``'max'``.
            * ``'datetime'``: ``'dt_start'``, ``'dt_end'`` and ``'dt_freq'``
              (only the first letter of ``'dt_freq'`` is used). The legacy
              key names ``'start'`` / ``'end'`` are accepted as a fallback.

    Returns:
        pandas.DataFrame: one column per feature with ``num_rows`` rows.

    Raises:
        ValueError: if a dtype or string generator is not supported, or if the
            configured datetime range contains no timestamps.
    """
    num_rows = df_conf['num_rows']
    # Offset objects are used instead of the 'M' / 'Y' alias strings, which are
    # deprecated in recent pandas releases; they keep the month-end / year-end
    # meaning those aliases had.
    freq_offsets = {
        'D': pd.offsets.Day(),
        'M': pd.offsets.MonthEnd(),
        'Y': pd.offsets.YearEnd(),
    }
    columns = {}
    for feature, conf in df_conf['df_confs'].items():
        nan_rate = conf['nan_rate']
        dtype = conf['dtype']
        # Marker written into the rows selected as missing; overridden for datetimes.
        missing = np.nan

        if dtype == 'str':
            unique = conf['unique']
            if unique == "Random letter":
                values = [
                    "".join(
                        random.choice(string.ascii_lowercase)
                        for _ in range(conf['digits'])
                    )
                    for _ in range(num_rows)
                ]
            elif unique == "Random number":
                values = [
                    str(random.randint(conf['min'], conf['max'])).zfill(conf['digits'])
                    for _ in range(num_rows)
                ]
            elif unique == "Seaquential number":
                start = conf['start']
                values = [
                    str(i).zfill(conf['digits'])
                    for i in range(start, start + num_rows)
                ]
            elif unique == "yourself":
                values = random.choices(conf['unique_list'].split(','), k=num_rows)
            else:
                raise ValueError(
                    f'Unsupported "unique" setting {unique!r} for str column "{feature}".'
                )
            # Object dtype so that a real NaN can be stored; a fixed-width
            # unicode array would silently turn np.nan into the text 'nan'.
            features = np.array(values, dtype=object)
        elif dtype == 'int':
            features = np.array(
                [random.randint(conf['min'], conf['max']) for _ in range(num_rows)]
            )
            if nan_rate != 0:
                print(f'On the int column "{feature}", np.nan are detected. This column is converted to float.')
                features = features.astype(float)
        elif dtype == 'float':
            features = np.array(
                [random.uniform(conf['min'], conf['max']) for _ in range(num_rows)]
            )
        elif dtype == 'datetime':
            # The GUI stores 'dt_start' / 'dt_end'; fall back to the key names
            # this function originally expected.
            start = conf['dt_start'] if 'dt_start' in conf else conf['start']
            end = conf['dt_end'] if 'dt_end' in conf else conf['end']
            freq_code = conf['dt_freq'][0]
            freq = freq_offsets.get(freq_code, freq_code)
            date = pd.date_range(start=start, end=end, freq=freq)
            if len(date) == 0:
                raise ValueError(
                    f'Datetime column "{feature}" has an empty range: {start!r} - {end!r}.'
                )
            # Repeat the range until it covers num_rows, then trim the excess.
            features = np.tile(date.values, num_rows // len(date) + 1)[:num_rows]
            missing = np.datetime64('NaT')
        else:
            raise ValueError(f'Unsupported dtype {dtype!r} for column "{feature}".')

        if nan_rate != 0:
            nan_idx = random.sample(range(num_rows), round(num_rows * nan_rate))
            if nan_idx:
                features[nan_idx] = missing
        columns[feature] = features
    return pd.DataFrame(columns)

In [3]:
def make_df(widgets):
    """
    Read the settings out of the GUI widget tree and build the DataFrame.

    Args:
        widgets: container widget as assembled by ``show_widgets``. Its child
            at index 2 supplies the row count and every child from index 3
            onward is a box holding per-feature setting boxes. A button inside
            such a box ends the scan of that box.

    Returns:
        tuple: ``(df_conf, df)`` where ``df_conf`` is the configuration
        dictionary collected from the widgets and ``df`` is the result of
        ``make_df_from_conf(df_conf)``.
    """
    df_conf = {'num_rows': widgets.children[2].value, 'df_confs': {}}

    for boxes in widgets.children[3:]:
        for box in boxes.children:
            # Use the public Button class rather than the incidental
            # ``iw.widget_button`` submodule attribute.
            if isinstance(box, iw.Button):
                break
            feature = box.children[0].value
            dtype = box.children[2].value
            conf = {'nan_rate': box.children[1].value, 'dtype': dtype}
            df_conf['df_confs'][feature] = conf

            # The dtype-specific widgets (child 3) only exist once a dtype has
            # been chosen, so they are touched inside the matching branch only.
            if dtype == 'str':
                detail = box.children[3].children
                unique = detail[0].value
                conf['unique'] = unique
                if unique == 'Random letter':
                    conf['digits'] = detail[1].children[0].value
                elif unique == 'Random number':
                    params = detail[1].children
                    conf['digits'] = params[0].value
                    conf['max'] = params[1].value
                    conf['min'] = params[2].value
                elif unique == 'Seaquential number':
                    params = detail[1].children
                    conf['digits'] = params[0].value
                    conf['start'] = params[1].value
                elif unique == 'yourself':
                    conf['unique_list'] = detail[1].children[0].value
            elif dtype in ('int', 'float'):
                bounds = box.children[3].children
                conf['max'] = bounds[0].value
                conf['min'] = bounds[1].value
            elif dtype == 'datetime':
                dt_widgets = box.children[3].children
                conf['dt_start'] = dt_widgets[0].value
                conf['dt_end'] = dt_widgets[1].value
                conf['dt_freq'] = dt_widgets[2].value

    return df_conf, make_df_from_conf(df_conf)

In [17]:
def str_unique_setting(unique):
    """
    Create the parameter widgets that belong to one string-generation mode.

    Args:
        unique (str): the selected mode: 'Random letter', 'Random number',
            'Seaquential number' or 'yourself'. Any other value yields an
            empty box.

    Returns:
        iw.VBox: box holding the widgets for the requested mode.
    """
    def digits_box():
        # Length of the generated string (zero-padded width for numbers).
        return iw.BoundedIntText(
            value=8,
            min=1,
            max=10,
            step=1,
            description='Char digits',
            disabled=False,
        )

    def int_box(label):
        return iw.IntText(step=1, description=label, disabled=False)

    if unique == 'Random letter':
        parts = [digits_box()]
    elif unique == 'Random number':
        parts = [digits_box(), int_box('Max'), int_box('Min')]
    elif unique == 'Seaquential number':
        parts = [digits_box(), int_box('Start')]
    elif unique == 'yourself':
        parts = [
            iw.Text(
                description='Unique list',
                value='',
                placeholder='aaa,bbb,ccc,..(*No space)',
            )
        ]
    else:
        parts = []
    return iw.VBox(parts)
        

def str_setting():
    """
    Create the settings box for a string column.

    The box starts with only the "Unique" dropdown. Whenever the dropdown
    value changes, the previously shown parameter widgets (if any) are removed
    and the ones for the new mode are appended.

    Returns:
        iw.VBox: the dropdown plus, once a mode is chosen, its parameter box.
    """
    mode_dropdown = iw.Dropdown(
        description="Unique",
        options=["Random letter", "Random number", "Seaquential number", "yourself"],
        value=None,
    )
    container = iw.VBox([mode_dropdown])

    def refresh_detail(change) -> None:
        if change['name'] != 'value':
            return
        # Drop the parameter box of the previously selected mode.
        if len(container.children) >= 2:
            pop_n_widgets(container, -1)
        container.children += (str_unique_setting(change['new']),)

    mode_dropdown.observe(refresh_detail, names='value')
    return container


def int_setting():
    """
    Create the settings box for an integer column.

    Returns:
        iw.VBox: two integer inputs, 'Max' first and 'Min' second.
    """
    bounds = [
        iw.IntText(step=1, description=label, disabled=False)
        for label in ('Max', 'Min')
    ]
    return iw.VBox(bounds)


def float_setting():
    """
    Create the settings box for a float column.

    Returns:
        iw.VBox: two float inputs, 'Max' first and 'Min' second.
    """
    bounds = [
        iw.FloatText(step=0.01, description=label, disabled=False)
        for label in ('Max', 'Min')
    ]
    return iw.VBox(bounds)


def datetime_setting():
    """
    Create the settings box for a datetime column.

    Returns:
        iw.VBox: 'Start' and 'End' text inputs followed by the 'Freq' dropdown.
    """
    date_inputs = [
        iw.Text(description=label, value='', placeholder='YYYY/MM/DD')
        for label in ('Start', 'End')
    ]
    freq_dropdown = iw.Dropdown(
        description="Freq",
        options=["Daily", "Monthly", "Yearly"],
        value=None,
    )
    return iw.VBox([*date_inputs, freq_dropdown])


def make_df_setting():
    """
    Create the settings box for a single feature (column).

    The box initially holds the feature name, the NaN rate and the dtype
    dropdown. Selecting a dtype replaces any previously appended
    dtype-specific settings with the ones for the new dtype.

    Returns:
        iw.VBox: bordered box with the feature's setting widgets.
    """
    name_input = iw.Text(
        description='Feat Name',
        value='',
    )
    nan_input = iw.BoundedFloatText(
        step=0.01,
        value=0.0,
        max=1.0,
        min=0.0,
        description='NaN rate',
        disabled=False
    )
    dtype_dropdown = iw.Dropdown(
        description="Dtype",
        options=["str", "int", "float", "datetime"],
        value=None,
    )
    feature_box = iw.VBox(
        [name_input, nan_input, dtype_dropdown],
        layout=iw.Layout(border='1px solid #C8C8C8'),
    )

    def swap_dtype_settings(change) -> None:
        if change['name'] != 'value':
            return
        # A fourth child means settings of an earlier dtype are still shown.
        if len(feature_box.children) >= 4:
            pop_n_widgets(feature_box, -1)

        builders = {
            'str': str_setting,
            'int': int_setting,
            'float': float_setting,
            'datetime': datetime_setting,
        }
        builder = builders.get(change['new'])
        if builder is not None:
            feature_box.children += (builder(),)

    dtype_dropdown.observe(swap_dtype_settings, names='value')
    return feature_box


def pop_n_widgets(box, n):
    """
    Remove the n-th child widget from a box widget and return it.

    Args:
        box: object whose ``children`` attribute is a sequence of widgets.
        n (int): index of the child to remove (negative indices allowed).

    Returns:
        The removed child. ``box.children`` is reassigned to a tuple of the
        remaining children.
    """
    remaining = [child for child in box.children]
    removed = remaining.pop(n)
    box.children = tuple(remaining)
    return removed



def show_widgets(df_confs=None):
    """
    Build and display the widget GUI used to describe and generate a DataFrame.

    The GUI holds a "make DF" button, the DataFrame name, the number of rows
    and a growing list of per-feature setting boxes, plus an "Add Feature"
    button. Pressing "make DF" collects the settings, writes them to
    ``<DF Name>.json`` and writes the generated DataFrame to ``<DF Name>.csv``.

    Args:
        df_confs (Dict[str, str]): information for creating the DataFrame.
            NOTE: this argument is accepted for compatibility but is not
            applied yet; the GUI always starts from its default state.
    """
    make_button = iw.Button(description='make DF')

    df_name = iw.Text(
        description='DF Name',
        value='test_df',
    )
    num_rows = iw.BoundedIntText(
        value=100,
        min=0,
        max=10**8,
        step=1,
        description='Num Rows',
        disabled=False,
    )
    add_button = iw.Button(description='Add Feature')

    # Named ``root`` so that the ``df_confs`` argument is no longer clobbered.
    root = iw.VBox([make_button, df_name, num_rows, iw.Box([make_df_setting(), add_button])])

    def on_click(clicked_button: iw.Button) -> None:
        last_row = root.children[-1]
        # Measure before popping: the count includes the "Add Feature" button.
        row_is_full = len(last_row.children) >= 4
        add_button = pop_n_widgets(last_row, -1)
        if row_is_full:
            # Fill the row and move the button to a fresh row.
            last_row.children += (make_df_setting(),)
            root.children += (iw.Box([add_button]),)
        else:
            last_row.children += (make_df_setting(), add_button)

    out = iw.Output(layout={'border': '1px solid black'})

    def on_click_make_df(clicked_button: iw.Button) -> None:
        # Anything printed or raised in here is shown in the output area.
        with out:
            df_dict, df = make_df(root)
            # ensure_ascii=False emits non-ASCII text verbatim, so the file
            # encoding must be explicit instead of the platform default.
            with open(f'{df_name.value}.json', mode='w', encoding='utf-8') as f:
                json.dump(df_dict, f, indent=2, ensure_ascii=False)
            df.to_csv(f'{df_name.value}.csv', index=False)

    add_button.on_click(on_click)
    make_button.on_click(on_click_make_df)
    display(root, out)

In [None]:
def upload_json():
    """
    Display a small form for loading a saved configuration JSON.

    The form shows a path input, a "Load JSON" button and an output area.
    Clicking the button reads the JSON file at the entered path and passes the
    parsed dictionary to ``show_widgets``. If the file cannot be read or
    parsed, the error is printed in the output area and no GUI is built.
    """
    out = iw.Output(layout={'border': '1px solid black'})
    uploads = iw.Text(
        description='JSON Path',
        value='',
        placeholder='xxxx.json',
    )
    load_button = iw.Button(description='Load JSON')

    def read_json(path):
        try:
            with open(path, mode='r', encoding='utf-8') as f:
                return json.load(f)
        except UnicodeDecodeError:
            # Not valid UTF-8: retry with the platform default encoding.
            with open(path, mode='r') as f:
                return json.load(f)

    def on_click_load(clicked_button: iw.Button) -> None:
        # Read the path at click time; at construction time it is always empty.
        out.clear_output()
        with out:
            try:
                df_dict = read_json(uploads.value)
            except (OSError, ValueError) as e:
                print(e)
                return
            show_widgets(df_dict)

    load_button.on_click(on_click_load)
    display(iw.HBox([uploads, load_button]), out)

In [18]:
# Launch the DataFrame-builder GUI (called without a preset configuration).
show_widgets()

VBox(children=(Button(description='make DF', style=ButtonStyle()), Text(value='test_df', description='DF Name'…

Output(layout=Layout(border_bottom='1px solid black', border_left='1px solid black', border_right='1px solid b…