In [1]:
from helpers.dataset import get_dataset
from src.framework import construct_cim

from helpers.dataset import MUFFIN_TASK, CUSTOM_TASKS, CROSS_TASK_TASKS

# Pick the task to analyze. Uncomment the next line (and comment out the one
# after it) to use MUFFIN_TASK instead of an entry of CUSTOM_TASKS.
# task = MUFFIN_TASK
task = CUSTOM_TASKS[14]

# Run construct_cim on the task and its dataset. The third argument is passed
# through as-is — presumably a version/configuration label; TODO confirm
# against src.framework.construct_cim.
results = construct_cim(task, get_dataset(task), "segmentation_v1")

# Keep only the 'labeled_dataset' entry of the results; the display cell below
# indexes it positionally (dataset[i]) to get one tutorial at a time.
dataset = results['labeled_dataset']

In [5]:
from collections import defaultdict
import sys
import json

from helpers.dataset import IMPORTANT_TYPES_FINE, IMPORTANT_TYPE_DESCRIPTIONS_FINE

def display_tutorial_contexts(tutorial, include_keys=None, include_content_types=None):
    """
    Print one tutorial's pieces as a markdown table.

    Each selected piece becomes one row. The columns are, in order:
    ``info_type`` (the piece's ``content_type``), one column per label key
    (in the order the keys are first seen), and finally ``content``.

    Args:
        tutorial: mapping with a ``'pieces'`` list. Every piece must provide
            ``'content_type'``, ``'content'`` and a ``'labels'`` mapping whose
            values are sequences; the last element of each sequence is shown.
        include_keys: optional collection of label keys to show; ``None``
            shows every label key.
        include_content_types: optional collection of content types to keep;
            ``None`` keeps every piece.

    Returns:
        None. The table is written to stdout, followed by a blank line.
    """
    # A dict is used as an insertion-ordered set of the label columns.
    label_keys = {}
    # One (info_type, labels, content) tuple per kept piece. Rows are gathered
    # first so that a piece lacking some label key still gets a cell for it.
    rows = []

    for piece in tutorial['pieces']:
        if include_content_types is not None and piece['content_type'] not in include_content_types:
            continue
        shown_labels = {}
        for key, value in piece['labels'].items():
            if include_keys is not None and key not in include_keys:
                continue
            label_keys.setdefault(key, None)
            # Show the last entry; an empty sequence yields an empty cell
            # instead of an IndexError.
            shown_labels[key] = value[-1] if len(value) else ""
        rows.append((piece['content_type'], shown_labels, piece['content']))

    header = ['info_type', *label_keys, 'content']
    separator = "| " + " | ".join(["---"] * len(header)) + " |"

    lines = ["| " + " | ".join(str(name) for name in header) + " |", separator]
    for info_type, shown_labels, content in rows:
        cells = [info_type, *(shown_labels.get(key, "") for key in label_keys), content]
        # str() keeps non-string label/content values from breaking the join.
        lines.append("| " + " | ".join(str(cell) for cell in cells) + " |")
    # The closing separator row is kept as in the original output format.
    lines.append(separator)

    print("\n".join(lines) + "\n")

def display_units(dataset, include_keys=None, include_content_types=None):
    """
    Print how many units contain each number of pieces.

    Pieces from every tutorial are grouped by ``piece['unit_id']``. The printed
    JSON object maps a piece count to the number of units that have exactly
    that many pieces, with the largest piece count listed first.

    Args:
        dataset: iterable of tutorials, each a mapping with a ``'pieces'`` list
            whose items provide ``'unit_id'`` (and ``'content_type'`` when a
            content-type filter is given).
        include_keys: accepted for signature compatibility; not used here.
        include_content_types: optional collection of content types to keep;
            ``None`` keeps every piece.

    Returns:
        None. The distribution is written to stdout as indented JSON.
    """
    pieces_by_unit = defaultdict(list)
    for tutorial in dataset:
        for piece in tutorial['pieces']:
            if include_content_types is not None and piece['content_type'] not in include_content_types:
                continue
            pieces_by_unit[piece['unit_id']].append(piece)

    # (unit_id, pieces) pairs, largest unit first; this ordering also fixes
    # the key order of the JSON printed below.
    units = sorted(pieces_by_unit.items(), key=lambda item: len(item[1]), reverse=True)

    # Distribution: number of pieces in a unit -> number of such units.
    lengths = defaultdict(int)
    for _, pieces in units:
        lengths[len(pieces)] += 1

    print(json.dumps(lengths, indent=4))

    # for unit_id, pieces in units:
    #     print(f"Unit {unit_id} ({len(pieces)} pieces) ")
    #     for piece in pieces:
    #         print(f"  - {piece['content_type']}: {piece['content']}")
    #     print()

def display_type_distribution(dataset):
    """
    Print how often each content type listed in IMPORTANT_TYPES_FINE occurs.

    Every piece of every tutorial in ``dataset`` is inspected; pieces whose
    ``'content_type'`` is not in IMPORTANT_TYPES_FINE are ignored. The result
    is printed as an indented JSON array of ``[content_type, count]`` pairs,
    ordered from the most to the least frequent type.
    """
    tally = defaultdict(int)

    matching_types = (
        piece['content_type']
        for tutorial in dataset
        for piece in tutorial['pieces']
        if piece['content_type'] in IMPORTANT_TYPES_FINE
    )
    for content_type in matching_types:
        tally[content_type] += 1

    # Stable sort, highest count first; ties keep their first-seen order.
    ranked = list(tally.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)

    print(json.dumps(ranked, indent=4))

In [6]:
# Print one markdown table per tutorial in the labeled dataset.
for tutorial in dataset:
    display_tutorial_contexts(tutorial)
# display_units(dataset, include_keys=['context_stage'])
# display_units(dataset, include_keys=['context_stage'], 
# include_content_types=IMPORTANT_TYPES_FINE)
# display_type_distribution(dataset)

| info_type | context_stage | context_component | context_action | context_material | context_purpose | content |
| --- | --- | --- | --- | --- | --- | --- |
| Greeting - Opening | Preparation | Hat | Plan | Paper | General | Today I will teach you how to make a paper hat out of a newspaper. |
| Overview - Briefing | Preparation | Fold/Flap | Plan | Paper | General | The paper hat fold requires five steps. |
| Method - Tool | Preparation | Materials | Plan | Paper | General | Get a newspaper. |
| Description - Tool Specification | Preparation | Materials | Plan | Paper | Personal | The present newspaper is the Conex East Garland news. |
| Miscellaneous - Side Note | Preparation | Materials | Plan | Paper | Personal | The speaker remarks the newspaper is interesting. |
| Method - Instruction | Construction | Fold/Flap | Manipulate | Paper | General | Fold the paper in half. |
| Explanation - Justification | Construction | Fold/Flap | Manipulate | Paper | Practical | Folding the paper is