In [624]:
import json
from io import TextIOWrapper

In [625]:
import pandas as pd

In [626]:
from dataclasses import dataclass

In [627]:
import unittest
from abc import abstractmethod
import sys

## Датасет Molweni

https://github.com/HIT-SCIR/Molweni

содержит многосторонние диалоги из Ubuntu Chat Corpus (переписки на форуме, посвящённом операционной системе Ubuntu) с разметкой в рамках SDRT - Segmented Discourse Representation Theory. Предназначен для задач Machine Reading Comprehension.


объём: 10 000 диалогов

подробнее об SDRT: каждый диалог представлен как ориентированный ациклический граф, где Узел - это высказывание; Ребро - это дискурсивная связь; Направление ребра - направление дискурсивной зависимости. Взаимосвязанные высказывания обозначаются как Arg1 и Arg2. Подход SDRT различает 16 типов дискурсивных связей. EDU считается одна реплика одного пользователя, т.е. Высказывания пользователя на части не сегментируется.

**Данные устроены так:**

есть 3 файла json: dev-Copy1, test-Copy1, train-Copy1

в каждом из них по ключу data следующая структура:

по ключу title (для каждого файла значение соответсвенно dev, test или train) лежит список dialogues; 

в списке dialogues - словари, в каждом словаре следующие ключи:
- edus (список EDU каждого диалога, каждое EDU - словарь с ключами text и speaker)
- context (склеенные в одну строку элементы edus, speaker : text)
- qas (вопросы и ответы разметчиков; у каждого вопроса есть: id; question - сам текст вопроса; answers - список кусочков текста из context (?), где есть ответ на вопрос, и answer_start - число {возможно, с какого элемента в строке context начинается ответ}; is_impossible: true/false формат, если хоть один ответ на вопрос есть, то значение false, если список answers пуст, то true)
- relations (отношения между EDU в этом диалоге; у каждого отношения есть: x и y - Arg1 и Arg2 соответственно; type - тип отношения)

## класс Molweni_into_pandas

Мы хотим создать таблицу pandas, где данные представлялись бы удобно, построчно для каждого диалога (т.е. таблица со следующими столбцами: 'dialogue ID', 'context', 'EDUs', 'relations', 'QAs'). Для этого создаём класс, который предоставляет данные в удобном виде. В функции классов будет передаваться переменная - датасет Molweni в формате json. Функции класса будут читать датасет и доставать из него необходимые элементы, сразу формируя из них строки для таблицы.

In [628]:
def decor_exceptions_for_inint(function_init):
    def exceptions_to_wrap_around_init(self, input_name):
        try:
            function_init(self, input_name)
        except FileNotFoundError: 
            print('File named <<', 
                  input_name, '>> is not found. Please check the spelling!')         
        except ValueError:
            print('File named <<', 
                  input_name, '>> is not JSON file and cannot be accepted. Only JSON file can do!')         
        except (OSError, TypeError):
            print(input_name, ' is ', 
                  type(input_name), ''' and cannot be accepted. Should be class 'str' ''')
    return exceptions_to_wrap_around_init

In [629]:
def decor_for_into_pandas_lines(function_pandas_lines):
    def print_after_pandas_lines(self, some_dict):
        x = function_pandas_lines(self, some_dict)
        print('Successfully unwraped! You can call in via .pandas_data')
        return x
    return print_after_pandas_lines

In [630]:
def decor_exceptions_for_unfold(function_unfold):
    def exceptions_to_wrap_around_unfold(self, some_dict, some_int):
        try:
            x1, x2, x3, x4, x5 = function_unfold(self, some_dict, some_int)
            return x1, x2, x3, x4, x5
        except Exception:
            print('''Something is wrong with data itself. Make sure it's Molweni format!''')
    return exceptions_to_wrap_around_unfold

In [631]:
class Molweni_into_pandas:
    filename: str
    file: TextIOWrapper
    data_from_file: dict
    pandas_data: list
    
    @decor_exceptions_for_inint
    def __init__(self, input_filename: str) -> None:
        file = open(input_filename, 'r', encoding='utf-8')
        self.data_from_file = json.load(file)
        self.pandas_data = self.into_pandas_lines(self.data_from_file)
        
    @decor_for_into_pandas_lines
    def into_pandas_lines(self, data: dict) -> list:        
        raw_dialogues = data['data']['dialogues']
        unfld_data = [list(self.unfold(raw_dialogues[n], n+1)) 
                      for n in range(len(raw_dialogues))]
        return unfld_data
    
    @decor_exceptions_for_unfold
    def unfold(self, folded_dialogue: dict, number: int) -> list:
        dialogue_ID = 'd' + str(number)
        
        context = folded_dialogue['context']
        
        raw_edus = folded_dialogue['edus']
        edus_of_this_dialogue = [{'edu_ID':'d' + str(number)  + 'e' + str(y+1), 
                        'speaker':raw_edus[y]['speaker'], 
                        'text':raw_edus[y]['text']} for y in range(len(raw_edus))]
        
        raw_relations = folded_dialogue['relations']
        relations_of_this_dialogue = [{'relation_ID':'d' + str(number)  + 'r' + str(z+1), 
                                'type':raw_relations[z]['type'], 
                                'Arg1':'e' + str(raw_relations[z]['x']+1), 
                                'Arg2':'e' + str(raw_relations[z]['y']+1)} for z in range(len(raw_relations))] 

        qas = folded_dialogue['qas']
        return dialogue_ID, context, edus_of_this_dialogue, relations_of_this_dialogue, qas                 

In [632]:
mp = Molweni_into_pandas('test-Copy1.json')

Successfully unwraped! You can call in via .pandas_data


In [633]:
tabledata = mp.pandas_data

In [634]:
pd.DataFrame(tabledata, columns=['dialogue ID', 'context', 'EDUs', 'relations', 'QAs'])

Unnamed: 0,dialogue ID,context,EDUs,relations,QAs
0,d1,jujubee_: that i use between linux and windows...,"[{'edu_ID': 'd1e1', 'speaker': 'JuJuBee_', 'te...","[{'relation_ID': 'd1r1', 'type': 'Clarificatio...","[{'question': 'Where does JuJuBee_ use ?', 'id..."
1,d2,mohammadag: replace block with your dev node b...,"[{'edu_ID': 'd2e1', 'speaker': 'MohammadAG', '...","[{'relation_ID': 'd2r1', 'type': 'Narration', ...","[{'question': 'What can replace block ?', 'id..."
2,d3,sugi: i guess there is not enough gamers here ...,"[{'edu_ID': 'd3e1', 'speaker': 'sugi', 'text':...","[{'relation_ID': 'd3r1', 'type': 'Clarificatio...","[{'question': 'What is sugi going to do ?', 'i..."
3,d4,gnomefreak: to me it looks like its trying to ...,"[{'edu_ID': 'd4e1', 'speaker': 'gnomefreak', '...","[{'relation_ID': 'd4r1', 'type': 'Acknowledgem...","[{'question': 'Who opened the repositories？', ..."
4,d5,warbler: adebo : what are you entering for the...,"[{'edu_ID': 'd5e1', 'speaker': 'warbler', 'tex...","[{'relation_ID': 'd5r1', 'type': 'QAP', 'Arg1'...",[{'question': 'What was warbler entering for t...
5,d6,niuq: there is any way to disable tapping from...,"[{'edu_ID': 'd6e1', 'speaker': 'niuq', 'text':...","[{'relation_ID': 'd6r1', 'type': 'Clarificatio...","[{'question': 'What does niuq want to do ?', '..."
6,d7,nbx909: how do i find the address of a usb dev...,"[{'edu_ID': 'd7e1', 'speaker': 'nbx909', 'text...","[{'relation_ID': 'd7r1', 'type': 'Q-Elab', 'Ar...","[{'question': 'What does nbx909 want to do ?',..."
7,d8,cmclean: geforce4 mx 440 with agp8x ( gpu 0 de...,"[{'edu_ID': 'd8e1', 'speaker': 'Cmclean', 'tex...","[{'relation_ID': 'd8r1', 'type': 'Narration', ...",[{'question': 'What does the command you gave ...
8,d9,noone: can some one help me with linux warpfor...,"[{'edu_ID': 'd9e1', 'speaker': 'noone', 'text'...","[{'relation_ID': 'd9r1', 'type': 'Q-Elab', 'Ar...",[{'question': 'What is everybody using here ?'...
9,d10,rabiddachshund: `` button 3 '' refers to the m...,"[{'edu_ID': 'd10e1', 'speaker': 'rabiddachshun...","[{'relation_ID': 'd10r1', 'type': 'QAP', 'Arg1...",[{'question': 'What refers to the clicking of ...


Красиво, но можно лучше

## класс-наследник Molweni_into_beautiful_pandas (переопределение метода)

Всё то же самое, но без лишних скобочек от словарей и списков. Для EDU и relations в рамках одной ячейки - красивое построчное выведение по одному. Для этого наследуем всё, но переопределим функцию into_pandas_lines.

P.S. для столбика Q&A понадобится разработать отдельную систему выведения

In [635]:
def to_overcome_QA_problem(beauty_func):
    def QA_problem_wraping(self, some_list):
        try:
            results = beauty_func(self, some_list)
            return results
        except TypeError:
            return some_list
    return QA_problem_wraping

In [636]:
class Molweni_into_beautiful_pandas(Molweni_into_pandas):
    
    @decor_for_into_pandas_lines
    def into_pandas_lines(self, data: dict)-> list:
        unwr_data = []
        raw_dialogues = data['data']['dialogues']
        for n in range(len(raw_dialogues)):
            (dialogue_ID, 
             context, 
             edus_of_this_dialogue,
            relations_of_this_dialogue, 
             qas) = self.unfold(raw_dialogues[n], n+1)
            new_line = [dialogue_ID, 
                        context, 
                        self.beauty_maker(edus_of_this_dialogue), 
                        self.beauty_maker(relations_of_this_dialogue), 
                        self.beauty_maker(qas)]
            unwr_data.append(new_line)          
        return unwr_data
    
    @to_overcome_QA_problem
    def beauty_maker(self, in_work_list: list) -> list:
        list_of_new_ones = []
        for one in in_work_list:
            parts = [key + ': ' + value for key, value in one.items()]
            new_one = ' -> '.join(parts)
            list_of_new_ones.append(new_one)
        beautiful_new_ones = '\n'.join(list_of_new_ones)
        return beautiful_new_ones

In [637]:
mb = Molweni_into_beautiful_pandas('test-Copy1.json')

Successfully unwraped! You can call in via .pandas_data


In [638]:
tabledata_vol2 = mb.pandas_data

In [639]:
pd.DataFrame(tabledata_vol2, columns=['dialogue ID', 'context', 'EDUs', 'relations', 'QAs'])

Unnamed: 0,dialogue ID,context,EDUs,relations,QAs
0,d1,jujubee_: that i use between linux and windows...,edu_ID: d1e1 -> speaker: JuJuBee_ -> text: tha...,relation_ID: d1r1 -> type: Clarification_quest...,"[{'question': 'Where does JuJuBee_ use ?', 'id..."
1,d2,mohammadag: replace block with your dev node b...,edu_ID: d2e1 -> speaker: MohammadAG -> text: r...,relation_ID: d2r1 -> type: Narration -> Arg1: ...,"[{'question': 'What can replace block ?', 'id..."
2,d3,sugi: i guess there is not enough gamers here ...,edu_ID: d3e1 -> speaker: sugi -> text: i guess...,relation_ID: d3r1 -> type: Clarification_quest...,"[{'question': 'What is sugi going to do ?', 'i..."
3,d4,gnomefreak: to me it looks like its trying to ...,edu_ID: d4e1 -> speaker: gnomefreak -> text: t...,relation_ID: d4r1 -> type: Acknowledgement -> ...,"[{'question': 'Who opened the repositories？', ..."
4,d5,warbler: adebo : what are you entering for the...,edu_ID: d5e1 -> speaker: warbler -> text: adeb...,relation_ID: d5r1 -> type: QAP -> Arg1: e1 -> ...,[{'question': 'What was warbler entering for t...
5,d6,niuq: there is any way to disable tapping from...,edu_ID: d6e1 -> speaker: niuq -> text: there i...,relation_ID: d6r1 -> type: Clarification_quest...,"[{'question': 'What does niuq want to do ?', '..."
6,d7,nbx909: how do i find the address of a usb dev...,edu_ID: d7e1 -> speaker: nbx909 -> text: how d...,relation_ID: d7r1 -> type: Q-Elab -> Arg1: e1 ...,"[{'question': 'What does nbx909 want to do ?',..."
7,d8,cmclean: geforce4 mx 440 with agp8x ( gpu 0 de...,edu_ID: d8e1 -> speaker: Cmclean -> text: gefo...,relation_ID: d8r1 -> type: Narration -> Arg1: ...,[{'question': 'What does the command you gave ...
8,d9,noone: can some one help me with linux warpfor...,edu_ID: d9e1 -> speaker: noone -> text: can so...,relation_ID: d9r1 -> type: Q-Elab -> Arg1: e1 ...,[{'question': 'What is everybody using here ?'...
9,d10,rabiddachshund: `` button 3 '' refers to the m...,edu_ID: d10e1 -> speaker: rabiddachshund -> te...,relation_ID: d10r1 -> type: QAP -> Arg1: e1 ->...,[{'question': 'What refers to the clicking of ...


## класс-наследник для Q&A (расширение метода)

А теперь мы хотим, не меняя исходного кода, добавить возможность точно так же красиво оформлять данные столбика Q&A.

In [640]:
def QA_wraper_for_beauty_maker(beauty_maker_func):
    def the_QAwraping(self, some_list):
        try:
            bno = beauty_maker_func(self, some_list)
            return bno
        except TypeError:
            list_of_excepted_ones = []
            for one in some_list:
                excepted_parts = []
                for one_key, one_value in one.items():
                    if type(one_value) == list and len(one_value) != 0:
                        for inner_key, inner_value in one_value[0].items():
                            inner_part = inner_key + ': ' + str(inner_value)
                            excepted_parts.append(inner_part)
                    else:
                        part = one_key + ': ' + str(one_value)
                        excepted_parts.append(part)
                new_one = ' -> '.join(excepted_parts)
                list_of_excepted_ones.append(new_one)
            beautiful_new_excepted_ones = '\n'.join(list_of_excepted_ones)
            return(beautiful_new_excepted_ones)
    return the_QAwraping

In [641]:
class Molweni_into_pandas_QA_edition(Molweni_into_beautiful_pandas):
        
    @QA_wraper_for_beauty_maker
    def beauty_maker(self, in_work_list: list) -> list:
        return super().beauty_maker(self, in_work_list)       

In [642]:
mqa = Molweni_into_pandas_QA_edition('test-Copy1.json')

Successfully unwraped! You can call in via .pandas_data


In [643]:
tabledata_vol3 = mqa.pandas_data

In [644]:
pd.DataFrame(tabledata_vol3, columns=['dialogue ID', 'context', 'EDUs', 'relations', 'QAs'])

Unnamed: 0,dialogue ID,context,EDUs,relations,QAs
0,d1,jujubee_: that i use between linux and windows...,edu_ID: d1e1 -> speaker: JuJuBee_ -> text: tha...,relation_ID: d1r1 -> type: Clarification_quest...,question: Where does JuJuBee_ use ? -> id: 3f4...
1,d2,mohammadag: replace block with your dev node b...,edu_ID: d2e1 -> speaker: MohammadAG -> text: r...,relation_ID: d2r1 -> type: Narration -> Arg1: ...,question: What can replace block ? -> id: 28b...
2,d3,sugi: i guess there is not enough gamers here ...,edu_ID: d3e1 -> speaker: sugi -> text: i guess...,relation_ID: d3r1 -> type: Clarification_quest...,question: What is sugi going to do ? -> id: 6c...
3,d4,gnomefreak: to me it looks like its trying to ...,edu_ID: d4e1 -> speaker: gnomefreak -> text: t...,relation_ID: d4r1 -> type: Acknowledgement -> ...,question: Who opened the repositories？ -> id: ...
4,d5,warbler: adebo : what are you entering for the...,edu_ID: d5e1 -> speaker: warbler -> text: adeb...,relation_ID: d5r1 -> type: QAP -> Arg1: e1 -> ...,question: What was warbler entering for the pa...
5,d6,niuq: there is any way to disable tapping from...,edu_ID: d6e1 -> speaker: niuq -> text: there i...,relation_ID: d6r1 -> type: Clarification_quest...,question: What does niuq want to do ? -> id: 6...
6,d7,nbx909: how do i find the address of a usb dev...,edu_ID: d7e1 -> speaker: nbx909 -> text: how d...,relation_ID: d7r1 -> type: Q-Elab -> Arg1: e1 ...,question: What does nbx909 want to do ? -> id:...
7,d8,cmclean: geforce4 mx 440 with agp8x ( gpu 0 de...,edu_ID: d8e1 -> speaker: Cmclean -> text: gefo...,relation_ID: d8r1 -> type: Narration -> Arg1: ...,question: What does the command you gave sudo ...
8,d9,noone: can some one help me with linux warpfor...,edu_ID: d9e1 -> speaker: noone -> text: can so...,relation_ID: d9r1 -> type: Q-Elab -> Arg1: e1 ...,question: What is everybody using here ? -> id...
9,d10,rabiddachshund: `` button 3 '' refers to the m...,edu_ID: d10e1 -> speaker: rabiddachshund -> te...,relation_ID: d10r1 -> type: QAP -> Arg1: e1 ->...,question: What refers to the clicking of the w...


## Класс-наследник для создания отдельных списков (расширение метода)

Генерирует отдельные списки диалогов целиком (контекстов), EDU, отношений и вопросов. При этом, где требуется, добавляется ID диалога.

In [645]:
class Molweni_lists(Molweni_into_pandas_QA_edition):
    list_of_contexts: list
    list_of_EDUs: list
    list_of_relations: list
    list_of_QAs: list
    
    @decor_exceptions_for_inint
    def __init__(self, input_filename: str) -> None:
        super().__init__(input_filename)
        self.list_of_contexts = self.list_maker(self.data_from_file, 'contexts')
        self.list_of_EDUs = self.list_maker(self.data_from_file, 'EDUs')
        self.list_of_relations = self.list_maker(self.data_from_file, 'relations')
        self.list_of_QAs = self.list_maker(self.data_from_file, 'QAs')
        print('You can call specific list via .list_of_contexts , .list_of_EDUs , .list_of_relations or .list_of_QAs!')
    
    def list_maker(self, data: dict, request_name: str) -> list:
        raw_dialogues = data['data']['dialogues']
        list_of_results = []
        for n in range(len(raw_dialogues)):
            (dialogue_ID, 
             context, 
             edus_of_this_dialogue, 
             relations_of_this_dialogue, 
             qas) = self.unfold(raw_dialogues[n], n+1)
            if request_name == 'contexts':
                context_item = dialogue_ID + ' >>> ' + context
                list_of_results.append(context_item)
            if request_name == 'EDUs':
                list_of_results.append(self.beauty_maker(edus_of_this_dialogue))
            if request_name == 'relations':
                list_of_results.append(self.beauty_maker(relations_of_this_dialogue))
            if request_name == 'QAs':
                qa_item = dialogue_ID + ' >>> ' + self.beauty_maker(qas)
                list_of_results.append(qa_item)
        return list_of_results

In [646]:
ml = Molweni_lists('test-Copy1.json')

Successfully unwraped! You can call in via .pandas_data
You can call specific list via .list_of_contexts , .list_of_EDUs , .list_of_relations or .list_of_QAs!


In [647]:
ml.list_of_contexts[10]

"d11 >>> bhuey: what about via command line remotely ? bullgard4_: please read url having done so , please put here in this channel a more specific question . flannel: i do n't think that 's relevant . bullgard4_: ah ! what do you think is relevant ? flannel: url and url gives you gui and manual methods of setting static ips ( it still uses filepath bhuey: what about command line ? this is a remote machine flannel: see the second link , the serverguide one ."

In [648]:
print(ml.list_of_EDUs[10])

edu_ID: d11e1 -> speaker: bhuey -> text: what about via command line remotely ?
edu_ID: d11e2 -> speaker: bullgard4_ -> text: please read URL having done so , please put here in this channel a more specific question .
edu_ID: d11e3 -> speaker: Flannel -> text: i do n't think that 's relevant .
edu_ID: d11e4 -> speaker: bullgard4_ -> text: ah ! what do you think is relevant ?
edu_ID: d11e5 -> speaker: Flannel -> text: URL and URL gives you gui and manual methods of setting static ips ( it still uses FILEPATH
edu_ID: d11e6 -> speaker: bhuey -> text: what about command line ? this is a remote machine
edu_ID: d11e7 -> speaker: Flannel -> text: see the second link , the serverguide one .


In [649]:
print(ml.list_of_relations[10])

relation_ID: d11r1 -> type: QAP -> Arg1: e1 -> Arg2: e2
relation_ID: d11r2 -> type: Comment -> Arg1: e2 -> Arg2: e3
relation_ID: d11r3 -> type: Clarification_question -> Arg1: e3 -> Arg2: e4
relation_ID: d11r4 -> type: QAP -> Arg1: e1 -> Arg2: e5
relation_ID: d11r5 -> type: Clarification_question -> Arg1: e5 -> Arg2: e6
relation_ID: d11r6 -> type: QAP -> Arg1: e6 -> Arg2: e7


In [650]:
print(ml.list_of_QAs[10])

d11 >>> question: What does Flannel think ? -> id: 240679b6ca7c02fd27189e3a9cd490f4 -> text: do n't think that 's relevant -> answer_start: 161 -> is_impossible: False
question: What gives you gui and manual methods ? -> id: e9097168bbddefc41f0f74cbf29837f2 -> text: url and url -> answer_start: 251 -> is_impossible: False
question: What is a remote machine ? -> id: 353fd99d1d064411354eeca68cb449a7 -> text: the serverguide one -> answer_start: 432 -> answers: [] -> is_impossible: True
question: What does bhuey want to do ? -> id: 159640a7bff5d39960ef76b93128eaed -> text: via command line remotely -> answer_start: 18 -> is_impossible: False
question: What is the suggestion of Flannel ? -> id: dedce5d522c476c6035e847566edf4a5 -> text: setting static ips -> answer_start: 299 -> is_impossible: False
question: What Flannel doesn't think ? -> id: bae221c109cf878c9ad76c8b2efc2486 -> text: that 's relevant -> answer_start: 174 -> is_impossible: False
question: What is it according to bhuey ? ->

## Dataclass

In [651]:
@dataclass
class Molweni_data:
    raw_data: dict
    processed_data: list
    contexts: list
    EDUs: list
    relations: list
    QAs: list 

    def _init_(self: 'Molweni_data', _raw_data: dict, _processed_data: list, 
               _contexts: list, _EDUs: list, _relations: list, _QAs: list):
        self.raw_data = _raw_data
        self.processed_data = _processed_data
        self.contexts = _contexts
        self.EDUs = _EDUs
        self.relations = _relations
        self.QAs = _QAs

In [652]:
dtcls = Molweni_data(ml.data_from_file, ml.pandas_data, ml.list_of_contexts,
                    ml.list_of_EDUs, ml.list_of_relations, ml.list_of_QAs)

In [653]:
dtcls.EDUs

["edu_ID: d1e1 -> speaker: JuJuBee_ -> text: that i use between linux and windows\nedu_ID: d1e2 -> speaker: nit-wit -> text: did you mount it with fstab ? give us a pastebin of the fstab that is probably it eh.EMOJI\nedu_ID: d1e3 -> speaker: ikonia -> text: it 's treated as a mount mask\nedu_ID: d1e4 -> speaker: zykotick9 -> text: umask are subtracted , so your other group has no permission\nedu_ID: d1e5 -> speaker: ikonia -> text: what ever permissions you feel fit\nedu_ID: d1e6 -> speaker: MrNthDegree -> text: outdated by loads , it has 9.04 as the newest version\nedu_ID: d1e7 -> speaker: SuperMiguel -> text: they are back in 9.04 : s",
 "edu_ID: d2e1 -> speaker: MohammadAG -> text: replace block with your dev node\nedu_ID: d2e2 -> speaker: blue_pearl -> text: here is the output when i ran the comman\nedu_ID: d2e3 -> speaker: blue_pearl -> text: URL here is the output\nedu_ID: d2e4 -> speaker: MohammadAG -> text: i 'm assuming it 's not raid ?\nedu_ID: d2e5 -> speaker: blue_pearl -> 

## Unittest

In [655]:
class MolweniTest(unittest.TestCase):
    
    def setUp(self):
        print("setting up an object")
        
        '''len(dtcls.raw_data['data']['dialogues']) # 100
        dtcls.raw_data['data']['dialogues'][5]['qas'][19]['id'] # 'e49d165b91894835fd254dfc28d087a9'
        len(dtcls.raw_data['data']['dialogues'][74]['qas']) # 29
        dtcls.raw_data['data']['dialogues'][57]['relations'][1]['type'] # 'Narration'
        len(dtcls.raw_data['data']['dialogues'][9]['relations']) # 10
        dtcls.raw_data['data']['dialogues'][26]['edus'][3]['speaker'] # 'codebrainz'
        len(dtcls.raw_data['data']['dialogues'][86]['edus']) # 11'''
    
    @classmethod
    def setUpClass(cls):
        print("setting up the class")

        
    def testMolweni_into_pandastest(self):
        print("start testing process: Molweni_into_pandas")
        self.mlwn_pnds = Molweni_into_pandas('test-Copy1.json').pandas_data
        self.assertEqual(len(self.mlwn_pnds), 100)
        self.assertEqual(self.mlwn_pnds[5][4][19]['id'], 'e49d165b91894835fd254dfc28d087a9')
        self.assertEqual(len(self.mlwn_pnds[74][4]), 29)
        self.assertEqual(self.mlwn_pnds[57][3][1]['type'], 'Narration')
        self.assertEqual(self.mlwn_pnds[26][2][3]['speaker'], 'codebrainz')   
        self.assertEqual(len(self.mlwn_pnds[86][2]), 11)                
        print("finish testing process: Molweni_into_pandas")
        
    def testMolweni_into_beautiful_pandastest(self):
        print("start testing process: Molweni_into_beautiful_pandas")
        self.mlwn_btfl_pnds = Molweni_into_beautiful_pandas('test-Copy1.json').pandas_data
        self.assertEqual(len(self.mlwn_btfl_pnds), 100)
        self.assertIs(type(self.mlwn_btfl_pnds[95]), list)
        self.assertIs(type(self.mlwn_btfl_pnds[34][2]), str)
        self.assertIs(type(self.mlwn_btfl_pnds[34][3]), str)
        print("finish testing process: Molweni_into_beautiful_pandas")
        
    def testMolweni_into_pandas_QA_editiontest(self):
        print("start testing process: Molweni_into_pandas_QA_edition")
        self.mlwn_pnds_qa = Molweni_into_pandas_QA_edition('test-Copy1.json').pandas_data
        self.assertEqual(len(self.mlwn_pnds_qa), 100)
        self.assertIs(type(self.mlwn_pnds_qa[68]), list)
        self.assertIs(type(self.mlwn_pnds_qa[34][4]), str)
        print("finish testing process: Molweni_into_pandas_QA_edition")
        
    def testMolweni_liststest(self):
        print("start testing process: Molweni_lists")
        self.mlwn_lst_cntxt = Molweni_lists('test-Copy1.json').list_of_contexts
        self.assertEqual(len(self.mlwn_lst_cntxt), 100)
        self.assertIs(type(self.mlwn_lst_cntxt[6]), str)
        self.mlwn_lst_edus = Molweni_lists('test-Copy1.json').list_of_EDUs
        self.assertEqual(len(self.mlwn_lst_edus), 100)
        self.assertIs(type(self.mlwn_lst_edus[44]), str)
        self.mlwn_lst_rltns = Molweni_lists('test-Copy1.json').list_of_relations
        self.assertEqual(len(self.mlwn_lst_rltns), 100)
        self.assertIs(type(self.mlwn_lst_rltns[99]), str)
        self.mlwn_lst_qas = Molweni_lists('test-Copy1.json').list_of_QAs
        self.assertEqual(len(self.mlwn_lst_qas), 100)
        self.assertIs(type(self.mlwn_lst_qas[55]), str)
        print("finish testing process: Molweni_lists")
    
    def tearDown(self):
        print("tearing down")
        
    def doCleanups(self):
        print("cleaning up")

In [656]:
unittest.main(argv=[''], verbosity=2, exit=False)

  after removing the cwd from sys.path.
ok
testMolweni_into_pandas_QA_editiontest (__main__.MolweniTest) ... 

setting up the class
setting up an object
start testing process: Molweni_into_beautiful_pandas
Successfully unwraped! You can call in via .pandas_data
finish testing process: Molweni_into_beautiful_pandas
tearing down
cleaning up
setting up an object
start testing process: Molweni_into_pandas_QA_edition
Successfully unwraped! You can call in via .pandas_data

ok
testMolweni_into_pandastest (__main__.MolweniTest) ... ok
testMolweni_liststest (__main__.MolweniTest) ... 


finish testing process: Molweni_into_pandas_QA_edition
tearing down
cleaning up
setting up an object
start testing process: Molweni_into_pandas
Successfully unwraped! You can call in via .pandas_data
finish testing process: Molweni_into_pandas
tearing down
cleaning up
setting up an object
start testing process: Molweni_lists
Successfully unwraped! You can call in via .pandas_data
You can call specific list via .list_of_contexts , .list_of_EDUs , .list_of_relations or .list_of_QAs!
Successfully unwraped! You can call in via .pandas_data
You can call specific list via .list_of_contexts , .list_of_EDUs , .list_of_relations or .list_of_QAs!
Successfully unwraped! You can call in via .pandas_data
You can call specific list via .list_of_contexts , .list_of_EDUs , .list_of_relations or .list_of_QAs!
Successfully unwraped! You can call in via .pandas_data
You can call specific list via .list_of_contexts , .list_of_EDUs , .list_of_relations or .list_of_QAs!
finish testing process: Molweni_list

ok

----------------------------------------------------------------------
Ran 4 tests in 0.589s

OK


<unittest.main.TestProgram at 0x1cba5f61e10>