In [1]:
import notebook_hook
import logging, sys
logging.disable(sys.maxsize)
import pathlib
import json
from tqdm import tqdm
import pandas as pd
from collections import Counter
import numpy as np
from qg.notebooks.t_tests.t_test_functions import (
    count_tokens,
    diff_number_words_per_question,
    diff_number_of_concepts_per_question,
    diff_prop_concepts_per_words,
    most_frequent_concepts,
    diff_number_of_verbs,
    diff_prop_of_verbs,
)

from qg.results_analysis.objects.POSAnalysis import POS_analysis_object

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
ROOT_DIR = pathlib.Path().resolve().parents[1]
print(f"Root directory: {ROOT_DIR}")

Root directory: C:\Users\DeboraOrsolich\Development\question_generation_models\deep_qg\qg


In [3]:
model = "AA"
with open(ROOT_DIR/f"transformers_models/experiment_{model}/mapped_validation_questions.json", encoding="utf-8") as f:
    questions_val = json.load(f)
    pred_questions = questions_val["predictions"]

with open(ROOT_DIR/f"transformers_models/experiment_{model}/classification_validation_questions.json", encoding="utf-8") as f:
    pred_y = json.load(f)
    pred_y = pred_y["pred_y"]

In [4]:
df = pd.DataFrame.from_dict({"text": pred_questions, "labels": pred_y})

# preparing the data for the analysis...
useful = df[df["labels"]==1]
not_useful = df[df["labels"]==0]

sets = [useful, not_useful]
sets_name = ["useful", "not_useful"]
print(len(sets[0]), len(sets[1]))


1076 101


In [5]:
print("### Are there statistically significant differences in the number of words in the questions of each group?")
diff_number_words_per_question(sets, sets_name)
print()
print("### Are there statistically significant differences in the number of concepts in the questions of each group?")
strings_groups = diff_number_of_concepts_per_question(sets, sets_name)
print()
print("### Are there statistically significant differences in the proportion of concepts per words in each question between both groups?")
diff_prop_concepts_per_words(sets, sets_name)


### Are there statistically significant differences in the number of words in the questions of each group?
No, there are not statistically significant differences in the length of the questions between the two groups
   P-value = 0.7687897846588561
   Average length of useful questions 10.04460966542751 tokens
   Average length of not_useful questions 9.94059405940594 tokens

### Are there statistically significant differences in the number of concepts in the questions of each group?


100%|██████████| 1076/1076 [00:00<00:00, 89749.85it/s]
100%|██████████| 101/101 [00:00<00:00, 51255.26it/s]


No, there are not statistically significant differences in the number of concepts in each question between the two groups
   P-value = 0.6688802111489434
   Average number of concepts in useful questions: 2.5446096654275094
   Average number of concepts in not useful questions: 2.495049504950495

### Are there statistically significant differences in the proportion of concepts per words in each question between both groups?


100%|██████████| 1076/1076 [00:00<00:00, 82533.03it/s]
100%|██████████| 101/101 [00:00<00:00, 100839.02it/s]

No, there are not statistically significant differences HAVE NOT been found in the proportion of concepts per word in each question between the two groups
   P-value = 0.6498385143204044
   Average proportion of concepts per words in useful questions: 0.2223098705100306
   Average proportion of concepts per words in not_useful questions: 0.21932295288976164





In [6]:
print("### What are the most frequent concepts in each group?")
cnt_useful, cnt_not_useful, all_strings_useful, all_strings_not_useful = most_frequent_concepts(strings_groups)
n = 10

print(f"{n} MORE FREQUENT CONCEPTS IN USEFUL QUESTIONS")
print(f"   Concept:   frequency")
for i in range(n):
    concept = cnt_useful[i][0]
    count = cnt_useful[i][1]
    print(f"  {i}.   {concept}:     {count}")

print()
print(f"{n} MORE FREQUENT CONCEPTS IN NOT USEFUL QUESTIONS")
print(f"   Concept:   frequency")
for i in range(n):
    concept = cnt_not_useful[i][0]
    count = cnt_not_useful[i][1]
    print(f"  {i}.   {concept}:     {count}")

### What are the most frequent concepts in each group?
10 MORE FREQUENT CONCEPTS IN USEFUL QUESTIONS
   Concept:   frequency
  0.   year:     76
  1.   type:     13
  2.   Justice:     12
  3.   Court:     12
  4.   people:     12
  5.   University:     11
  6.   Warsaw:     10
  7.   percentage:     10
  8.   ctenophores:     8
  9.   Victoria:     8

10 MORE FREQUENT CONCEPTS IN NOT USEFUL QUESTIONS
   Concept:   frequency
  0.   year:     4
  1.   water:     3
  2.   Warsaw:     2
  3.   blood levels:     2
  4.   president:     2
  5.   case:     2
  6.   people:     2
  7.   non:     2
  8.   Virgin Media:     2
  9.   Sky:     2


In [7]:
print("### Are there statistically significant differences in the number of verbs in each question between both groups?")
verbs_groups = diff_number_of_verbs(sets, sets_name)
print("### Are there statistically significant differences in the proportion of verbs per words in each question between both groups?")
diff_prop_of_verbs(sets, sets_name)


### Are there statistically significant differences in the number of verbs in each question between both groups?


100%|██████████| 1076/1076 [00:00<00:00, 153735.90it/s]
100%|██████████| 101/101 [00:00<00:00, 101152.03it/s]


No, there are not statistically significant differences in the number of VERBS in each question between the two groups
   P-value = 0.32526721063204656
   Average number of verbs in useful questions: 0.8894052044609665 verbs
   Average number of verbs in not useful questions: 0.8118811881188119 verbs
### Are there statistically significant differences in the proportion of verbs per words in each question between both groups?


100%|██████████| 1076/1076 [00:00<00:00, 153730.66it/s]
100%|██████████| 101/101 [00:00<00:00, 33677.14it/s]

No, there are not Statistically significant differences in the proportion of VERBS per words in each question between the two groups
   P-value = 0.2535669100696515
   Average proportion of verbs per words in useful questions: 0.07920951575332846 verbs
   Average proportion of verbs per words in not useful questions: 0.07173700380448687 verbs





In [8]:
print("### What are the most frequent verbs?")
cnt_useful, cnt_not_useful, all_verbs_useful, all_verbs_not_useful = most_frequent_concepts(verbs_groups)
n = 10

print(f"{n} MORE FREQUENT VERBS IN USEFUL QUESTIONS")
print(f"   Verb:   frequency      Proportion")
for i in range(n):
    verb = cnt_useful[i][0]
    count = cnt_useful[i][1]
    prop = cnt_useful[i][1] / len(all_verbs_useful)
    print(f"  {i}.   {verb}:     {count}      {prop}")

print()
print(f"{n} MORE FREQUENT VERBS IN NOT USEFUL QUESTIONS")
print(f"   Verb:   frequency      Proportion")
for i in range(n):
    verb = cnt_not_useful[i][0]
    count = cnt_not_useful[i][1]
    prop = cnt_not_useful[i][1] / len(all_verbs_not_useful)
    print(f"  {i}.   {verb}:     {count}      {prop}")

### What are the most frequent verbs?
10 MORE FREQUENT VERBS IN USEFUL QUESTIONS
   Verb:   frequency      Proportion
  0.   established:     14      0.014629049111807733
  1.   known:     9      0.009404388714733543
  2.   published:     8      0.008359456635318705
  3.   built:     8      0.008359456635318705
  4.   begin:     8      0.008359456635318705
  5.   use:     7      0.0073145245559038665
  6.   believe:     7      0.0073145245559038665
  7.   developed:     7      0.0073145245559038665
  8.   created:     7      0.0073145245559038665
  9.   founded:     6      0.006269592476489028

10 MORE FREQUENT VERBS IN NOT USEFUL QUESTIONS
   Verb:   frequency      Proportion
  0.   founded:     3      0.036585365853658534
  1.   demolished:     2      0.024390243902439025
  2.   caused:     2      0.024390243902439025
  3.   identified:     1      0.012195121951219513
  4.   dealing:     1      0.012195121951219513
  5.   expounded:     1      0.012195121951219513
  6.   pointing:   

In [9]:
first_words = {}
for set, name in zip(sets, sets_name):
    pos_analysis = POS_analysis_object()
    questions_pipeline = pos_analysis.nlp_pipeline(set["text"].values)

    first_word = []
    for question in tqdm(questions_pipeline):
        first_word.append(str(question[0]))
    first_words[name] = first_word

100%|██████████| 1076/1076 [00:00<00:00, 538167.32it/s]
100%|██████████| 101/101 [00:00<?, ?it/s]


In [10]:
cnt = Counter()
cnt_useful = Counter(first_words["useful"])
cnt_not_useful = Counter(first_words["not_useful"])

cnt_useful = sorted(cnt_useful.items(), key=lambda item: item[1])
cnt_not_useful = sorted(cnt_not_useful.items(), key=lambda item: item[1])

cnt_useful.reverse()
cnt_not_useful.reverse()

In [11]:
n = 10

print(f"{n} MORE FREQUENT FIRST WORDS IN USEFUL QUESTIONS")
print(f"   Init word:   frequency      Proportion")
for i in range(n):
    verb = cnt_useful[i][0]
    count = cnt_useful[i][1]
    prop = cnt_useful[i][1] / len(first_words["useful"])
    print(f"  {i}.   {verb}:     {count}      {prop}")

print()
print(f"{n} MORE FREQUENT FIRST WORDS IN NOT USEFUL QUESTIONS")
print(f"   Init word:   frequency      Proportion")
for i in range(n):
    verb = cnt_not_useful[i][0]
    count = cnt_not_useful[i][1]
    prop = cnt_not_useful[i][1] / len(first_words["not_useful"])
    print(f"  {i}.   {verb}:     {count}      {prop}")

10 MORE FREQUENT FIRST WORDS IN USEFUL QUESTIONS
   Init word:   frequency      Proportion
  0.   What:     552      0.5130111524163569
  1.   Who:     138      0.12825278810408922
  2.   When:     130      0.120817843866171
  3.   How:     127      0.11802973977695168
  4.   In:     79      0.07342007434944238
  5.   Where:     13      0.012081784386617101
  6.   The:     6      0.0055762081784386614
  7.   On:     4      0.0037174721189591076
  8.   Private:     3      0.0027881040892193307
  9.   Which:     3      0.0027881040892193307

10 MORE FREQUENT FIRST WORDS IN NOT USEFUL QUESTIONS
   Init word:   frequency      Proportion
  0.   What:     47      0.46534653465346537
  1.   Who:     18      0.1782178217821782
  2.   How:     16      0.15841584158415842
  3.   When:     14      0.13861386138613863
  4.   In:     3      0.0297029702970297
  5.   During:     1      0.009900990099009901
  6.   the:     1      0.009900990099009901
  7.   Virgin:     1      0.009900990099009901


IndexError: list index out of range