In [1]:
# NOTE(review): notebook_hook is a project-local module imported before everything
# else; presumably it prepares the import path for the `qg` package — confirm.
import notebook_hook
import logging, sys
# Silence every logging call for the rest of the session: logging.disable()
# suppresses all records at or below the given level, and sys.maxsize sits
# above any level in use.
logging.disable(sys.maxsize)
import pathlib
import json
from tqdm import tqdm
import pandas as pd
from collections import Counter
import numpy as np
from qg.notebooks.t_tests.t_test_functions import (
    count_tokens,
    diff_number_words_per_question,
    diff_number_of_concepts_per_question,
    diff_prop_concepts_per_words,
    most_frequent_concepts,
    diff_number_of_verbs,
    diff_prop_of_verbs,
)

from qg.results_analysis.objects.POSAnalysis import POS_analysis_object

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# The project root is taken to be two directory levels above the current
# working directory, so the result depends on where the kernel was started.
notebook_dir = pathlib.Path().resolve()
ROOT_DIR = notebook_dir.parents[1]
print(f"Root directory: {ROOT_DIR}")

Root directory: C:\Users\DeboraOrsolich\Development\question_generation_models\deep_qg\qg


In [3]:
# Name of the experiment whose validation outputs are analysed below.
model = "basic"

# Questions for the validation split: the JSON document is kept whole in
# `questions_val`, and its "predictions" entry is what gets analysed.
questions_path = ROOT_DIR/f"transformers_models/experiment_{model}/mapped_validation_questions.json"
with open(questions_path, encoding="utf-8") as questions_file:
    questions_val = json.load(questions_file)
pred_questions = questions_val["predictions"]

# Labels for the same experiment, stored under the "pred_y" key.
labels_path = ROOT_DIR/f"transformers_models/experiment_{model}/classification_validation_questions.json"
with open(labels_path, encoding="utf-8") as labels_file:
    pred_y = json.load(labels_file)["pred_y"]

In [4]:
# One row per question: its text and the label loaded alongside it.
df = pd.DataFrame({"text": pred_questions, "labels": pred_y})

# Split the questions into the two groups compared in the rest of the
# notebook: label 1 -> "useful", label 0 -> "not_useful".
is_useful = df["labels"] == 1
is_not_useful = df["labels"] == 0
useful = df[is_useful]
not_useful = df[is_not_useful]

# `sets` and `sets_name` are parallel lists (same order).
sets_name = ["useful", "not_useful"]
sets = [useful, not_useful]
print(len(useful), len(not_useful))


1094 83


In [5]:
# Compare the two groups on three measures. Each heading is printed right
# before the helper that answers it; judging by the recorded output, the
# helpers print their own verdict, p-value and group averages.
print("### Are there statistically significant differences in the number of words in the questions of each group?")
diff_number_words_per_question(sets, sets_name)
print()
print("### Are there statistically significant differences in the number of concepts in the questions of each group?")
# The return value is kept: it is passed to most_frequent_concepts() later on.
strings_groups = diff_number_of_concepts_per_question(sets, sets_name)
print()
print("### Are there statistically significant differences in the proportion of concepts per words in each question between both groups?")
diff_prop_concepts_per_words(sets, sets_name)


### Are there statistically significant differences in the number of words in the questions of each group?
No, there are not statistically significant differences in the length of the questions between the two groups
   P-value = 0.23697764000193974
   Average length of useful questions 9.896709323583181 tokens
   Average length of not_useful questions 10.397590361445783 tokens

### Are there statistically significant differences in the number of concepts in the questions of each group?


100%|██████████| 1094/1094 [00:00<00:00, 78145.86it/s]
100%|██████████| 83/83 [00:00<00:00, 41517.86it/s]


No, there are not statistically significant differences in the number of concepts in each question between the two groups
   P-value = 0.652477644247162
   Average number of concepts in useful questions: 2.541133455210238
   Average number of concepts in not useful questions: 2.602409638554217

### Are there statistically significant differences in the proportion of concepts per words in each question between both groups?


100%|██████████| 1094/1094 [00:00<00:00, 85000.25it/s]
100%|██████████| 83/83 [00:00<00:00, 83105.09it/s]

No, there are not statistically significant differences HAVE NOT been found in the proportion of concepts per word in each question between the two groups
   P-value = 0.518471208253287
   Average proportion of concepts per words in useful questions: 0.2236398953353267
   Average proportion of concepts per words in not_useful questions: 0.21909469511886853





In [6]:
print("### What are the most frequent concepts in each group?")
cnt_useful, cnt_not_useful, all_strings_useful, all_strings_not_useful = most_frequent_concepts(strings_groups)
# Maximum number of entries listed per group.
n = 10

# Both rankings are sliced to at most `n` entries instead of being indexed
# with range(n): a group with fewer than `n` distinct concepts then prints
# a shorter list rather than raising IndexError.
# NOTE(review): assumes the rankings are sliceable sequences of
# (concept, count) entries, which is how they are indexed here — confirm.
print(f"{n} MORE FREQUENT CONCEPTS IN USEFUL QUESTIONS")
print("   Concept:   frequency")
for i, entry in enumerate(cnt_useful[:n]):
    concept, count = entry[0], entry[1]
    print(f"  {i}.   {concept}:     {count}")

print()
print(f"{n} MORE FREQUENT CONCEPTS IN NOT USEFUL QUESTIONS")
print("   Concept:   frequency")
for i, entry in enumerate(cnt_not_useful[:n]):
    concept, count = entry[0], entry[1]
    print(f"  {i}.   {concept}:     {count}")

### What are the most frequent concepts in each group?
10 MORE FREQUENT CONCEPTS IN USEFUL QUESTIONS
   Concept:   frequency
  0.   year:     32
  1.   Warsaw:     16
  2.   Huguenots:     13
  3.   people:     13
  4.   University:     11
  5.   Fresno:     10
  6.   Rhine:     9
  7.   Harvard:     9
  8.   Jacksonville:     8
  9.   Chicago:     8

10 MORE FREQUENT CONCEPTS IN NOT USEFUL QUESTIONS
   Concept:   frequency
  0.   Virgin Media:     3
  1.   Europe:     2
  2.   Chicago:     2
  3.   University:     2
  4.   civil disobedience:     2
  5.   percentage:     2
  6.   people:     2
  7.   Justice:     2
  8.   Court:     2
  9.   equal:     2


In [7]:
# Compare the two groups on verb usage. Each heading is printed right before
# the helper that answers it; judging by the recorded output, the helpers
# print their own verdict, p-value and group averages.
print("### Are there statistically significant differences in the number of verbs in each question between both groups?")
# The return value is kept: it is passed to most_frequent_concepts() later on.
verbs_groups = diff_number_of_verbs(sets, sets_name)
print("### Are there statistically significant differences in the proportion of verbs per words in each question between both groups?")
diff_prop_of_verbs(sets, sets_name)


### Are there statistically significant differences in the number of verbs in each question between both groups?


100%|██████████| 1094/1094 [00:00<00:00, 188048.38it/s]
100%|██████████| 83/83 [00:00<00:00, 83006.02it/s]


No, there are not statistically significant differences in the number of VERBS in each question between the two groups
   P-value = 0.5957902391911704
   Average number of verbs in useful questions: 0.916819012797075 verbs
   Average number of verbs in not useful questions: 0.963855421686747 verbs
### Are there statistically significant differences in the proportion of verbs per words in each question between both groups?


100%|██████████| 1094/1094 [00:00<00:00, 109428.80it/s]
100%|██████████| 83/83 [00:00<00:00, 83006.02it/s]

No, there are not Statistically significant differences in the proportion of VERBS per words in each question between the two groups
   P-value = 0.9959409511514992
   Average proportion of verbs per words in useful questions: 0.08221030882725852 verbs
   Average proportion of verbs per words in not useful questions: 0.0821729517243029 verbs





In [8]:
print("### What are the most frequent verbs?")
# most_frequent_concepts() is reused for verbs: it is fed the verb groups
# instead of the concept groups.
cnt_useful, cnt_not_useful, all_verbs_useful, all_verbs_not_useful = most_frequent_concepts(verbs_groups)
# Maximum number of entries listed per group.
n = 10

# Both rankings are sliced to at most `n` entries instead of being indexed
# with range(n): a group with fewer than `n` distinct verbs then prints a
# shorter list rather than raising IndexError.
# NOTE(review): assumes the rankings are sliceable sequences of
# (verb, count) entries, which is how they are indexed here — confirm.
print(f"{n} MORE FREQUENT VERBS IN USEFUL QUESTIONS")
print("   Verb:   frequency      Proportion")
for i, entry in enumerate(cnt_useful[:n]):
    verb, count = entry[0], entry[1]
    # Share of this verb among all entries of all_verbs_useful.
    prop = count / len(all_verbs_useful)
    print(f"  {i}.   {verb}:     {count}      {prop}")

print()
print(f"{n} MORE FREQUENT VERBS IN NOT USEFUL QUESTIONS")
print("   Verb:   frequency      Proportion")
for i, entry in enumerate(cnt_not_useful[:n]):
    verb, count = entry[0], entry[1]
    # Share of this verb among all entries of all_verbs_not_useful.
    prop = count / len(all_verbs_not_useful)
    print(f"  {i}.   {verb}:     {count}      {prop}")

### What are the most frequent verbs?
10 MORE FREQUENT VERBS IN USEFUL QUESTIONS
   Verb:   frequency      Proportion
  0.   established:     18      0.01794616151545364
  1.   located:     12      0.011964107676969093
  2.   begin:     11      0.010967098703888335
  3.   called:     11      0.010967098703888335
  4.   built:     10      0.009970089730807577
  5.   use:     9      0.00897308075772682
  6.   known:     9      0.00897308075772682
  7.   developed:     9      0.00897308075772682
  8.   founded:     8      0.007976071784646061
  9.   occur:     8      0.007976071784646061

10 MORE FREQUENT VERBS IN NOT USEFUL QUESTIONS
   Verb:   frequency      Proportion
  0.   developed:     3      0.0375
  1.   designed:     2      0.025
  2.   defined:     2      0.025
  3.   established:     2      0.025
  4.   describe:     1      0.0125
  5.   means:     1      0.0125
  6.   apply:     1      0.0125
  7.   dominated:     1      0.0125
  8.   capture:     1      0.0125
  9.   born:  

In [9]:
# Collect the first token of every question, per group:
# first_words maps group name -> list of first tokens (as strings).
first_words = {}
# The loop variable is deliberately NOT called `set`: at notebook top level
# that would rebind the builtin `set` for every cell executed afterwards.
for group_df, name in zip(sets, sets_name):
    pos_analysis = POS_analysis_object()
    questions_pipeline = pos_analysis.nlp_pipeline(group_df["text"].values)

    # Element 0 of each processed question is taken as its first word.
    first_words[name] = [str(question[0]) for question in tqdm(questions_pipeline)]

100%|██████████| 1094/1094 [00:00<00:00, 1094079.30it/s]
100%|██████████| 83/83 [00:00<?, ?it/s]


In [10]:
# NOTE(review): `cnt` is not read by any of the cells visible here; kept in
# case a later cell relies on it.
cnt = Counter()

# Rank first words from most to least frequent. Sorting ascending by count
# and then reversing the whole list means entries with equal counts end up
# in the reverse of their first-seen order; the [::-1] keeps exactly that.
cnt_useful = sorted(Counter(first_words["useful"]).items(), key=lambda pair: pair[1])[::-1]
cnt_not_useful = sorted(Counter(first_words["not_useful"]).items(), key=lambda pair: pair[1])[::-1]

In [11]:
# Maximum number of entries listed per group.
n = 10

# Both rankings are sliced to at most `n` entries instead of being indexed
# with range(n): a group with fewer than `n` distinct first words then
# prints a shorter list rather than raising IndexError.
print(f"{n} MORE FREQUENT FIRST WORDS IN USEFUL QUESTIONS")
print("   Init word:   frequency      Proportion")
for i, (word, count) in enumerate(cnt_useful[:n]):
    # Share of questions in the group that start with this word.
    prop = count / len(first_words["useful"])
    print(f"  {i}.   {word}:     {count}      {prop}")

print()
print(f"{n} MORE FREQUENT FIRST WORDS IN NOT USEFUL QUESTIONS")
print("   Init word:   frequency      Proportion")
for i, (word, count) in enumerate(cnt_not_useful[:n]):
    # Share of questions in the group that start with this word.
    prop = count / len(first_words["not_useful"])
    print(f"  {i}.   {word}:     {count}      {prop}")

10 MORE FREQUENT FIRST WORDS IN USEFUL QUESTIONS
   Init word:   frequency      Proportion
  0.   What:     462      0.42230347349177333
  1.   When:     202      0.1846435100548446
  2.   Who:     175      0.15996343692870202
  3.   How:     130      0.11882998171846434
  4.   In:     36      0.03290676416819013
  5.   Where:     26      0.02376599634369287
  6.   The:     18      0.016453382084095063
  7.   According:     3      0.002742230347349177
  8.   Private:     3      0.002742230347349177
  9.   Which:     3      0.002742230347349177

10 MORE FREQUENT FIRST WORDS IN NOT USEFUL QUESTIONS
   Init word:   frequency      Proportion
  0.   What:     42      0.5060240963855421
  1.   When:     13      0.1566265060240964
  2.   Who:     13      0.1566265060240964
  3.   How:     7      0.08433734939759036
  4.   In:     2      0.024096385542168676
  5.   The:     1      0.012048192771084338
  6.   Where:     1      0.012048192771084338
  7.   By:     1      0.012048192771084338
  8.