In [1]:
import notebook_hook
import logging, sys
logging.disable(sys.maxsize)
import pathlib
import json
from tqdm import tqdm
import pandas as pd
from collections import Counter
import numpy as np
from qg.notebooks.t_tests.t_test_functions import (
    count_tokens,
    diff_number_words_per_question,
    diff_number_of_concepts_per_question,
    diff_prop_concepts_per_words,
    most_frequent_concepts,
    diff_number_of_verbs,
    diff_prop_of_verbs,
)

from qg.results_analysis.objects.POSAnalysis import POS_analysis_object

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# ROOT_DIR is the grandparent of the current working directory; every data
# path in the cells below is built relative to it.
notebook_dir = pathlib.Path().resolve()
ROOT_DIR = notebook_dir.parents[1]
print(f"Root directory: {ROOT_DIR}")

Root directory: C:\Users\DeboraOrsolich\Development\question_generation_models\deep_qg\qg


In [3]:
# Load the validation outputs of one experiment: the generated questions and
# the predicted label for each of them.
model = "OQPL"
experiment_dir = ROOT_DIR / f"transformers_models/experiment_{model}"

with open(experiment_dir / "mapped_validation_questions.json", encoding="utf-8") as questions_file:
    questions_val = json.load(questions_file)
pred_questions = questions_val["predictions"]

with open(experiment_dir / "classification_validation_questions.json", encoding="utf-8") as labels_file:
    # Only the "pred_y" entry of the classification file is used.
    pred_y = json.load(labels_file)["pred_y"]

In [4]:
# One row per predicted question, paired with its predicted label.
df = pd.DataFrame({"text": pred_questions, "labels": pred_y})

# Split the questions by predicted label (1 -> useful, 0 -> not useful).
# `sets` and `sets_name` are parallel lists and must stay in the same order.
label_column = df["labels"]
useful = df.loc[label_column.eq(1)]
not_useful = df.loc[label_column.eq(0)]

sets_name = ["useful", "not_useful"]
sets = [useful, not_useful]
print(len(useful), len(not_useful))


992 185


In [5]:
# Compare the "useful" and "not_useful" groups on three per-question measures:
# number of words, number of concepts, and concepts per word.
# The three helpers are imported from qg.notebooks.t_tests.t_test_functions;
# this cell only prints the section headers, the verdict / p-value / averages
# in the recorded output are printed by the helpers themselves.
print("### Are there statistically significant differences in the number of words in the questions of each group?")
diff_number_words_per_question(sets, sets_name)
print()
print("### Are there statistically significant differences in the number of concepts in the questions of each group?")
# The return value is kept because the next cell passes it to
# most_frequent_concepts. NOTE(review): presumably the concept strings found
# in each group -- confirm against the helper's implementation.
strings_groups = diff_number_of_concepts_per_question(sets, sets_name)
print()
print("### Are there statistically significant differences in the proportion of concepts per words in each question between both groups?")
diff_prop_concepts_per_words(sets, sets_name)


### Are there statistically significant differences in the number of words in the questions of each group?
No, there are not statistically significant differences in the length of the questions between the two groups
   P-value = 0.4398158235862317
   Average length of useful questions 11.331653225806452 tokens
   Average length of not_useful questions 11.6 tokens

### Are there statistically significant differences in the number of concepts in the questions of each group?


100%|██████████| 992/992 [00:00<00:00, 50637.10it/s]
100%|██████████| 185/185 [00:00<00:00, 60920.64it/s]


No, there are not statistically significant differences in the number of concepts in each question between the two groups
   P-value = 0.2884185089784427
   Average number of concepts in useful questions: 3.0534274193548385
   Average number of concepts in not useful questions: 3.1837837837837837

### Are there statistically significant differences in the proportion of concepts per words in each question between both groups?


100%|██████████| 992/992 [00:00<00:00, 76328.62it/s]
100%|██████████| 185/185 [00:00<00:00, 90913.44it/s]

No, there are not statistically significant differences HAVE NOT been found in the proportion of concepts per word in each question between the two groups
   P-value = 0.28734275904707623
   Average proportion of concepts per words in useful questions: 0.23380767944302489
   Average proportion of concepts per words in not_useful questions: 0.2392658224743459





In [6]:
# Print the n most frequent concepts of each group.
# most_frequent_concepts returns one ranking per group, read below as
# (concept, count) entries, plus two further values that this cell does not
# use (presumably the flat lists of all concept strings -- TODO confirm).
print("### What are the most frequent concepts in each group?")
cnt_useful, cnt_not_useful, all_strings_useful, all_strings_not_useful = most_frequent_concepts(strings_groups)
n = 10

print(f"{n} MORE FREQUENT CONCEPTS IN USEFUL QUESTIONS")
print(f"   Concept:   frequency")
# Slice rather than index over range(n): a group with fewer than n distinct
# concepts prints what it has instead of raising IndexError.
for i, entry in enumerate(cnt_useful[:n]):
    concept = entry[0]
    count = entry[1]
    print(f"  {i}.   {concept}:     {count}")

print()
print(f"{n} MORE FREQUENT CONCEPTS IN NOT USEFUL QUESTIONS")
print(f"   Concept:   frequency")
for i, entry in enumerate(cnt_not_useful[:n]):
    concept = entry[0]
    count = entry[1]
    print(f"  {i}.   {concept}:     {count}")

### What are the most frequent concepts in each group?
10 MORE FREQUENT CONCEPTS IN USEFUL QUESTIONS
   Concept:   frequency
  0.   type:     33
  1.   definition:     17
  2.   Warsaw:     14
  3.   Rhine:     13
  4.   University:     12
  5.   imperialism:     11
  6.   number:     11
  7.   Parliament:     9
  8.   immune system:     8
  9.   construction:     8

10 MORE FREQUENT CONCEPTS IN NOT USEFUL QUESTIONS
   Concept:   frequency
  0.   Warsaw:     4
  1.   force:     4
  2.   government:     4
  3.   southern California:     4
  4.   Newton:     3
  5.   wealth:     3
  6.   civil disobedience:     3
  7.   control:     2
  8.   term:     2
  9.   system:     2


In [7]:
# Compare the two groups on verb usage: number of verbs per question and
# verbs per word. Both helpers are imported from
# qg.notebooks.t_tests.t_test_functions; the verdicts in the recorded output
# are printed by the helpers, this cell only prints the section headers.
print("### Are there statistically significant differences in the number of verbs in each question between both groups?")
# The return value is kept because the next cell passes it to
# most_frequent_concepts. NOTE(review): presumably the verbs found in each
# group -- confirm against the helper's implementation.
verbs_groups = diff_number_of_verbs(sets, sets_name)
print("### Are there statistically significant differences in the proportion of verbs per words in each question between both groups?")
diff_prop_of_verbs(sets, sets_name)


### Are there statistically significant differences in the number of verbs in each question between both groups?


100%|██████████| 992/992 [00:00<00:00, 100472.07it/s]
100%|██████████| 185/185 [00:00<00:00, 61700.56it/s]


No, there are not statistically significant differences in the number of VERBS in each question between the two groups
   P-value = 0.9401040983387079
   Average number of verbs in useful questions: 0.9465725806451613 verbs
   Average number of verbs in not useful questions: 0.9513513513513514 verbs
### Are there statistically significant differences in the proportion of verbs per words in each question between both groups?


100%|██████████| 992/992 [00:00<00:00, 110241.89it/s]
100%|██████████| 185/185 [00:00<00:00, 92572.92it/s]

No, there are not Statistically significant differences in the proportion of VERBS per words in each question between the two groups
   P-value = 0.6461978354538308
   Average proportion of verbs per words in useful questions: 0.07556092802479986 verbs
   Average proportion of verbs per words in not useful questions: 0.07337937074119834 verbs





In [8]:
# Print the n most frequent verbs of each group, with each verb's share of
# all verbs collected for that group.
# most_frequent_concepts is reused here on the verb groups; it returns one
# ranking per group, read below as (verb, count) entries, plus the per-group
# collections whose lengths serve as denominators.
print("### What are the most frequent verbs?")
cnt_useful, cnt_not_useful, all_verbs_useful, all_verbs_not_useful = most_frequent_concepts(verbs_groups)
n = 10

print(f"{n} MORE FREQUENT VERBS IN USEFUL QUESTIONS")
print(f"   Verb:   frequency      Proportion")
total_useful = len(all_verbs_useful)
# Slice rather than index over range(n): a group with fewer than n distinct
# verbs prints what it has instead of raising IndexError. An empty ranking
# also means the division below is never reached.
for i, entry in enumerate(cnt_useful[:n]):
    verb = entry[0]
    count = entry[1]
    prop = count / total_useful
    print(f"  {i}.   {verb}:     {count}      {prop}")

print()
print(f"{n} MORE FREQUENT VERBS IN NOT USEFUL QUESTIONS")
print(f"   Verb:   frequency      Proportion")
total_not_useful = len(all_verbs_not_useful)
for i, entry in enumerate(cnt_not_useful[:n]):
    verb = entry[0]
    count = entry[1]
    prop = count / total_not_useful
    print(f"  {i}.   {verb}:     {count}      {prop}")

### What are the most frequent verbs?
10 MORE FREQUENT VERBS IN USEFUL QUESTIONS
   Verb:   frequency      Proportion
  0.   given:     9      0.009584664536741214
  1.   established:     9      0.009584664536741214
  2.   found:     9      0.009584664536741214
  3.   based:     8      0.008519701810436636
  4.   located:     8      0.008519701810436636
  5.   known:     7      0.007454739084132056
  6.   developed:     6      0.006389776357827476
  7.   introduced:     6      0.006389776357827476
  8.   required:     6      0.006389776357827476
  9.   use:     6      0.006389776357827476

10 MORE FREQUENT VERBS IN NOT USEFUL QUESTIONS
   Verb:   frequency      Proportion
  0.   developed:     5      0.028409090909090908
  1.   found:     3      0.017045454545454544
  2.   founded:     3      0.017045454545454544
  3.   sent:     2      0.011363636363636364
  4.   formulated:     2      0.011363636363636364
  5.   established:     2      0.011363636363636364
  6.   known:     2      0.

In [9]:
# Collect the first token of every question, keyed by group name.
first_words = {}
# The loop variable is deliberately not called `set`: at notebook top level
# that name would replace the builtin `set` for every cell executed afterwards.
for group_df, name in zip(sets, sets_name):
    # NOTE(review): a fresh POS_analysis_object is built for each group, as in
    # the original; it could be hoisted if the object keeps no per-call state.
    pos_analysis = POS_analysis_object()
    questions_pipeline = pos_analysis.nlp_pipeline(group_df["text"].values)

    # Assumes every processed question has at least one token; an empty one
    # would raise on question[0] -- TODO confirm upstream filtering.
    first_words[name] = [str(question[0]) for question in tqdm(questions_pipeline)]

100%|██████████| 992/992 [00:00<00:00, 991126.62it/s]
100%|██████████| 185/185 [00:00<?, ?it/s]


In [10]:
# Not read by any of the cells visible here; kept so the namespace is unchanged.
cnt = Counter()

# Rank the first words of each group from most to least frequent.
# The ranking is an ascending stable sort that is then reversed, so words with
# equal counts appear in reverse first-seen order. This is intentional:
# Counter.most_common() or sorted(..., reverse=True) would order ties
# differently and change the printed ranking.
cnt_useful = sorted(Counter(first_words["useful"]).items(), key=lambda pair: pair[1])[::-1]
cnt_not_useful = sorted(Counter(first_words["not_useful"]).items(), key=lambda pair: pair[1])[::-1]

In [11]:
# Print the n most frequent first words of each group, with the share of the
# group's questions that start with that word (one first word is collected
# per question, so len(first_words[group]) is the denominator).
n = 10

print(f"{n} MORE FREQUENT FIRST WORDS IN USEFUL QUESTIONS")
print(f"   Init word:   frequency      Proportion")
total_useful = len(first_words["useful"])
# Slice rather than index over range(n): a group with fewer than n distinct
# first words prints what it has instead of raising IndexError.
for i, (word, count) in enumerate(cnt_useful[:n]):
    prop = count / total_useful
    print(f"  {i}.   {word}:     {count}      {prop}")

print()
print(f"{n} MORE FREQUENT FIRST WORDS IN NOT USEFUL QUESTIONS")
print(f"   Init word:   frequency      Proportion")
total_not_useful = len(first_words["not_useful"])
for i, (word, count) in enumerate(cnt_not_useful[:n]):
    prop = count / total_not_useful
    print(f"  {i}.   {word}:     {count}      {prop}")

10 MORE FREQUENT FIRST WORDS IN USEFUL QUESTIONS
   Init word:   frequency      Proportion
  0.   What:     506      0.5100806451612904
  1.   Who:     141      0.14213709677419356
  2.   When:     91      0.09173387096774194
  3.   How:     67      0.06754032258064516
  4.   In:     28      0.028225806451612902
  5.   Where:     21      0.021169354838709676
  6.   The:     11      0.011088709677419355
  7.   By:     7      0.007056451612903226
  8.   Which:     7      0.007056451612903226
  9.   Between:     6      0.006048387096774193

10 MORE FREQUENT FIRST WORDS IN NOT USEFUL QUESTIONS
   Init word:   frequency      Proportion
  0.   What:     98      0.5297297297297298
  1.   Who:     30      0.16216216216216217
  2.   When:     16      0.08648648648648649
  3.   How:     11      0.05945945945945946
  4.   In:     5      0.02702702702702703
  5.   Which:     3      0.016216216216216217
  6.   Where:     2      0.010810810810810811
  7.   The:     2      0.010810810810810811
  8.  