In [1]:
import notebook_hook
import logging, sys
logging.disable(sys.maxsize)
import pathlib
import json
from tqdm import tqdm
import pandas as pd
from collections import Counter
import numpy as np
from qg.notebooks.t_tests.t_test_functions import (
    count_tokens,
    diff_number_words_per_question,
    diff_number_of_concepts_per_question,
    diff_prop_concepts_per_words,
    most_frequent_concepts,
    diff_number_of_verbs,
    diff_prop_of_verbs,
)

from qg.results_analysis.objects.POSAnalysis import POS_analysis_object

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# The project root is taken to be two levels above the kernel's working directory.
ROOT_DIR = pathlib.Path.cwd().resolve().parents[1]
print(f"Root directory: {ROOT_DIR}")

Root directory: C:\Users\DeboraOrsolich\Development\question_generation_models\deep_qg\qg


In [3]:
# Name of the experiment whose validation outputs are analysed below.
model = "AQPL"
experiment_dir = ROOT_DIR / f"transformers_models/experiment_{model}"

# Generated questions: the list stored under the "predictions" key.
with open(experiment_dir / "mapped_validation_questions.json", encoding="utf-8") as f:
    questions_val = json.load(f)
pred_questions = questions_val["predictions"]

# Classifier labels: the list stored under the "pred_y" key. It is paired
# column-wise with `pred_questions` when the DataFrame is built.
with open(experiment_dir / "classification_validation_questions.json", encoding="utf-8") as f:
    pred_y = json.load(f)["pred_y"]

In [4]:
# One row per generated question together with its predicted label.
df = pd.DataFrame({"text": pred_questions, "labels": pred_y})

# Split the questions by label: 1 goes to "useful", 0 to "not_useful".
useful = df.loc[df["labels"] == 1]
not_useful = df.loc[df["labels"] == 0]

# Parallel lists: sets[i] is the group called sets_name[i].
sets_name = ["useful", "not_useful"]
sets = [useful, not_useful]
print(*(len(group) for group in sets))


1232 118


In [5]:
# Compare the "useful" and "not_useful" groups on three per-question measures.
# The helpers come from qg.notebooks.t_tests.t_test_functions and receive the
# group DataFrames (`sets`) plus their display names (`sets_name`).
# NOTE(review): judging by the recorded output, each helper prints its own
# verdict, p-value and group averages -- confirm in the helper module.
print("### Are there statistically significant differences in the number of words in the questions of each group?")
diff_number_words_per_question(sets, sets_name)
print()
print("### Are there statistically significant differences in the number of concepts in the questions of each group?")
# The return value is kept: it is passed to most_frequent_concepts() further down.
strings_groups = diff_number_of_concepts_per_question(sets, sets_name)
print()
print("### Are there statistically significant differences in the proportion of concepts per words in each question between both groups?")
diff_prop_concepts_per_words(sets, sets_name)


### Are there statistically significant differences in the number of words in the questions of each group?
No, there are not statistically significant differences in the length of the questions between the two groups
   P-value = 0.12467276960669016
   Average length of useful questions 11.67775974025974 tokens
   Average length of not_useful questions 10.923728813559322 tokens

### Are there statistically significant differences in the number of concepts in the questions of each group?


100%|██████████| 1232/1232 [00:00<00:00, 60660.71it/s]
100%|██████████| 118/118 [00:00<00:00, 60445.51it/s]


No, there are not statistically significant differences in the number of concepts in each question between the two groups
   P-value = 0.13222071413002534
   Average number of concepts in useful questions: 3.1314935064935066
   Average number of concepts in not useful questions: 2.889830508474576

### Are there statistically significant differences in the proportion of concepts per words in each question between both groups?


100%|██████████| 1232/1232 [00:00<00:00, 72468.73it/s]
100%|██████████| 118/118 [00:00<00:00, 57960.87it/s]

No, there are not statistically significant differences HAVE NOT been found in the proportion of concepts per word in each question between the two groups
   P-value = 0.2430872764542166
   Average proportion of concepts per words in useful questions: 0.22992771017783972
   Average proportion of concepts per words in not_useful questions: 0.22281046495034962





In [6]:
print("### What are the most frequent concepts in each group?")
# Rank the concepts of each group. `strings_groups` is the value returned by
# diff_number_of_concepts_per_question() earlier in the notebook.
cnt_useful, cnt_not_useful, all_strings_useful, all_strings_not_useful = most_frequent_concepts(strings_groups)
n = 10  # maximum number of rows shown per table

print(f"{n} MORE FREQUENT CONCEPTS IN USEFUL QUESTIONS")
print("   Concept:   frequency")
# A group can hold fewer than n distinct concepts, so cap the range instead of
# indexing past the end of the ranking (which raised IndexError).
for i in range(min(n, len(cnt_useful))):
    concept = cnt_useful[i][0]
    count = cnt_useful[i][1]
    print(f"  {i}.   {concept}:     {count}")

print()
print(f"{n} MORE FREQUENT CONCEPTS IN NOT USEFUL QUESTIONS")
print("   Concept:   frequency")
for i in range(min(n, len(cnt_not_useful))):
    concept = cnt_not_useful[i][0]
    count = cnt_not_useful[i][1]
    print(f"  {i}.   {concept}:     {count}")

### What are the most frequent concepts in each group?
10 MORE FREQUENT CONCEPTS IN USEFUL QUESTIONS
   Concept:   frequency
  0.   year:     43
  1.   hl:     22
  2.   Warsaw:     21
  3.   type:     20
  4.   h:     18
  5.   Fresno:     16
  6.   Huguenots:     15
  7.   Victoria:     15
  8.   people:     15
  9.   University:     14

10 MORE FREQUENT CONCEPTS IN NOT USEFUL QUESTIONS
   Concept:   frequency
  0.   year:     6
  1.   hl:     5
  2.   Warsaw:     3
  3.   Rhine:     3
  4.   type:     3
  5.   income:     2
  6.   Harvard:     2
  7.   land:     2
  8.   tuition:     2
  9.   term:     2


In [7]:
# Compare the two groups on verb usage. Both helpers come from
# qg.notebooks.t_tests.t_test_functions and take the group DataFrames and names.
# NOTE(review): judging by the recorded output, each helper prints its own
# verdict, p-value and group averages -- confirm in the helper module.
print("### Are there statistically significant differences in the number of verbs in each question between both groups?")
# The return value is kept: it is passed to most_frequent_concepts() in the verbs table below.
verbs_groups = diff_number_of_verbs(sets, sets_name)
print("### Are there statistically significant differences in the proportion of verbs per words in each question between both groups?")
diff_prop_of_verbs(sets, sets_name)


### Are there statistically significant differences in the number of verbs in each question between both groups?


100%|██████████| 1232/1232 [00:00<00:00, 136367.73it/s]
100%|██████████| 118/118 [00:00<00:00, 114381.30it/s]


No, there are not statistically significant differences in the number of VERBS in each question between the two groups
   P-value = 0.0578927080264915
   Average number of verbs in useful questions: 1.021103896103896 verbs
   Average number of verbs in not useful questions: 0.864406779661017 verbs
### Are there statistically significant differences in the proportion of verbs per words in each question between both groups?


100%|██████████| 1232/1232 [00:00<00:00, 141200.75it/s]
100%|██████████| 118/118 [00:00<00:00, 69395.38it/s]

No, there are not Statistically significant differences in the proportion of VERBS per words in each question between the two groups
   P-value = 0.16726632763575155
   Average proportion of verbs per words in useful questions: 0.07767426780443981 verbs
   Average proportion of verbs per words in not useful questions: 0.06969291836613785 verbs





In [8]:
print("### What are the most frequent verbs?")
# most_frequent_concepts() is reused here on the verb groups returned by
# diff_number_of_verbs(); the names cnt_useful / cnt_not_useful are rebound.
cnt_useful, cnt_not_useful, all_verbs_useful, all_verbs_not_useful = most_frequent_concepts(verbs_groups)
n = 10  # maximum number of rows shown per table

print(f"{n} MORE FREQUENT VERBS IN USEFUL QUESTIONS")
print("   Verb:   frequency      Proportion")
# Cap the range at the number of ranked verbs: a small group can have fewer
# than n distinct verbs, and indexing past the end raised IndexError.
for i in range(min(n, len(cnt_useful))):
    verb = cnt_useful[i][0]
    count = cnt_useful[i][1]
    # Share of this verb among all verbs collected for the group.
    prop = count / len(all_verbs_useful)
    print(f"  {i}.   {verb}:     {count}      {prop}")

print()
print(f"{n} MORE FREQUENT VERBS IN NOT USEFUL QUESTIONS")
print("   Verb:   frequency      Proportion")
for i in range(min(n, len(cnt_not_useful))):
    verb = cnt_not_useful[i][0]
    count = cnt_not_useful[i][1]
    prop = count / len(all_verbs_not_useful)
    print(f"  {i}.   {verb}:     {count}      {prop}")

### What are the most frequent verbs?
10 MORE FREQUENT VERBS IN USEFUL QUESTIONS
   Verb:   frequency      Proportion
  0.   known:     21      0.016693163751987282
  1.   established:     18      0.014308426073131956
  2.   developed:     15      0.01192368839427663
  3.   introduced:     11      0.008744038155802861
  4.   called:     11      0.008744038155802861
  5.   given:     11      0.008744038155802861
  6.   begin:     11      0.008744038155802861
  7.   located:     10      0.00794912559618442
  8.   operate:     9      0.007154213036565978
  9.   based:     9      0.007154213036565978

10 MORE FREQUENT VERBS IN NOT USEFUL QUESTIONS
   Verb:   frequency      Proportion
  0.   die:     3      0.029411764705882353
  1.   lived:     3      0.029411764705882353
  2.   created:     2      0.0196078431372549
  3.   work:     2      0.0196078431372549
  4.   hit:     2      0.0196078431372549
  5.   established:     2      0.0196078431372549
  6.   founded:     2      0.01960784313

In [9]:
# Maps each group name to the list of first tokens (as strings) of its questions.
first_words = {}
# The loop variable is deliberately not called `set`: in a notebook it would
# stay bound after the loop and shadow the builtin for every later cell.
for group_df, name in zip(sets, sets_name):
    pos_analysis = POS_analysis_object()
    questions_pipeline = pos_analysis.nlp_pipeline(group_df["text"].values)

    # question[0] is the first token of each processed question.
    first_word = [str(question[0]) for question in tqdm(questions_pipeline)]
    first_words[name] = first_word

100%|██████████| 1232/1232 [00:00<00:00, 617074.58it/s]
100%|██████████| 118/118 [00:00<00:00, 117868.03it/s]


In [10]:
cnt = Counter()  # not referenced by the visible cells; kept unchanged

# Build (first word, count) pairs per group, ordered from most to least
# frequent. Sorting ascending and then reversing (rather than sorting with
# reverse=True) keeps the order among words with equal counts unchanged.
cnt_useful = sorted(Counter(first_words["useful"]).items(), key=lambda pair: pair[1])[::-1]
cnt_not_useful = sorted(Counter(first_words["not_useful"]).items(), key=lambda pair: pair[1])[::-1]

In [11]:
n = 10  # maximum number of rows shown per table

print(f"{n} MORE FREQUENT FIRST WORDS IN USEFUL QUESTIONS")
print("   Init word:   frequency      Proportion")
# Cap the range at the number of distinct first words: a group can have fewer
# than n of them, and indexing past the end raised IndexError.
for i in range(min(n, len(cnt_useful))):
    word = cnt_useful[i][0]
    count = cnt_useful[i][1]
    # Share of the group's questions that start with this word.
    prop = count / len(first_words["useful"])
    print(f"  {i}.   {word}:     {count}      {prop}")

print()
print(f"{n} MORE FREQUENT FIRST WORDS IN NOT USEFUL QUESTIONS")
print("   Init word:   frequency      Proportion")
for i in range(min(n, len(cnt_not_useful))):
    word = cnt_not_useful[i][0]
    count = cnt_not_useful[i][1]
    prop = count / len(first_words["not_useful"])
    print(f"  {i}.   {word}:     {count}      {prop}")

10 MORE FREQUENT FIRST WORDS IN USEFUL QUESTIONS
   Init word:   frequency      Proportion
  0.   What:     577      0.46834415584415584
  1.    :     152      0.12337662337662338
  2.   Who:     141      0.11444805194805195
  3.   When:     129      0.10470779220779221
  4.   How:     119      0.09659090909090909
  5.   In:     40      0.032467532467532464
  6.   Where:     16      0.012987012987012988
  7.   Which:     8      0.006493506493506494
  8.   The:     8      0.006493506493506494
  9.   the:     3      0.002435064935064935

10 MORE FREQUENT FIRST WORDS IN NOT USEFUL QUESTIONS
   Init word:   frequency      Proportion
  0.   What:     49      0.4152542372881356
  1.    :     23      0.19491525423728814
  2.   Who:     14      0.11864406779661017
  3.   How:     11      0.09322033898305085
  4.   When:     9      0.07627118644067797
  5.   In:     7      0.059322033898305086
  6.   Where:     2      0.01694915254237288
  7.   From:     1      0.00847457627118644
  8.   Which: