<a href="https://colab.research.google.com/github/Yhola/Computational-Pragmatic-Cability-Analysis-of-Chat-GPT-4o/blob/main/Untitled21.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
!pip install python-docx
!pip install pandas

import docx
import re
import pandas as pd

def extract_paragraphs_from_docx(file_path):
    """Load a .docx file and return its text split on the '####' delimiter.

    The text of every entry in ``doc.paragraphs`` is joined with newlines,
    the joined text is cut at each '####' marker, and every piece is
    whitespace-stripped. Pieces that are empty after stripping are dropped.
    """
    document = docx.Document(file_path)
    joined = "\n".join(block.text for block in document.paragraphs)
    chunks = []
    for piece in joined.split('####'):
        cleaned = piece.strip()
        if cleaned:
            chunks.append(cleaned)
    return chunks

# Original checking functions remain the same, but we will call them in new order.

def check_factor_1_clear_argumentation(paragraph):
    """Return 1 if the paragraph contains an analytic cue phrase, else 0.

    Matching is a case-insensitive substring test (formerly factor_24).
    """
    # NOTE(review): this phrase list is identical to the one used by
    # check_factor_6_depth_of_insight, so both factors always score the same.
    lowered = paragraph.lower()
    for cue in ("this implies", "the underlying cause", "a possible interpretation is"):
        if cue in lowered:
            return 1
    return 0

def check_factor_2_diverse_perspectives(paragraph):
    """Return 1 if the paragraph contains a multiple-viewpoint cue phrase, else 0.

    Matching is a case-insensitive substring test (formerly factor_2).
    """
    lowered = paragraph.lower()
    for cue in ("an interdisciplinary view suggests", "cross-cultural studies show"):
        if cue in lowered:
            return 1
    return 0

def check_factor_3_comparative_analysis(paragraph):
    """Return 1 if the paragraph contains a comparison cue phrase, else 0.

    Matching is a case-insensitive substring test (formerly factor_7).
    """
    lowered = paragraph.lower()
    for cue in ("in contrast", "similarly", "while this suggests", "a key difference lies in"):
        if cue in lowered:
            return 1
    return 0

def check_factor_4_logical_structure(paragraph):
    """Return 1 if the paragraph contains a logical connective, else 0.

    Matching is a case-insensitive substring test (formerly factor_8).
    """
    lowered = paragraph.lower()
    for connective in ("therefore", "however", "consequently", "additionally"):
        if connective in lowered:
            return 1
    return 0

def check_factor_5_theoretical_application(paragraph):
    """Return 1 if the paragraph mentions a theory-related keyword, else 0.

    Matching is a case-insensitive substring test (formerly factor_10), so a
    keyword embedded in a longer word also counts.
    """
    lowered = paragraph.lower()
    for keyword in ("theory", "framework", "model", "example"):
        if keyword in lowered:
            return 1
    return 0

def check_factor_6_depth_of_insight(paragraph):
    """Return 1 if the paragraph contains an interpretive cue phrase, else 0.

    Matching is a case-insensitive substring test (formerly factor_12).
    """
    lowered = paragraph.lower()
    for cue in ("this implies", "the underlying cause", "a possible interpretation is"):
        if cue in lowered:
            return 1
    return 0

def check_factor_7_counterarguments_limitations(paragraph):
    """Return 1 if the paragraph contains a limitation/counterargument phrase, else 0.

    Matching is a case-insensitive substring test (formerly factor_14).
    """
    lowered = paragraph.lower()
    for cue in ("a limitation of this approach is", "critics argue", "this overlooks"):
        if cue in lowered:
            return 1
    return 0

def check_factor_8_citation_of_sources(paragraph):
    """Return 1 if the paragraph appears to attribute a source, else 0.

    A hit is either a parenthetical of the form ``(Word, 1234)`` found by
    regex in the original text, or one of a few attribution cue strings
    found in the lower-cased text (formerly factor_21).
    """
    parenthetical = re.search(r"\(\w+,\s*\d{4}\)", paragraph) is not None
    lowered = paragraph.lower()
    cue_found = False
    for cue in ("as noted by", "according to recent studies", "[author,", " (20"):
        if cue in lowered:
            cue_found = True
            break
    return int(cue_found or parenthetical)

def check_factor_9_critical_engagement_with_sources(paragraph):
    """Return 1 if the paragraph contains a critical-engagement cue, else 0.

    Matching is a case-insensitive substring test (formerly factor_11).
    """
    lowered = paragraph.lower()
    for cue in ("critically", "challenge the assumption", "critique", "critical analysis"):
        if cue in lowered:
            return 1
    return 0

def check_factor_10_questions_for_further_research(paragraph):
    """Return 1 if the paragraph contains a further-research cue phrase, else 0.

    Matching is a case-insensitive substring test (formerly factor_20).
    """
    lowered = paragraph.lower()
    for cue in ("future studies could explore", "this raises questions about"):
        if cue in lowered:
            return 1
    return 0

def check_factor_11_statistics_empirical_evidence(paragraph):
    """Return 1 if the paragraph contains an evidence-related cue, else 0.

    Matching is a case-insensitive substring test (formerly factor_4).
    """
    lowered = paragraph.lower()
    for cue in ("peer-reviewed", "according to", "as noted by", "evidence", "data from"):
        if cue in lowered:
            return 1
    return 0

def check_factor_12_original_thought_interpretation(paragraph):
    """Return 1 if the paragraph contains an original-thought cue phrase, else 0.

    Matching is a case-insensitive substring test (formerly factor_18).
    """
    lowered = paragraph.lower()
    for cue in ("this suggests a novel approach", "an unexamined possibility is"):
        if cue in lowered:
            return 1
    return 0

def check_factor_13_word_count(paragraph, target=300, tolerance=0):
    """Return 1 if the paragraph's word count is within range of the target.

    Words are whitespace-separated tokens (formerly factor_1).

    Args:
        paragraph: Text to measure.
        target: Desired number of words. Defaults to 300.
        tolerance: Allowed absolute deviation from ``target``. The default of
            0 keeps the original exact-match behavior; pass a positive value
            to accept paragraphs that are close to the target length.

    Returns:
        1 when ``abs(word_count - target) <= tolerance``, otherwise 0.
    """
    word_total = len(paragraph.split())
    return 1 if abs(word_total - target) <= tolerance else 0

def check_factor_14_tone(paragraph):
    """Return 1 if the paragraph is free of the listed slang words, else 0.

    Matching is a case-insensitive substring test (formerly factor_5).
    """
    # The text may contain typographic apostrophes (U+2019 / U+2018) instead
    # of the ASCII one; normalize them so "ain’t" is caught like "ain't".
    normalized = paragraph.lower().replace("\u2019", "'").replace("\u2018", "'")
    slang_words = ["gonna", "wanna", "kiddo", "ain't"]
    return 0 if any(s in normalized for s in slang_words) else 1

def check_factor_15_style(paragraph):
    """Return 1 if the paragraph passes the academic-style heuristic, else 0.

    The heuristic (formerly factor_6) is satisfied when either the
    diverse-perspectives check or the empirical-evidence check returns 1.
    """
    if check_factor_2_diverse_perspectives(paragraph) == 1:
        return 1
    if check_factor_11_statistics_empirical_evidence(paragraph) == 1:
        return 1
    return 0

def check_factor_16_purpose(paragraph):
    """Return 1 if the paragraph states its purpose with a known phrase, else 0.

    Matching is a case-insensitive substring test (formerly factor_9).
    """
    lowered = paragraph.lower()
    for cue in ("ideal written academic writing paragraph", "the purpose of this paragraph"):
        if cue in lowered:
            return 1
    return 0

def check_factor_17_nature(paragraph):
    """Return 1 if the paragraph uses "because" and no forbidden term, else 0.

    Both tests are case-insensitive substring checks (formerly factor_19).
    """
    lowered = paragraph.lower()
    if "because" not in lowered:
        return 0
    banned = ("align", "nuanced", "comprehensive", "underscore", "interplay", "ultimately")
    for term in banned:
        if term in lowered:
            return 0
    return 1

def check_factor_18_level(paragraph):
    """Return 1 if the paragraph names a postgraduate level explicitly, else 0.

    Matching is a case-insensitive substring test (formerly factor_13).
    """
    lowered = paragraph.lower()
    for cue in ("masters-level", "postgraduate level", "msc level"):
        if cue in lowered:
            return 1
    return 0

def check_factor_19_hedging(paragraph):
    """Return 1 if the paragraph contains a hedging word, else 0.

    Matching is case-insensitive and on whole words only (formerly
    factor_15). A plain substring test would also fire on unrelated words
    such as "mayor", "dismayed" or "mighty".
    """
    hedging_words = ["may", "might", "could", "possibly", "arguably"]
    # \b anchors keep each hedge from matching inside a longer word.
    whole_word = r"\b(?:" + "|".join(re.escape(w) for w in hedging_words) + r")\b"
    return 1 if re.search(whole_word, paragraph.lower()) else 0

def check_factor_20_syntax(paragraph):
    """Return 0 if a comma is followed by a word ending in "ing", else 1.

    This is a regex heuristic for "comma + gerund" clauses (formerly
    factor_3); any word ending in "ing" after a comma triggers it.
    """
    comma_ing = re.search(r",\s*\w+ing\b", paragraph)
    return int(comma_ing is None)

def check_factor_21_morphology(paragraph):
    """Return 1 if the paragraph contains one of the listed precise terms, else 0.

    Matching is a case-insensitive substring test (formerly factor_16).
    """
    lowered = paragraph.lower()
    for term in ("significant", "noteworthy", "validity"):
        if term in lowered:
            return 1
    return 0

def check_factor_22_punctuation(paragraph):
    """Return 1 if the paragraph contains "because" (any case), else 0.

    Formerly factor_23; only the presence of the word is checked, not its
    position between clauses.
    """
    lowered = paragraph.lower()
    if "because" in lowered:
        return 1
    return 0

def check_factor_23_role(paragraph):
    """Return 1 if the paragraph states an MSc-student perspective, else 0.

    Matching is a case-insensitive substring test (formerly factor_22).
    """
    lowered = paragraph.lower()
    for cue in ("as a student of msc", "in my postgraduate studies", "from an msc perspective"):
        if cue in lowered:
            return 1
    return 0

def check_factor_24_type_of_work(paragraph):
    """Return 1 if the paragraph mentions "essay" (any case), else 0.

    Matching is a case-insensitive substring test (formerly factor_17).
    """
    lowered = paragraph.lower()
    # "this essay" can only match where "essay" already matches.
    for cue in ("essay", "this essay"):
        if cue in lowered:
            return 1
    return 0

def score_paragraph(paragraph):
    """Return the paragraph's total checklist score.

    Each of the 24 factor checks is applied in checklist order and their
    0/1 results are added up.
    """
    checks = (
        check_factor_1_clear_argumentation,
        check_factor_2_diverse_perspectives,
        check_factor_3_comparative_analysis,
        check_factor_4_logical_structure,
        check_factor_5_theoretical_application,
        check_factor_6_depth_of_insight,
        check_factor_7_counterarguments_limitations,
        check_factor_8_citation_of_sources,
        check_factor_9_critical_engagement_with_sources,
        check_factor_10_questions_for_further_research,
        check_factor_11_statistics_empirical_evidence,
        check_factor_12_original_thought_interpretation,
        check_factor_13_word_count,
        check_factor_14_tone,
        check_factor_15_style,
        check_factor_16_purpose,
        check_factor_17_nature,
        check_factor_18_level,
        check_factor_19_hedging,
        check_factor_20_syntax,
        check_factor_21_morphology,
        check_factor_22_punctuation,
        check_factor_23_role,
        check_factor_24_type_of_work,
    )
    total = 0
    for check in checks:
        total += check(paragraph)
    return total

if __name__ == "__main__":
    # Score the first paragraphs of both documents and print them side by side.
    dataset1_path = "Prompt Dataset.docx"
    dataset2_path = "Cognitive Dataset.docx"
    # Upper bound on how many '####'-delimited paragraphs are scored per file.
    max_paragraphs = 12
    paragraphs_dataset1 = extract_paragraphs_from_docx(dataset1_path)[:max_paragraphs]
    paragraphs_dataset2 = extract_paragraphs_from_docx(dataset2_path)[:max_paragraphs]
    scores_dataset1 = [score_paragraph(p) for p in paragraphs_dataset1]
    scores_dataset2 = [score_paragraph(p) for p in paragraphs_dataset2]

    # A document may hold fewer paragraphs than the limit. pandas raises when
    # the columns of a dict-built DataFrame differ in length, so size the table
    # by the longer score list and pad the shorter one with None (shown as NaN).
    row_count = max(len(scores_dataset1), len(scores_dataset2))
    padded_dataset1 = scores_dataset1 + [None] * (row_count - len(scores_dataset1))
    padded_dataset2 = scores_dataset2 + [None] * (row_count - len(scores_dataset2))

    df = pd.DataFrame({
        "Paragraph #": list(range(1, row_count + 1)),
        "Dataset 1 Score": padded_dataset1,
        "Dataset 2 Score": padded_dataset2
    })

    print(df.to_string(index=False))


 Paragraph #  Dataset 1 Score  Dataset 2 Score
           1                7                6
           2                5                5
           3                7                6
           4                8                6
           5               10                7
           6               13                6
           7               13                6
           8               14                6
           9               15                6
          10               17                6
          11               18                6
          12               19                7


In [5]:
!pip install python-docx
!pip install pandas

import docx
import re
import pandas as pd

def extract_paragraphs_from_docx(file_path):
    """Return the '####'-delimited sections of a .docx file as a list of strings.

    The text of each entry in ``doc.paragraphs`` is joined with newlines
    before splitting; sections are whitespace-stripped and empty ones are
    discarded.
    """
    source = docx.Document(file_path)
    lines = [entry.text for entry in source.paragraphs]
    sections = "\n".join(lines).split('####')
    stripped = (section.strip() for section in sections)
    return [section for section in stripped if section]

def check_factor_1_clear_argumentation(paragraph):
    """Return 1 if the paragraph contains an analytic cue phrase (any case), else 0."""
    # NOTE(review): same phrase list as check_factor_6_depth_of_insight, so
    # the two factors always produce the same score.
    text = paragraph.lower()
    cues = ("this implies", "the underlying cause", "a possible interpretation is")
    return int(any(cue in text for cue in cues))

def check_factor_2_diverse_perspectives(paragraph):
    """Return 1 if the paragraph contains a multiple-viewpoint cue phrase (any case), else 0."""
    text = paragraph.lower()
    cues = ("an interdisciplinary view suggests", "cross-cultural studies show")
    return int(any(cue in text for cue in cues))

def check_factor_3_comparative_analysis(paragraph):
    """Return 1 if the paragraph contains a comparison cue phrase (any case), else 0."""
    text = paragraph.lower()
    cues = ("in contrast", "similarly", "while this suggests", "a key difference lies in")
    return int(any(cue in text for cue in cues))

def check_factor_4_logical_structure(paragraph):
    """Return 1 if the paragraph contains a logical connective (any case), else 0."""
    text = paragraph.lower()
    connectives = ("therefore", "however", "consequently", "additionally")
    return int(any(word in text for word in connectives))

def check_factor_5_theoretical_application(paragraph):
    """Return 1 if the paragraph contains a theory-related keyword (any case), else 0."""
    text = paragraph.lower()
    keywords = ("theory", "framework", "model", "example")
    return int(any(keyword in text for keyword in keywords))

def check_factor_6_depth_of_insight(paragraph):
    """Return 1 if the paragraph contains an interpretive cue phrase (any case), else 0."""
    text = paragraph.lower()
    cues = ("this implies", "the underlying cause", "a possible interpretation is")
    return int(any(cue in text for cue in cues))

def check_factor_7_counterarguments_limitations(paragraph):
    """Return 1 if the paragraph contains a limitation/counterargument phrase (any case), else 0."""
    text = paragraph.lower()
    cues = ("a limitation of this approach is", "critics argue", "this overlooks")
    return int(any(cue in text for cue in cues))

def check_factor_8_citation_of_sources(paragraph):
    """Return 1 if the paragraph appears to attribute a source, else 0.

    Either a ``(Word, 1234)`` parenthetical in the original text or one of
    the attribution cue strings in the lower-cased text counts as a hit.
    """
    author_year = re.search(r"\(\w+,\s*\d{4}\)", paragraph)
    text = paragraph.lower()
    cues = ("as noted by", "according to recent studies", "[author,", " (20")
    if any(cue in text for cue in cues):
        return 1
    return 0 if author_year is None else 1

def check_factor_9_critical_engagement_with_sources(paragraph):
    """Return 1 if the paragraph contains a critical-engagement cue (any case), else 0."""
    text = paragraph.lower()
    cues = ("critically", "challenge the assumption", "critique", "critical analysis")
    return int(any(cue in text for cue in cues))

def check_factor_10_questions_for_further_research(paragraph):
    """Return 1 if the paragraph contains a further-research cue phrase (any case), else 0."""
    text = paragraph.lower()
    cues = ("future studies could explore", "this raises questions about")
    return int(any(cue in text for cue in cues))

def check_factor_11_statistics_empirical_evidence(paragraph):
    """Return 1 if the paragraph contains an evidence-related cue (any case), else 0."""
    text = paragraph.lower()
    cues = ("peer-reviewed", "according to", "as noted by", "evidence", "data from")
    return int(any(cue in text for cue in cues))

def check_factor_12_original_thought_interpretation(paragraph):
    """Return 1 if the paragraph contains an original-thought cue phrase (any case), else 0."""
    text = paragraph.lower()
    cues = ("this suggests a novel approach", "an unexamined possibility is")
    return int(any(cue in text for cue in cues))

def check_factor_13_word_count(paragraph, target=300, tolerance=0):
    """Return 1 if the paragraph's word count is within range of the target.

    Words are whitespace-separated tokens.

    Args:
        paragraph: Text to measure.
        target: Desired number of words. Defaults to 300.
        tolerance: Allowed absolute deviation from ``target``. The default of
            0 keeps the original exact-match behavior; pass a positive value
            to accept paragraphs that are close to the target length.

    Returns:
        1 when ``abs(word_count - target) <= tolerance``, otherwise 0.
    """
    word_total = len(paragraph.split())
    return 1 if abs(word_total - target) <= tolerance else 0

def check_factor_14_tone(paragraph):
    """Return 1 if the paragraph is free of the listed slang words, else 0.

    Matching is a case-insensitive substring test.
    """
    # The text may contain typographic apostrophes (U+2019 / U+2018) instead
    # of the ASCII one; normalize them so "ain’t" is caught like "ain't".
    normalized = paragraph.lower().replace("\u2019", "'").replace("\u2018", "'")
    slang_words = ["gonna", "wanna", "kiddo", "ain't"]
    return 0 if any(s in normalized for s in slang_words) else 1

def check_factor_15_style(paragraph):
    """Return 1 if the paragraph passes the academic-style heuristic, else 0.

    The heuristic holds when the diverse-perspectives check or the
    empirical-evidence check returns 1; the second check is only run when
    the first one does not.
    """
    has_viewpoints = check_factor_2_diverse_perspectives(paragraph) == 1
    if has_viewpoints or check_factor_11_statistics_empirical_evidence(paragraph) == 1:
        return 1
    return 0

def check_factor_16_purpose(paragraph):
    """Return 1 if the paragraph states its purpose with a known phrase (any case), else 0."""
    text = paragraph.lower()
    cues = ("ideal written academic writing paragraph", "the purpose of this paragraph")
    return int(any(cue in text for cue in cues))

def check_factor_17_nature(paragraph):
    """Return 1 if the paragraph uses "because" and none of the forbidden terms, else 0.

    Both tests are case-insensitive substring checks.
    """
    text = paragraph.lower()
    banned = ("align", "nuanced", "comprehensive", "underscore", "interplay", "ultimately")
    uses_because = "because" in text
    return int(uses_because and not any(term in text for term in banned))

def check_factor_18_level(paragraph):
    """Return 1 if the paragraph names a postgraduate level explicitly (any case), else 0."""
    text = paragraph.lower()
    cues = ("masters-level", "postgraduate level", "msc level")
    return int(any(cue in text for cue in cues))

def check_factor_19_hedging(paragraph):
    """Return 1 if the paragraph contains a hedging word, else 0.

    Matching is case-insensitive and on whole words only. A plain substring
    test would also fire on unrelated words such as "mayor", "dismayed" or
    "mighty".
    """
    hedging_words = ["may", "might", "could", "possibly", "arguably"]
    # \b anchors keep each hedge from matching inside a longer word.
    whole_word = r"\b(?:" + "|".join(re.escape(w) for w in hedging_words) + r")\b"
    return 1 if re.search(whole_word, paragraph.lower()) else 0

def check_factor_20_syntax(paragraph):
    """Return 0 if a comma is followed by a word ending in "ing", else 1.

    Regex heuristic for "comma + gerund" clauses; any word ending in "ing"
    directly after a comma (and optional whitespace) triggers it.
    """
    return 0 if re.search(r",\s*\w+ing\b", paragraph) is not None else 1

def check_factor_21_morphology(paragraph):
    """Return 1 if the paragraph contains one of the listed precise terms (any case), else 0."""
    text = paragraph.lower()
    terms = ("significant", "noteworthy", "validity")
    return int(any(term in text for term in terms))

def check_factor_22_punctuation(paragraph):
    """Return 1 if the paragraph contains "because" (any case), else 0."""
    text = paragraph.lower()
    return int("because" in text)

def check_factor_23_role(paragraph):
    """Return 1 if the paragraph states an MSc-student perspective (any case), else 0."""
    text = paragraph.lower()
    cues = ("as a student of msc", "in my postgraduate studies", "from an msc perspective")
    return int(any(cue in text for cue in cues))

def check_factor_24_type_of_work(paragraph):
    """Return 1 if the paragraph mentions "essay" (any case), else 0."""
    text = paragraph.lower()
    # "this essay" can only match where "essay" already matches.
    cues = ("essay", "this essay")
    return int(any(cue in text for cue in cues))

def factor_scores(paragraph):
    """Return the paragraph's 24 individual factor results as a list.

    The list holds one 0/1 entry per factor check, in checklist order
    (factor 1 first, factor 24 last).
    """
    checks = (
        check_factor_1_clear_argumentation,
        check_factor_2_diverse_perspectives,
        check_factor_3_comparative_analysis,
        check_factor_4_logical_structure,
        check_factor_5_theoretical_application,
        check_factor_6_depth_of_insight,
        check_factor_7_counterarguments_limitations,
        check_factor_8_citation_of_sources,
        check_factor_9_critical_engagement_with_sources,
        check_factor_10_questions_for_further_research,
        check_factor_11_statistics_empirical_evidence,
        check_factor_12_original_thought_interpretation,
        check_factor_13_word_count,
        check_factor_14_tone,
        check_factor_15_style,
        check_factor_16_purpose,
        check_factor_17_nature,
        check_factor_18_level,
        check_factor_19_hedging,
        check_factor_20_syntax,
        check_factor_21_morphology,
        check_factor_22_punctuation,
        check_factor_23_role,
        check_factor_24_type_of_work,
    )
    return [check(paragraph) for check in checks]

if __name__ == "__main__":
    # Input documents; only the first 12 '####'-delimited paragraphs of each are used.
    dataset1_path = "Prompt Dataset.docx"
    dataset2_path = "Cognitive Dataset.docx"
    paragraphs_dataset1 = extract_paragraphs_from_docx(dataset1_path)[:12]
    paragraphs_dataset2 = extract_paragraphs_from_docx(dataset2_path)[:12]

    # One 24-element 0/1 vector per paragraph.
    dataset1_factors = list(map(factor_scores, paragraphs_dataset1))
    dataset2_factors = list(map(factor_scores, paragraphs_dataset2))

    # Per-factor totals: how many paragraphs of each dataset scored 1 on that factor.
    dataset1_sum = []
    dataset2_sum = []
    for factor_idx in range(24):
        dataset1_sum.append(sum(row[factor_idx] for row in dataset1_factors))
        dataset2_sum.append(sum(row[factor_idx] for row in dataset2_factors))

    # Row labels, listed in the same order as the entries returned by factor_scores.
    checklist_items = [
        "Clear Argumentation",
        "Diverse Perspectives",
        "Comparative Analysis",
        "Logical Structure",
        "Theoretical Application",
        "Depth of Insight",
        "Counterarguments, Limitations",
        "Citation of Sources",
        "Critical Engagement with Sources",
        "Questions for Further Research",
        "Statistics and Empirical Evidence",
        "Original Thought, Interpretation",
        "Word count",
        "Tone",
        "Style",
        "Purpose",
        "Nature",
        "Level",
        "Hedging",
        "Syntax",
        "Morphology",
        "Punctuation",
        "Role",
        "Type of work",
    ]
    df_factors = pd.DataFrame({
        "Checklist Item": checklist_items,
        "Dataset 1 Score": dataset1_sum,
        "Dataset 2 Score": dataset2_sum,
    })

    print("Factor-wise collective scores:")
    print(df_factors.to_string(index=False))

    # Best and worst factor per dataset; idxmax/idxmin pick the first row on ties.
    item_column = df_factors["Checklist Item"]
    dataset1_column = df_factors["Dataset 1 Score"]
    dataset2_column = df_factors["Dataset 2 Score"]

    highest_factor_dataset1 = item_column.loc[dataset1_column.idxmax()]
    highest_score_dataset1 = dataset1_column.max()
    lowest_factor_dataset1 = item_column.loc[dataset1_column.idxmin()]
    lowest_score_dataset1 = dataset1_column.min()

    highest_factor_dataset2 = item_column.loc[dataset2_column.idxmax()]
    highest_score_dataset2 = dataset2_column.max()
    lowest_factor_dataset2 = item_column.loc[dataset2_column.idxmin()]
    lowest_score_dataset2 = dataset2_column.min()

    print("\nSummary:")
    print(f"Dataset 1 - Highest scoring factor: {highest_factor_dataset1} with score {highest_score_dataset1}")
    print(f"Dataset 1 - Lowest scoring factor: {lowest_factor_dataset1} with score {lowest_score_dataset1}")
    print(f"Dataset 2 - Highest scoring factor: {highest_factor_dataset2} with score {highest_score_dataset2}")
    print(f"Dataset 2 - Lowest scoring factor: {lowest_factor_dataset2} with score {lowest_score_dataset2}")


Factor-wise collective scores:
                   Checklist Item  Dataset 1 Score  Dataset 2 Score
              Clear Argumentation                7                0
             Diverse Perspectives                5                0
             Comparative Analysis               10                0
                Logical Structure               10                2
          Theoretical Application                9                1
                 Depth of Insight                7                0
    Counterarguments, Limitations                7                0
              Citation of Sources                1                0
 Critical Engagement with Sources               11               12
   Questions for Further Research                2                0
Statistics and Empirical Evidence               12               12
 Original Thought, Interpretation                3                0
                       Word count                0                0
                 