In [1]:
import os
import subprocess
import json

**Q&A from insights**

In [5]:
import os
import json
import subprocess
import re

# Use absolute path to avoid FileNotFoundError
base_dir = os.path.abspath("..")
data_dir = os.path.join(base_dir, "data")
output_dir = os.path.join(base_dir, "outputs", "insights_qa")
os.makedirs(output_dir, exist_ok=True)

# Check if data directory exists
if not os.path.exists(data_dir):
    raise FileNotFoundError(f"Data folder not found at: {data_dir}")

prompt_template = """
You are an expert in football analysis. Generate 5 structured question-answer pairs from the following match report.
Use concise and clear answers. Output the result as a JSON list with keys 'question' and 'answer'.

Match Report:
{report_text}
"""

def parse_qa_pairs(text):
    # Regular expression to match Q&A pairs with "Question" and "Answer" prefixes
    qa_pattern = re.compile(r"Question (\d+): (.+)\nAnswer: (.+)")
    matches = qa_pattern.findall(text)

    qa_pairs = []
    for match in matches:
        qa_pairs.append({
            "question": match[1].strip(),
            "answer": match[2].strip()
        })
    return qa_pairs

def generate_qa_from_text(text):
    prompt = prompt_template.format(report_text=text)

    try:
        result = subprocess.run(
            ["ollama", "run", "llama2"],
            input=prompt.encode('utf-8'),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=120
        )
        output = result.stdout.decode("utf-8")
        print("Subprocess output:", output)  # Debugging statement
        parsed_pairs = parse_qa_pairs(output)
        print("Parsed Q&A pairs:", parsed_pairs)  # Debugging statement
        return parsed_pairs

    except subprocess.TimeoutExpired:
        print("Timeout reached while generating response")
    except Exception as e:
        print(f"An error occurred: {e}")
    return None

for folder in os.listdir(data_dir):
    match_path = os.path.join(data_dir, folder)
    if not os.path.isdir(match_path):
        continue

    txt_files = [f for f in os.listdir(match_path) if f.endswith(".txt")]
    if not txt_files:
        print(f"No report.txt found in {folder}")
        continue

    with open(os.path.join(match_path, txt_files[0]), "r", encoding="utf-8") as f:
        report_text = f.read()

    print(f"Generating Q&A for: {folder}")
    qa_pairs = generate_qa_from_text(report_text)

    if qa_pairs:
        out_path = os.path.join(output_dir, f"{folder}.json")
        with open(out_path, "w", encoding="utf-8") as out_f:
            json.dump(qa_pairs, out_f, indent=2, ensure_ascii=False)
        print(f"Saved: {out_path}")
    else:
        print(f"Skipped: {folder}")


Generating Q&A for: 1_Arg_vs_Saudi_G1
Subprocess output: Here are 5 structured question-answer pairs based on the provided match report:

Question 1: What was the final score in the match between Saudi Arabia and Argentina?
Answer: The final score was 2-1 in favor of Saudi Arabia.

Question 2: How did Lionel Messi perform in the match for Argentina?
Answer: Lionel Messi scored a penalty kick for Argentina in the first half, but Saudi Arabia was able to come back and defeat them.

Question 3: What was the key to Saudi Arabia's victory?
Answer: Saudi Arabia's ability to defend against Argentina's attacking runs and convert their chances into goals was the key to their victory.

Question 4: How did Argentina react to falling behind?
Answer: Argentina pushed forward in search of a way back into the game, but Saudi Arabia's defense held strong and they were unable to find an equalizer.

Question 5: What is the significance of Saudi Arabia's victory over Argentina?
Answer: Saudi Arabia's vic

**For missed files**

In [6]:
import os
import json
import subprocess
import re

# Use absolute path to avoid FileNotFoundError
base_dir = os.path.abspath("..")
data_dir = os.path.join(base_dir, "temp")
output_dir = os.path.join(base_dir, "outputs", "insights_qa")
os.makedirs(output_dir, exist_ok=True)

# Check if data directory exists
if not os.path.exists(data_dir):
    raise FileNotFoundError(f"Data folder not found at: {data_dir}")

prompt_template = """
You are an expert in football analysis. Generate 5 structured question-answer pairs from the following match report.
Use concise and clear answers. Output the result as a JSON list with keys 'question' and 'answer'.

Match Report:
{report_text}
"""

def parse_qa_pairs(text):
    # Regular expression to match Q&A pairs with "Question" and "Answer" prefixes
    qa_pattern = re.compile(r"Question (\d+): (.+)\nAnswer: (.+)")
    matches = qa_pattern.findall(text)

    qa_pairs = []
    for match in matches:
        qa_pairs.append({
            "question": match[1].strip(),
            "answer": match[2].strip()
        })
    return qa_pairs

def generate_qa_from_text(text):
    prompt = prompt_template.format(report_text=text)

    try:
        result = subprocess.run(
            ["ollama", "run", "llama2"],
            input=prompt.encode('utf-8'),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=120
        )
        output = result.stdout.decode("utf-8")
        print("Subprocess output:", output)  # Debugging statement
        parsed_pairs = parse_qa_pairs(output)
        print("Parsed Q&A pairs:", parsed_pairs)  # Debugging statement
        return parsed_pairs

    except subprocess.TimeoutExpired:
        print("Timeout reached while generating response")
    except Exception as e:
        print(f"An error occurred: {e}")
    return None

for folder in os.listdir(data_dir):
    match_path = os.path.join(data_dir, folder)
    if not os.path.isdir(match_path):
        continue

    txt_files = [f for f in os.listdir(match_path) if f.endswith(".txt")]
    if not txt_files:
        print(f"No report.txt found in {folder}")
        continue

    with open(os.path.join(match_path, txt_files[0]), "r", encoding="utf-8") as f:
        report_text = f.read()

    print(f"Generating Q&A for: {folder}")
    qa_pairs = generate_qa_from_text(report_text)

    if qa_pairs:
        out_path = os.path.join(output_dir, f"{folder}.json")
        with open(out_path, "w", encoding="utf-8") as out_f:
            json.dump(qa_pairs, out_f, indent=2, ensure_ascii=False)
        print(f"Saved: {out_path}")
    else:
        print(f"Skipped: {folder}")


Generating Q&A for: 3_Arg_vs_Pol_G3
Subprocess output: Here are 5 structured question-answer pairs based on the provided match report:

Question 1: Who won the match between Poland and Argentina?
Answer: Argentina won the match with a score of 2-0.

Question 2: What was the catalyst for Argentina's win against Mexico that got their campaign back on track?
Answer: Lionel Messi was the catalyst for the crucial win against Mexico, illustrating why they were regarded as one of the tournament favourites.

Question 3: How did Poland qualify for the last 16 in the World Cup?
Answer: Poland qualified for the last 16 by virtue of the Fifa Fair Play rule, having fewer yellow cards than Mexico.

Question 4: What was the score when Saudi Arabia pulled back a goal deep into stoppage time against Mexico?
Answer: The score was 2-1 in favour of Saudi Arabia when they pulled back a goal deep into stoppage time against Mexico, which had a significant impact on Poland's qualification.

Question 5: How di

In [20]:
import os
import json
import subprocess
import re

# Use absolute path to avoid FileNotFoundError
base_dir = os.path.abspath("..")
data_dir = os.path.join(base_dir, "data")
output_dir = os.path.join(base_dir, "outputs", "insights_qa")
os.makedirs(output_dir, exist_ok=True)

# Check if data directory exists
if not os.path.exists(data_dir):
    raise FileNotFoundError(f"Data folder not found at: {data_dir}")

prompt_template = """
You are a professional football analyst. Read the following match reports. Your goal is to generate 5 concise and high-quality question-answer pairs based on Argentina's performance in the match.
You may refer to the opponent's strategy or events only when it directly relates to Argentina’s tactics, decisions, or key moments.
Use concise and clear answers. Output the result as a JSON list with keys 'question' and 'answer'.

Instructions:
1. Read the match report carefully.
2. Identify key moments, strategies, and decisions made by Argentina.
3. Formulate questions that are relevant to Argentina's performance.
4. Provide clear and concise answers based on the report.
5. Ensure that the output is in JSON format with 'question' and 'answer' keys.

Match Report:
{report_text}
"""


def parse_qa_pairs(text):
    # Regular expression to match Q&A pairs with both "Question/Answer" and "Q/A" prefixes
    qa_pattern = re.compile(r"(?:Question|Q(?:uestion)?) (\d+)?: (.+)\n(?:Answer|A(?:nswer)?): (.+)")
    matches = qa_pattern.findall(text)

    qa_pairs = []
    for match in matches:
        qa_pairs.append({
            "question": match[1].strip(),
            "answer": match[2].strip()
        })
    return qa_pairs

def generate_qa_from_text(text):
    prompt = prompt_template.format(report_text=text)

    try:
        result = subprocess.run(
            ["ollama", "run", "llama2"],
            input=prompt.encode('utf-8'),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=120
        )
        output = result.stdout.decode("utf-8")
        print("Subprocess output:", output)  # Debugging statement
        parsed_pairs = parse_qa_pairs(output)
        print("Parsed Q&A pairs:", parsed_pairs)  # Debugging statement
        return parsed_pairs

    except subprocess.TimeoutExpired:
        print("Timeout reached while generating response")
    except Exception as e:
        print(f"An error occurred: {e}")
    return None

for folder in os.listdir(data_dir):
    match_path = os.path.join(data_dir, folder)
    if not os.path.isdir(match_path):
        continue

    txt_files = [f for f in os.listdir(match_path) if f.endswith(".txt")]
    if not txt_files:
        print(f"No report.txt found in {folder}")
        continue

    print(f"Processing folder: {folder}")  # Debugging statement
    print(f"Found txt files: {txt_files}")  # Debugging statement

    for txt_file in txt_files:
        txt_file_path = os.path.join(match_path, txt_file)
        print(f"Reading file: {txt_file_path}")  # Debugging statement

        with open(txt_file_path, "r", encoding="utf-8") as f:
            report_text = f.read()

        print(f"Generating Q&A for: {folder}/{txt_file}")
        qa_pairs = generate_qa_from_text(report_text)

        if qa_pairs:
            # Include the base name of the txt file in the output JSON file name
            out_file_name = f"{folder}_{os.path.splitext(txt_file)[0]}.json"
            out_path = os.path.join(output_dir, out_file_name)
            with open(out_path, "w", encoding="utf-8") as out_f:
                json.dump(qa_pairs, out_f, indent=2, ensure_ascii=False)
            print(f"Saved: {out_path}")
        else:
            print(f"Skipped: {folder}/{txt_file}")

print("Processing complete.")


Processing folder: 1_Arg_vs_Saudi_G1
Found txt files: ['Text_report_BBC.txt', 'Text_Report_The_Guardian.txt']
Reading file: d:\Masters\hcnlp_project\data\1_Arg_vs_Saudi_G1\Text_report_BBC.txt
Generating Q&A for: 1_Arg_vs_Saudi_G1/Text_report_BBC.txt
Subprocess output: Here are five question-answer pairs based on Argentina's performance in the match:

Question: How did Argentina fare against Saudi Arabia in their World Cup opener?
Answer: Argentina were defeated by Saudi Arabia in a stunning upset, losing 2-1.

Question: What was the key to Saudi Arabia's victory over Argentina?
Answer: Saudi Arabia's solid defense and clinical finishing were the main factors in their win over Argentina.

Question: How did Lionel Messi perform for Argentina in the match?
Answer: Messi scored Argentina's only goal from a penalty kick, but he was unable to convert any of his team's other good scoring chances.

Question: How did Saudi Arabia's players celebrate their historic victory over Argentina?
Answer

In [2]:
import os
import json
import subprocess
import re

# Base and data/output directory setup
base_dir = os.path.abspath("..")
data_dir = os.path.join(base_dir, "data")
output_dir = os.path.join(base_dir, "outputs", "insights_qa")
os.makedirs(output_dir, exist_ok=True)

# Verify data folder exists
if not os.path.exists(data_dir):
    raise FileNotFoundError(f"Data folder not found at: {data_dir}")

# Updated prompt with stricter output instruction
prompt_template = """
You are a professional football analyst. Read the following match reports. Your goal is to generate 5 concise and high-quality question-answer pairs based on Argentina's performance in the match.

Only output a JSON array like this:
[
  {{ "question": "What did Argentina do well?", "answer": "They controlled possession and created multiple chances." }},
  {{ "question": "Who was the standout player for Argentina?", "answer": "Lionel Messi led the attack and scored the opening goal." }}
]

Do NOT add any explanation or extra text. Only return the JSON.

Match Report:
{report_text}
"""


def parse_qa_pairs(output):
    import json

    # 1. Try JSON list
    try:
        json_start = output.find('[')
        json_end = output.rfind(']') + 1
        json_data = output[json_start:json_end]
        qa_list = json.loads(json_data)
        if isinstance(qa_list, list) and all("question" in qa and "answer" in qa for qa in qa_list):
            return qa_list
    except Exception as e:
        print("⚠️ JSON parsing failed. Trying fallback:", str(e))

    # 2. Fallback: Numbered Q&A like "1. What...? - Answer"
    qa_pattern = re.compile(r"\d+\.\s*(.+?)\s*[-–]\s*(.+)")
    matches = qa_pattern.findall(output)
    if matches:
        return [{"question": q.strip(), "answer": a.strip()} for q, a in matches]

    # 3. Fallback: Many individual JSON objects (not in a list)
    qa_json_pattern = re.compile(r'\{\s*"question":\s*".+?",\s*"answer":\s*".+?"\s*\}', re.DOTALL)
    matches = qa_json_pattern.findall(output)
    try:
        return [json.loads(block) for block in matches]
    except Exception as e:
        print("⚠️ Could not parse individual QA JSON blocks:", str(e))

    return []  # Still nothing matched



def generate_qa_from_text(text):
    prompt = prompt_template.format(report_text=text)

    try:
        result = subprocess.run(
            ["ollama", "run", "llama2"],
            input=prompt.encode('utf-8'),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=120
        )
        output = result.stdout.decode("utf-8").strip()
        print("Subprocess output:", output[:300], "...\n")  # Truncated for brevity
        return parse_qa_pairs(output)

    except subprocess.TimeoutExpired:
        print("⏱️ Timeout reached while generating response")
    except Exception as e:
        print(f"❌ Error during subprocess: {e}")
    return None

# Main loop to process each match folder
for folder in os.listdir(data_dir):
    match_path = os.path.join(data_dir, folder)
    if not os.path.isdir(match_path):
        continue

    txt_files = [f for f in os.listdir(match_path) if f.endswith(".txt")]
    if not txt_files:
        print(f"No text files found in {folder}")
        continue

    print(f"\n📂 Processing match: {folder}")
    for txt_file in txt_files:
        txt_file_path = os.path.join(match_path, txt_file)

        with open(txt_file_path, "r", encoding="utf-8") as f:
            report_text = f.read()

        print(f"📝 Generating Q&A for file: {txt_file}")
        qa_pairs = generate_qa_from_text(report_text)

        if qa_pairs:
            out_file_name = f"{folder}_{os.path.splitext(txt_file)[0]}.json"
            out_path = os.path.join(output_dir, out_file_name)
            with open(out_path, "w", encoding="utf-8") as out_f:
                json.dump(qa_pairs, out_f, indent=2, ensure_ascii=False)
            print(f"✅ Saved: {out_path}")
        else:
            print(f"⚠️ Skipped (no Q&A parsed): {txt_file}")

print("\n🎉 All processing complete.")



📂 Processing match: 1_Arg_vs_Saudi_G1
📝 Generating Q&A for file: Text_report_BBC.txt
Subprocess output: Here are five concise question-answer pairs based on Argentina's performance in the match:

{ "question": "What did Argentina do well?", "answer": "They controlled possession and created multiple chances." }

{ "question": "Who was the standout player for Argentina?", "answer": "Lionel Messi led the ...

⚠️ JSON parsing failed. Trying fallback: Expecting value: line 1 column 1 (char 0)
✅ Saved: d:\Masters\hcnlp_project\outputs\insights_qa\1_Arg_vs_Saudi_G1_Text_report_BBC.json
📝 Generating Q&A for file: Text_Report_The_Guardian.txt
Subprocess output: Here are 5 question-answer pairs based on Argentina's performance in the match:

1. What did Argentina do well?
Answer: Argentina controlled possession and created multiple chances.
2. Who was the standout player for Argentina?
Answer: Lionel Messi led the attack and scored the opening goal.
3. How  ...

⚠️ JSON parsing failed. Trying f

In [None]:
import os
import csv
import subprocess
import re

# Base and data/output directory setup
base_dir = os.path.abspath("..")
data_dir = os.path.join(base_dir, "data")
output_dir = os.path.join(base_dir, "outputs", "insights_qa")
os.makedirs(output_dir, exist_ok=True)

# Verify data folder exists
if not os.path.exists(data_dir):
    raise FileNotFoundError(f"Data folder not found at: {data_dir}")

# Updated prompt with stricter output instruction
prompt_template = """
You are a professional football analyst. Read the following match reports. Your goal is to generate 5 concise and high-quality question-answer pairs based on Argentina's performance in the match.
You may refer to the opponent's strategy or events only when it directly relates to Argentina’s tactics, decisions, or key moments.
Use concise and clear answers. Output the result as a JSON list with keys 'question' and 'answer'.

Instructions:
1. Read the match report carefully.
2. Identify key moments, strategies, and decisions made by Argentina.
3. Formulate questions that are relevant to Argentina's performance.
4. Provide clear and concise answers based on the report.
5. Ensure that the output is in JSON format with 'question' and 'answer' keys.

Match Report:
{report_text}
"""

def parse_qa_pairs(output):
    import json

    # 1. Try JSON list
    try:
        json_start = output.find('[')
        json_end = output.rfind(']') + 1
        json_data = output[json_start:json_end]
        qa_list = json.loads(json_data)
        if isinstance(qa_list, list) and all("question" in qa and "answer" in qa for qa in qa_list):
            return qa_list
    except Exception as e:
        print("JSON parsing failed. Trying fallback:", str(e))

    # 2. Fallback: Numbered Q&A like "1. What...? - Answer"
    qa_pattern = re.compile(r"\d+\.\s*(.+?)\s*[-–]\s*(.+)")
    matches = qa_pattern.findall(output)
    if matches:
        return [{"question": q.strip(), "answer": a.strip()} for q, a in matches]

    # 3. Fallback: Many individual JSON objects (not in a list)
    qa_json_pattern = re.compile(r'\{\s*"question":\s*".+?",\s*"answer":\s*".+?"\s*\}', re.DOTALL)
    matches = qa_json_pattern.findall(output)
    try:
        return [json.loads(block) for block in matches]
    except Exception as e:
        print("Could not parse individual QA JSON blocks:", str(e))

    return []  # Still nothing matched

def generate_qa_from_text(text):
    prompt = prompt_template.format(report_text=text)

    try:
        result = subprocess.run(
            ["ollama", "run", "llama2"],
            input=prompt.encode('utf-8'),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=120
        )
        output = result.stdout.decode("utf-8").strip()
        print("Subprocess output:", output[:300], "...\n")  # Truncated for brevity
        return parse_qa_pairs(output), output

    except subprocess.TimeoutExpired:
        print("Timeout reached while generating response")
    except Exception as e:
        print(f"Error during subprocess: {e}")
    return None, None

def save_qa_to_csv(qa_pairs, file_path):
    with open(file_path, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=["question", "answer"])
        writer.writeheader()
        for qa in qa_pairs:
            writer.writerow(qa)

# Main loop to process each match folder
for folder in os.listdir(data_dir):
    match_path = os.path.join(data_dir, folder)
    if not os.path.isdir(match_path):
        continue

    txt_files = [f for f in os.listdir(match_path) if f.endswith(".txt")]
    if not txt_files:
        print(f"No text files found in {folder}")
        continue

    print(f"\nProcessing match: {folder}")
    for txt_file in txt_files:
        txt_file_path = os.path.join(match_path, txt_file)

        with open(txt_file_path, "r", encoding="utf-8") as f:
            report_text = f.read()

        print(f"Generating Q&A for file: {txt_file}")
        qa_pairs, raw_output = generate_qa_from_text(report_text)

        if qa_pairs:
            out_file_name = f"{folder}_{os.path.splitext(txt_file)[0]}.csv"
            out_path = os.path.join(output_dir, out_file_name)
            save_qa_to_csv(qa_pairs, out_path)
            print(f"Saved: {out_path}")
        else:
            print(f"Skipped (no Q&A parsed): {txt_file}")
            # Save raw output to a text file
            raw_out_file_name = f"{folder}_{os.path.splitext(txt_file)[0]}_raw.txt"
            raw_out_path = os.path.join(output_dir, raw_out_file_name)
            with open(raw_out_path, "w", encoding="utf-8") as raw_out_f:
                raw_out_f.write(raw_output)
            print(f"📝 Saved raw output: {raw_out_path}")

print("\nAll processing complete.")



📂 Processing match: 1_Arg_vs_Saudi_G1
📝 Generating Q&A for file: Text_report_BBC.txt
Subprocess output: Here are five question-answer pairs based on Argentina's performance in the match:

Question 1: What was Argentina's record before the World Cup?
Answer: Argentina had a 36-game unbeaten run that included winning the 2021 Copa America.

Question 2: How did Saudi Arabia's players react at full-time?
 ...

⚠️ JSON parsing failed. Trying fallback: Expecting value: line 1 column 1 (char 0)
⚠️ Skipped (no Q&A parsed): Text_report_BBC.txt
📝 Saved raw output: d:\Masters\hcnlp_project\outputs\insights_qa\1_Arg_vs_Saudi_G1_Text_report_BBC_raw.txt
📝 Generating Q&A for file: Text_Report_The_Guardian.txt
Subprocess output: Here are 5 question-answer pairs based on Argentina's performance in the match:

Question 1: What was the final score of the match between Argentina and Saudi Arabia?
Answer: The final score was Saudi Arabia 2, Argentina 1.

Question 2: Who scored the winning goal for Saudi A

In [1]:
import os
import json
import re

# Base and data/output directory setup
base_dir = os.path.abspath("..")
data_dir = os.path.join(base_dir, "outputs", "insights_qa")
output_dir = os.path.join(base_dir, "outputs", "insights_qa_json")
os.makedirs(output_dir, exist_ok=True)

def parse_qa_pairs(text):
    # Regular expression to match Q&A pairs with flexible formats
    qa_pattern = re.compile(r"(?:Question|Q(?:uestion)?)?\s*(\d*)\s*[:-]\s*(.+)\n(?:Answer|A(?:nswer)?)?\s*[:-]\s*(.+)", re.IGNORECASE)
    matches = qa_pattern.findall(text)

    qa_pairs = []
    for match in matches:
        qa_pairs.append({
            "question": match[1].strip(),
            "answer": match[2].strip()
        })
    return qa_pairs

def save_qa_to_json(qa_pairs, file_path):
    with open(file_path, "w", encoding="utf-8") as file:
        json.dump(qa_pairs, file, indent=2, ensure_ascii=False)

# Main loop to process each raw text file
for file_name in os.listdir(data_dir):
    if file_name.endswith("_raw.txt"):
        file_path = os.path.join(data_dir, file_name)
        print(f"Processing file: {file_path}")

        with open(file_path, "r", encoding="utf-8") as file:
            raw_output = file.read()

        qa_pairs = parse_qa_pairs(raw_output)

        if qa_pairs:
            json_file_name = file_name.replace("_raw.txt", ".json")
            json_file_path = os.path.join(output_dir, json_file_name)
            save_qa_to_json(qa_pairs, json_file_path)
            print(f"Saved: {json_file_path}")
        else:
            print(f"Skipped (no Q&A parsed): {file_name}")

print("Processing complete.")


Processing file: d:\Masters\hcnlp_project\outputs\insights_qa\1_Arg_vs_Saudi_G1_Text_report_BBC_raw.txt
Saved: d:\Masters\hcnlp_project\outputs\insights_qa_json\1_Arg_vs_Saudi_G1_Text_report_BBC.json
Processing file: d:\Masters\hcnlp_project\outputs\insights_qa\1_Arg_vs_Saudi_G1_Text_Report_The_Guardian_raw.txt
Saved: d:\Masters\hcnlp_project\outputs\insights_qa_json\1_Arg_vs_Saudi_G1_Text_Report_The_Guardian.json
Processing file: d:\Masters\hcnlp_project\outputs\insights_qa\2_Arg_vs_Mex_G2_Text_Report_BBC_raw.txt
Saved: d:\Masters\hcnlp_project\outputs\insights_qa_json\2_Arg_vs_Mex_G2_Text_Report_BBC.json
Processing file: d:\Masters\hcnlp_project\outputs\insights_qa\2_Arg_vs_Mex_G2_Text_Report_The_Guardian_raw.txt
Saved: d:\Masters\hcnlp_project\outputs\insights_qa_json\2_Arg_vs_Mex_G2_Text_Report_The_Guardian.json
Processing file: d:\Masters\hcnlp_project\outputs\insights_qa\3_Arg_vs_Pol_G3_Text_Report_BBC_raw.txt
Saved: d:\Masters\hcnlp_project\outputs\insights_qa_json\3_Arg_vs_Pol_