In [7]:
import random
import pandas as pd
import ollama
import json
import re

def process_articles(true_articles, size_of_sample, model_name):
    # Define patterns
    CHANGES3 = [['actor', 'You need to choose different facts for the value "Actor". Changed facts must bear different meaning. Common actors of Syrian war are: rebel groups, Russian forces, ISIS, the Syrian army, USA army, etc.'],
                ['cause of death', 'You need to choose different facts for the value "Cause of death". Changed facts must bear different meaning. Examples are shooting, shelling, chemical weapons, explosians, etc.'],
                ['date of death', """You need to choose different dates for the value "Date of death". Changed facts must bear different meaning. """],
                ['Place of death', """You need to choose different facts for the value "Place of death". Changed facts must bear different meaning. Common places are Aleppo, Damascus, Homs, Idlib, Hasaka, Deir ez-Zor, Daraa, Qamishli or Tartus."""],
                ['name of casualty or group', 'You need to choose different facts for the value "Name of casualty or group". Changed facts must bear different meaning. Common examples are: Civilians (General category encompassing men, women, and children), Syrian Army (SAA), Free Syrian Army (FSA), National Liberation Front (NLF), Hay\'at Tahrir al-Sham (HTS), People\'s Protection Units (YPG), Syrian Democratic Forces (SDF), Islamic State (ISIS), Hezbollah,Russian Forces, The White Helmets (Syrian Civil Defence), Kurdish Female Fighters (YPJ), Foreign Fighters (joining various factions),Al-Nusra Front Commanders, Civilians in Refugee Camps'],
                ]

    TOPICS = [["Name of casualty or group", " represents the casualties' names or the names of the groups associated with the casualties."],
                ["Gender or age group", " of casualty indicates if the casualties are male or female, or specify their age group (e.g., child, adult, senior)."],
                ["Cause of death", " specifies the weapons used by the aggressor (e.g., shooting, shelling, chemical weapons, etc.)"],
                ["Type", " of casualty classifies the casualties as a civilian or non-civilian (e.g., military personnel are non-civilians)."],
                ["Actor", " identifies the actors responsible for the incident, such as rebel groups, Russian forces, ISIS, the Syrian army, U.S. military, etc."],
                ["Place of death", " specifies the locations where the attacks occurred (e.g., Aleppo, Damascus, Homs, Idlib, Raqqa, Daraa, Deir ez-Zor, Qamishli, Palmyra, etc.)."],
                ["Date of death", " provides the dates when the attacks occurred."]]

    # Define prompts
    change_one_information_prompt = """Below is an article about an event during the Syrian war dated from 2015 to 2017:

    BEGINNING OF THE ARTICLE
    {article}
    END OF THE ARTICLE

    Here is the related data extracted from the article in JSON format:

    {extracted_data}

    Please, follow the instructions:
    Step 1: {change_data_1}
    Step 2: Generate a new article in which you will change the information for {change_topic_1}. Ensure that all occurrences of {change_topic_1} are changed. Try to change even the sentences that are connected to {change_topic_1}.
    Step 3: Paraphrase the article generated in step 2 once more. Make sure that the {change_topic_1} is changed according to first step, while maintaining other information. This article should begin with the phrase "BEGINNING OF THE ARTICLE" and end with "END OF THE ARTICLE". Make sure you include those phrases.
    Step 4: Create a new JSON file, which is the same as the old one, with the exception of {change_topic_1}, which is given new information. JSON file should be displayed in standard notation, with use of double and not single quotes.
    """

    change_one_information_prompt = """Below is an article about an event during the Syrian war, dated from 2015 to 2017:

    BEGINNING OF THE ARTICLE
    {article}
    END OF THE ARTICLE

    Here is the related data extracted from the article in JSON format:

    {extracted_data}

    Please follow these instructions:
    Step 1: {change_data_1}
    Step 2: Generate a new version of the article by modifying the information related to {change_topic_1}. Ensure that all instances of {change_topic_1} are altered, including sentences that are connected to it.
    Step 3: Paraphrase the article from Step 2 while ensuring that {change_topic_1} is modified according to Step 1. Preserve all other information. The revised article should begin with the phrase "BEGINNING OF THE ARTICLE" and end with "END OF THE ARTICLE", including these exact phrases.
    Step 4: Create a new JSON file that mirrors the original, except for the updated {change_topic_1}. The JSON file should be displayed using double quotes, not single quotes, following standard JSON notation.
    """


    prompt2_shorter = """You are a journalist tasked with analyzing an article that reports on casualties related to the war in Syria. Your goal is to extract specific information regarding attacks and casualties mentioned in the article.

    BEGINNING OF THE ARTICLE
    {article}
    END OF THE ARTICLE

    Please extract the facts about the {topic} from the article. {topic}{topic_content}. Be as concise as you can and do not output information that does not relate to {topic}, and if information is not present in the article, output "No information". Output should not exceed 15 words and should be written in the following format:
    BEGINNING OF FACTS
    <Display the extracted facts.>
    END OF FACTS
    """

    def find_json(text):
        content = []
        pattern0 = (
            r'.*Name of casualty or group.*:\s*(.*?),.*\n'
            r'.*Gender or age group.*:\s*(.*?),.*\n'
            r'.*Cause of death.*:\s*(.*?),.*\n'
            r'.*Type.*:\s*(.*?),.*\n'
            r'.*Actor.*:\s*(.*?),.*\n'
            r'.*Place of death.*:\s*(.*?),.*\n'
            r'.*Date of death.*:\s*(.*?)\s*\n'
        )
        pattern1 = (
            r'\{.*Name of casualty or group.*:\s*(.*?),.*'
            r'.*Gender or age group.*:\s*(.*?),.*'
            r'.*Cause of death.*:\s*(.*?),.*'
            r'.*Type.*:\s*(.*?),.*'
            r'.*Actor.*:\s*(.*?),.*'
            r'.*Place of death.*:\s*(.*?),.*'
            r'.*Date of death.*:\s*(.*?)\}.*'
        )

        matches = re.finditer(pattern0, text)
        for match in matches:
            content_dict = {
                "Name of casualty or group": match.group(1),
                "Gender or age group": match.group(2),
                "Cause of death": match.group(3),
                "Type": match.group(4),
                "Actor": match.group(5),
                "Place of death": match.group(6),
                "Date of death": match.group(7),
            }
            content.append(content_dict)

        matches = re.finditer(pattern1, text)
        for match in matches:
            content_dict = {
                "Name of casualty or group": match.group(1),
                "Gender or age group": match.group(2),
                "Cause of death": match.group(3),
                "Type": match.group(4),
                "Actor": match.group(5),
                "Place of death": match.group(6),
                "Date of death": match.group(7),
            }
            content.append(content_dict)

        return content

    def extract_information(text):
        pattern = r"BEGINNING OF FACTS(.*?)END OF FACTS"
        match = re.search(pattern, text, re.DOTALL)
        if match:
            return match.group(1).strip()
        return None

    def extract_article(text):
        pattern = r"BEGINNING OF THE ARTICLE(.*?)END OF THE ARTICLE"
        match = re.search(pattern, text, re.DOTALL)
        if match:
            return match.group(1).strip()
        return None

    def print_readable_dict(data):
        print(json.dumps(data, indent=4, ensure_ascii=False))

    indices = random.sample(list(range(len(true_articles))), size_of_sample)
    results_new_prompt = []

    for index in indices:
        if len(list(true_articles['article_content'])[index]) < 450:
            json_dict = {}

            for topic in TOPICS:
                response = ollama.chat(
                    model=model_name,
                    messages=[
                        {
                            'role': 'user',
                            'content': prompt2_shorter.format(
                                article=list(true_articles['article_content'])[index],
                                topic=topic[0],
                                topic_content=topic[1]
                            ),
                            'temperature': 0.2,
                        }
                    ]
                )

                generated = response['message']['content']
                print("Generated fact:", generated)
                json_dict[topic[0]] = extract_information(generated)

            if None in json_dict.values():
                print("No JSON found. This is generated text:\n", generated)
            else:
                topic_to_change = random.sample(CHANGES3, 2)
                print("We will change: ", topic_to_change[0][0], topic_to_change[1][0])
                response = ollama.chat(
                    model=model_name,
                    messages=[
                        {
                            'role': 'user',
                            'content': change_one_information_prompt.format(
                                article=list(true_articles['article_content'])[index],
                                extracted_data=json.dumps(json_dict, indent=4, ensure_ascii=False),
                                change_topic_1=topic_to_change[0][0],
                                change_data_1=topic_to_change[0][1],
                            ),
                            'temperature': 0.2,
                        }
                    ]
                )

                generated = response['message']['content']
                print("First generated article: ", generated)
                changed_json = find_json(generated)
                print(changed_json)
                changed_article = extract_article(generated)
                print(changed_article)

                if changed_article is None or not changed_json:
                    results_new_prompt.append((index, None, None, list(true_articles['article_content'])[index], json_dict))
                else:
                    response = ollama.chat(
                        model=model_name,
                        messages=[
                            {
                                'role': 'user',
                                'content': change_one_information_prompt.format(
                                    article=changed_article,
                                    extracted_data=json.dumps(changed_json[0], indent=4, ensure_ascii=False),
                                    change_topic_1=topic_to_change[1][0],
                                    change_data_1=topic_to_change[1][1],
                                ),
                                'temperature': 0.2,
                            }
                        ]
                    )

                    generated = response['message']['content']
                    twice_changed_json = find_json(generated)
                    twice_changed_article = extract_article(generated)

                    print("SECOND generated article: ", generated)
                    print(twice_changed_json)
                    print(twice_changed_article)

                    results_new_prompt.append([
                        index, 
                        twice_changed_article, 
                        twice_changed_json[0] if twice_changed_json else None, 
                        list(true_articles['article_content'])[index], 
                        json_dict
                    ])

    return results_new_prompt

# Example usage:
df = pd.read_csv('FA-KES-Dataset.csv', encoding='ISO-8859-1')
true_articles = df[df['labels'] == 1]
results = process_articles(true_articles, 40,'llama3.1:8b' )

Generated fact: BEGINNING OF FACTS
No information.
END OF FACTS
Generated fact: BEGINING OF FACTS
No information.
END OF FACTS
Generated fact: BEGINING OF FACTS
Chemical attack.
END OF FACTS
Generated fact: BEGINNING OF FACTS
Chemical attack: No information.
Type of casualty: civilian.
END OF FACTS
Generated fact: BEGINNING OF FACTS
The actor(s) responsible for the chemical attack is/are terrorists.
END OF FACTS
Generated fact: BEGINING OF FACTS
Al-Hamadaniyeh area in Aleppo city
END OF FACTS
Generated fact: BEGINING OF FACTS
30 October 2016
END OF FACTS
No JSON found. This is generated text:
 BEGINING OF FACTS
30 October 2016
END OF FACTS


In [None]:
import csv
import json

# Define the CSV file name
csv_file = 'fake_articles_one_by_one_generation_new.csv'

def filter_tuples(tuples_list):
    """Filter out tuples that contain None."""
    return [tup for tup in tuples_list if None not in tup]

def append_to_csv(data, file_name):
    """Append filtered data to a CSV file."""
    with open(file_name, mode='a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        # Convert dictionaries in the tuples to JSON strings
        for row in data:
            converted_row = [json.dumps(item) if isinstance(item, dict) else item for item in row]
            writer.writerow(converted_row)
        print(f'Data has been appended to {file_name}')

# Assuming results_new_prompt is already defined and populated
filtered_data = filter_tuples(results)
#append_to_csv(filtered_data, csv_file)

print(f"Number of valid entries: {len(filtered_data)}")

# Print the second element (index 1) from each filtered tuple for demonstration purposes
for entry in filtered_data:
    print(entry[2])


In [2]:
import random
import ollama
import json
import re
import pandas as pd


# Read CSV file with specified encoding
df = pd.read_csv('FA-KES-Dataset.csv', encoding='ISO-8859-1')  # or use 'latin1'

# Filter data where 'labels' column is equal to 1
true_articles = df[df['labels'] == 1]

# Display the filtered data
print(len(true_articles))




CHANGES3 = [['actor', 'You need to choose different facts for the value "Actor". Changed facts must bear different meaning. Common actors of Syrian war are: rebel groups, Russian forces, ISIS, the Syrian army, USA army, etc.'],
['cause of death', 'You need to choose different facts for the value "Cause of death". Changed facts must bear different meaning. Examples are shooting, shelling, chemical weapons, explosians, etc.'],
['date of death', """You need to choose different dates for the value "Date of death". Changed facts must bear different meaning. """],
['Place of death', """You need to choose different facts for the value "Place of death". Changed facts must bear different meaning. Common places are Aleppo, Damascus, Homs, Idlib, Hasaka, Deir ez-Zor, Daraa, Qamishli or Tartus."""],
['name of casualty or group', 'You need to choose different facts for the value "Name of casualty or group". Changed facts must bear different meaning. Comon examples are: Civilians (General category encompassing men, women, and children), Syrian Army (SAA), Free Syrian Army (FSA), National Liberation Front (NLF), Hay\'at Tahrir al-Sham (HTS), People\'s Protection Units (YPG), Syrian Democratic Forces (SDF), Islamic State (ISIS), Hezbollah,Russian Forces, The White Helmets (Syrian Civil Defence), Kurdish Female Fighters (YPJ), Foreign Fighters (joining various factions),Al-Nusra Front Commanders, Civilians in Refugee Camps'],
]

def find_json(text):
    content = []

    # Regular expression pattern to match the structured JSON format
    #pattern0 = (
    #    r'.*"Name of casualty or group":\s*"(.*?)".*'
    #    r'.*"Gender or age group":\s*"(.*?)".*'
    #    r'.*"Cause of death":\s*"(.*?)".*'
    #    r'.*"Type":\s*"(.*?)".*'
    #    r'.*"Actor":\s*"(.*?)".*'
    #    r'.*"Place of death":\s*"(.*?)".*'
    #    r'.*"Date of death":\s*"(.*?)".*'
    #)
    #pattern1 = (
    #    r'.*"Name of casualty or group":\s*"(.*?)".*\n'
    #    r'.*"Gender or age group":\s*"(.*?)".*\n'
    #    r'.*"Cause of death":\s*"(.*?)".*\n'
    #    r'.*"Type":\s*"(.*?)".*\n'
    #    r'.*"Actor":\s*"(.*?)".*\n'
    #    r'.*"Place of death":\s*"(.*?)".*\n'
    #    r'.*"Date of death":\s*"(.*?)".*\n'
    #)
    #pattern2 = (
    #    r".*'Name of casualty or group':\s*'(.*?)'.*\n"
    #    r".*'Gender or age group':\s*'(.*?)'.*\n"
    #    r".*'Cause of death':\s*'(.*?)'.*\n"
    #    r".*'Type':\s*'(.*?)'.*\n"
    #    r".*'Actor':\s*'(.*?)'.*\n"
    #    r".*'Place of death':\s*'(.*?)'.*\n"
    #    r".*'Date of death':\s*'(.*?)'.*"
    #)
    #pattern3= (
    #r".*'Name of casualty or group':\s*'(.*?)'.*"
    #r".*'Gender or age group':\s*'(.*?)'.*"
    #r".*'Cause of death':\s*'(.*?)'.*"
    #r".*'Type':\s*'(.*?)'.*"
    #r".*'Actor':\s*'(.*?)'.*"
    #r".*'Place of death':\s*'(.*?)'.*"
    #r".*'Date of death':\s*'(.*?)'.*"
    #)
    #pattern4 = (
    #    r'.*"Name of casualty or group":\s*"(.*?)".*\n'
    #    r'.*"Gender or age group":\s*"(.*?)".*\n'
    #    r'.*"Cause of death":\s*"(.*?)".*\n'
    #    r'.*"Type":\s*"(.*?)".*\n'
    #    r'.*"Actor":\s*"(.*?)".*\n'
    #    r'.*"Place of death":\s*"(.*?)".*\n'
    #    r'.*"Date of death":\s*"(.*?)".*'
    #)
    pattern0 = (
        r'.*Name of casualty or group.*:\s*(.*?),.*\n'
        r'.*Gender or age group.*:\s*(.*?),.*\n'
        r'.*Cause of death.*:\s*(.*?),.*\n'
        r'.*Type.*:\s*(.*?),.*\n'
        r'.*Actor.*:\s*(.*?),.*\n'
        r'.*Place of death.*:\s*(.*?),.*\n'
        r'.*Date of death.*:\s*(.*?)\s*\n'
    )
    pattern1 = (
        r'\{.*Name of casualty or group.*:\s*(.*?),.*'
        r'.*Gender or age group.*:\s*(.*?),.*'
        r'.*Cause of death.*:\s*(.*?),.*'
        r'.*Type.*:\s*(.*?),.*'
        r'.*Actor.*:\s*(.*?),.*'
        r'.*Place of death.*:\s*(.*?),.*'
        r'.*Date of death.*:\s*(.*?)\}.*'
    )
    
    


    matches = re.finditer(pattern0, text)

    for match in matches:
        content_dict = {
            "Name of casualty or group": match.group(1).replace('\"', ''),
            "Gender or age group": match.group(2).replace('\"', ''),
            "Cause of death": match.group(3).replace('\"', ''),
            "Type": match.group(4).replace('\"', ''),
            "Actor": match.group(5).replace('\"', ''),
            "Place of death": match.group(6).replace('\"', ''),
            "Date of death": match.group(7).replace('\"', ''),
        }
        content.append(content_dict)
    # Find all matches in the text
    matches = re.finditer(pattern1, text)

    for match in matches:
        content_dict = {
            "Name of casualty or group": match.group(1).replace('\"', ''),
            "Gender or age group": match.group(2).replace('\"', ''),
            "Cause of death": match.group(3).replace('\"', ''),
            "Type": match.group(4).replace('\"', ''),
            "Actor": match.group(5).replace('\"', ''),
            "Place of death": match.group(6).replace('\"', ''),
            "Date of death": match.group(7).replace('\"', ''),
        }
        content.append(content_dict)

    return content

import re

def extract_last_article(text):
    # Define the regex pattern to extract text between the specified phrases.
    pattern = r"BEGINNING OF THE ARTICLE(.*?)END OF THE ARTICLE"
    
    # Use re.DOTALL to ensure that the dot matches newlines as well
    matches = list(re.finditer(pattern, text, re.DOTALL))  # Get all matches

    # Check if there are any matches
    if matches:
        last_match = matches[-1]  # Get the last match
        return last_match.group(1).strip()  # Return the extracted text without leading/trailing whitespace
    
    return None  # If no matches found, return None

def extract_between_three_asterisks(text):
    # Find all occurrences of the asterisk
    asterisks = text.split('***')

    # Check if there are exactly two non-empty segments (meaning there are two asterisks)
    if len(asterisks) == 3 and asterisks[1] != '':
        # Return the content between the first and last asterisks
        return asterisks[1].strip()  # This is the part between the first and second asterisks
    else:
        return None  # Return None for invalid cases


def print_readable_dict(data):
    """
    Prints a dictionary in a readable JSON-like format.

    Args:
        data (dict): The dictionary to be printed.

    """
    print(json.dumps(data, indent=4, ensure_ascii=False))


light_paraphrase_prompt = """
Below is an article about an event during the Syrian war from 2015 to 2017:

{article}

Here is the related data extracted from the article in JSON format:

{extracted_data}

Please lightly paraphrase the article while changing either one or two facts from the extracted data:

{change}

Use iformation from JSON format except for the changed indoemation to create lightly paraphrased article. Make sure that all the information in the article is included. Ensure that the generated article is coherent. Print the updated JSON data as well, using double quotes and not single quotes. Do not provide any additional information except for the changed article and the changed JSON dataset.

The paraphrased article should begin and end with an asterisk sign '*'.
"""
change_one_information_prompt = """You are a scientist analyzing articles from the well known scientific dataset FA-KES. Here is an article:

BIGGINING OF THE ARTICLE
{article}
END OF THE ARTICLE

Here is the related data extracted from the article in JSON format:

{extracted_data}

Please, follow the instructions:
Point 1: {change_data_1}
Point 2: Create a new JSON file, which is the same as the old one, with the exception of {change_topic_1}, which is given new information. JSON file should be displayed in standard notation, with use of double and not single quotes.
Point 3: Generate a new article in which you will change the information for {change_topic_1}. Ensure that all occurrences of {change_topic_1} are changed. You must preserve all other information from article.
Point 4: Paraphrase the article generated in Point 3 once more and ensure that {change_topic_1} is changed acording to Point 1. This article should begin with the phrase "BEGINNING OF THE ARTICLE" and end with "END OF THE ARTICLE". Make sure you include those phrases.
"""


import ollama

prompt1 = """You are a journalist tasked with analyzing an article that reports on casualties related to the war in Syria. Your goal is to extract specific information regarding casualties mentioned in the article.

Please extract the following details of casualties in the news in JSON format.
{{
    "Name of casualty or group": The individual's name or the name of the group associated with the casualty.,
    "Gender or age group": Indicate if the person is male or female, or specify their age group (e.g., child, adult, senior).,
    "Cause of death": (e.g., shooting, shelling, chemical weapons, etc.),
    "Type": Classify the casualty as a civilian or non-civilian (e.g., military personnel).,
    "Actor": Identify the actor involved in the incident, such as rebel groups, Russian forces, ISIS, the Syrian army, U.S. military, etc.,
    "Place of death": Specify the location where the casualty occurred (e.g., Aleppo, Damascus, Homs, Idlib, Raqqa, Daraa, Deir ez-Zor, Qamishli, Palmyra, etc.).,
    "Date of death": Provide the date when the casualty occurred.
}}

EXAMPLE of an output:
{{
    "Name of casualty or group": Not specifically named; referred to casualties at the airbase,
    "Gender or age group": Not specified in the article.,
    "Cause of death": explosion,
    "Type": Non-civilian (military personnel at the airbase).,
    "Actor": U.S. military (responsible for the missile strike). Syrian Arab Army (the airbase was providing support for army operations).,
    "Place of death": Near Homs, Syria (specific airbase mentioned).,
    "Date of death": April 7, 2017
}}

BIGGINING OF THE ARTICLE
{article}
END OF THE ARTICLE

Ensure that the extracted information is as accurate and detailed as possible. Take context into account, and if certain data points are not available or mentioned in the article, output "Not available". Try to incorporate all casualties in one file.
"""

try:
    del list
except:
    pass
indices = random.sample(list(range(len(true_articles))), 60)
results_new_prompt = []
for index in indices:

        
    print(list(true_articles['article_content'])[index])
    response = ollama.chat(model='llama3.1:8b', messages=[
        {
            'role': 'user',
            'content': prompt1.format(article = list(true_articles['article_content'])[index]),
            'temperature':0.2,
        },
    ])

    generated = response['message']['content']
    json_list = find_json(generated)

    if json_list == []:
        print("No JSON found. This is generated text: \n", generated)
    if json_list != []:
        for json_dict in json_list[:2]:
            print("THIS IS DICTIONARY THAT WAS EXTRACTED")
            print_readable_dict(json_dict)
            topic_to_change = random.sample(CHANGES3, 2)
            print("We will change topics:", topic_to_change[0][0], " and ", topic_to_change[1][0])
            response = ollama.chat(model='llama3.1:8b', messages=[
                {
                    'role': 'user',
                    'content': change_one_information_prompt.format(article = list(true_articles['article_content'])[index],
                                                               extracted_data = json.dumps(json_dict, indent=4, ensure_ascii=False),
                                                               change_topic_1 = topic_to_change[0][0],
                                                               change_data_1 = topic_to_change[0][1]
                                                               ),
                    'temperature':0.2,
                },
            ])

            generated = response['message']['content']
            changed_json = find_json(generated)
            if len(changed_json) == 0:
                changed_json = None
            else:
                changed_json = changed_json[0]
            changed_article = extract_last_article(generated)

            if changed_article == None or changed_json == None:
                print("We did not get through the first round!")
                print("THIS IS THE FIRST GENERATED TEXT \n\n", generated)
                print("EXTRACTED ARTICLE: ", changed_article )
                print("EXTRACTED JSON: ", changed_json)
                results_new_prompt.append((index, None, None ,list(true_articles['article_content'])[index], json_dict))
            else: #else update the second time
                response = ollama.chat(model='llama3.1:8b', messages=[
                {
                    'role': 'user',
                    'content': change_one_information_prompt.format(article = changed_article,
                                                               extracted_data = json.dumps(changed_json, indent=4, ensure_ascii=False),
                                                               change_topic_1 = topic_to_change[1][0],
                                                               change_data_1 = topic_to_change[1][1]
                                                               ),
                    'temperature':0.2,
                },
                ])

                generated = response['message']['content']
                twice_changed_json = find_json(generated)
                if len(twice_changed_json) == 0:
                    twice_changed_json = None
                else:
                    twice_changed_json = twice_changed_json[0]
                twice_changed_article = extract_last_article(generated)
                    
                print("THIS IS THE SECOND GENERATED TEXT \n\n", generated)
                print("EXTRACTED ARTICLE: ", twice_changed_article)
                print("EXTRACTED JSON: ", twice_changed_json)
                results_new_prompt.append([index, twice_changed_article, twice_changed_json, list(true_articles['article_content'])[index], json_dict, topic_to_change[0][0], topic_to_change[0][1]])




426
Date of publication 19 October 2016 Russia has accused Belgium of killing civilians in Aleppo while partaking in coalition strikes. Tags Belgium Syria coalition air raids strikes Aleppo Russia has accused Belgium a member of a US-led coalition fighting militants in Syria of killing six civilians in airstrikes in the Aleppo region but the claim was swiftly denied by Brussels. "Six people were killed and four people injured to various degrees as a result of bombing that destroyed two homes" in the village of Hassajik in the Aleppo region early on Tuesday the Russian defence ministry said in a statement. "Two of Belgiums F-16s were identified at that time in the region" it said late Tuesday. "There were no Russian and Syrian air force planes in this region." Belgian defence ministry spokeswoman Laurence Mortier denied the countrys air force was active in the area at the time. Moscow had announced on Tuesday that the Russian and Syrian air forces had stopped bombing Aleppo to pave the 

In [3]:
import csv
import json

# Define the CSV file name
csv_file = 'fake_articles_one_by_one_generation_new2.csv'

def filter_tuples(tuples_list):
    """Filter out tuples that contain None."""
    return [tup for tup in tuples_list if None not in tup]

def append_to_csv(data, file_name):
    """Append filtered data to a CSV file."""
    with open(file_name, mode='a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        # Convert dictionaries in the tuples to JSON strings
        for row in data:
            converted_row = [json.dumps(item) if isinstance(item, dict) else item for item in row]
            writer.writerow(converted_row)
        print(f'Data has been appended to {file_name}')

# Assuming results_new_prompt is already defined and populated
append_to_csv(results_new_prompt, csv_file)
filtered_data = filter_tuples(results_new_prompt)

print(f"Number of valid entries: {len(filtered_data)}")



Data has been appended to fake_articles_one_by_one_generation_new2.csv
Number of valid entries: 1


In [4]:
import csv
import json
import ast

def read_csv_as_list_of_lists(filename):
    """Reads a CSV file and returns its contents as a list of lists."""
    with open(filename, mode='r', newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        return [row for row in reader]

def discard_third_entry(data):
    """Removes the third entry from each row in the list of lists."""
    filtered_data = [row[:2] + row[3:] for row in data]  # Keep all except the 3rd item
    return filtered_data

def convert_strings_to_json(data):
    """Converts JSON format strings within nested lists into Python dictionaries."""
    for sublist in data:
        # Convert the string at the third position (index 2)
        if isinstance(sublist[2], str):
            try:
                sublist[2] = ast.literal_eval(sublist[2])
            except json.JSONDecodeError as e:
                print(f"Failed to decode JSON at index 2: {e}")
                print(sublist[2])

        # Convert the string at the fifth position (index 4)
        if isinstance(sublist[4], str):
            try:
                sublist[4] = ast.literal_eval(sublist[4])
            except json.JSONDecodeError as e:
                print(f"Failed to decode JSON at index 4: {e}")
            

    return data

# Read the CSV file
csv_file = 'fake_articles_one_by_one_generation_new2.csv'
data2 = read_csv_as_list_of_lists(csv_file)

# Discard the third entry of each element
filtered_data2 = data2

def filter_tuples(tuples_list):
    # Use a list comprehension to construct a new list
    filtered_list = [tup for tup in tuples_list if None not in tup and '' not in tup]
    return filtered_list


filtered_data2 = filter_tuples(filtered_data2)
print(len(filtered_data2))

filtered_data2 = convert_strings_to_json(filtered_data2)
true_events_in_JSON2 = [row[4] for row in filtered_data2]

2


In [5]:
import random
import ollama
import json
import re


LIST_OF_CHANGES = [
    ["Name of casualty or group", "Name of casualty or group is the name of the casualty or the name of the group associated with the casualty.", """Name of casualty or group is the name of the casualty or the name of the group associated with the casualty. Is the "Name of casualty or group" in the article approximately coherent with this description: {}? If the description comes from the article, output "The answer is true" and otherwise output "The answer is false". In addition to "The answer is true" or "The answer is false" label provide short explanation. Do not be to specific, but consider if the information matches overall."""],
    ["Gender or age group", "Gender or age group indicates if the casualty is male or female, or specifies their age group.", """Gender or age group indicates if the casualty is male or female, or specifies their age group. Is the "Gender or age group" of the casualty in the article approximately coherent with this description: {}? If the description comes from the article, output "The answer is true" and otherwise output "The answer is false". In addition to "The answer is true" or "The answer is false" label provide short explanation. Do not be to specific, but consider if the information matches overall."""],
    ["Cause of death", "Cause of death is the weapon used in the attack (e.g., shooting, shelling, chemical weapons, etc.).", """Cause of death is the weapon used in the attack (e.g., shooting, shelling, chemical weapons, etc.). Is the "Cause of death" in the article approximately coherent with this description: {}? If the description comes from the article, output "The answer is true" and otherwise output "The answer is false". In addition to "The answer is true" or "The answer is false" label provide short explanation. Do not be to specific, but consider if the information matches overall."""],
    ["Type", "Type is the information if the casualty is civilian or non-civilian.", """Type is the information if the casualty is civilian or non-civilian. Is the "Type" (civilian or non-civilian) in the article approximately coherent with this description: {}? If the description comes from the article, output "The answer is true" and otherwise output "The answer is false". In addition to "The answer is true" or "The answer is false" label provide short explanation. Do not be to specific, but consider if the information matches overall."""],
    ["Actor", "The actor is the person or group responsible for the attack.", """The actor is the person or group responsible for the attack. Is the "Actor" (group responsible for the attack) in the article approximately coherent with this description: {}? If the description comes from the article, output "The answer is true" and otherwise output "The answer is false". In addition to "The answer is true" or "The answer is false" label provide short explanation. Do not be to specific, but consider if the information matches overall."""],
    ["Place of death", "Place of death refers to the cities or areas where the attacks happened.", """Place of death refers to the cities or areas where the attacks happened. Is the "Place of death" in the article approximately coherent with this description: {}? If the description comes from the article, output "The answer is true" and otherwise output "The answer is false". In addition to "The answer is true" or "The answer is false" label provide short explanation. Do not be to specific, but consider if the information matches overall."""],
    ["Date of death", "The date of death refers to when the attack happened in the article.", """The date of death refers to when the attack happened in the article. Is the "Date of death" in the article approximately coherent with this description: {}? If the description comes from the article, output "The answer is true" and otherwise output "The answer is false". In addition to "The answer is true" or "The answer is false" label provide short explanation. Do not be to specific, but consider if the information matches overall."""]
]

def find_first_true_or_false(text):
    """Finds the first occurrence of the words 'true' or 'false' in the text and returns it with its position."""
    # Regular expression pattern to match 'true' or 'false'
    pattern = r'\b(The answer is true|The answer is false|The answer is True|The answer is False|The answer is TRUE|The answer is FALSE|the answer is true|the answer is false|the answer is True|the answer is False|the answer is TRUE|the answer is FALSE)\b'

    # Search for the first occurrence
    match = re.search(pattern, text, re.IGNORECASE)  # Using IGNORECASE to match 'True', 'False' etc.

    if match:
        word = match.group(1)  # Get the matched word ('true' or 'false')
        return word.lower()
    else:
        return None # Return None if not found, and -1 for position


prompt = """You will be given an event in Syrian war dated from 2013 to 2017. Pleas read and understand the event that is stored in JSON format:

{events}

You must check that the event presented in the article is from among previously red events. Try to check that all the information matches. That means that "Name of casualty or group" "Gender or age group", "Cause of death", "Type", "Actor", "Place of death" and "Date of death" must match.

{article}

If the article matches some event print 'true', else print 'false'. In addition to 'true' or 'false' provide explanation.
"""

prompt_one_by_one = """Carefully read throug the article and try to understand its {topic}. {meaning_of_topic}
{article}

{question}
"""


data_soft_comparison = []
for i, data2 in enumerate(filtered_data2):
    num_of_false = 0
    num_of_true = 0

    for  topic, meaning_of_topic, question in LIST_OF_CHANGES:
        response = ollama.chat(model='llama3.1:8b', messages=[
                {
                    'role': 'user',
                    'content': prompt_one_by_one.format(article = data2[3], meaning_of_topic = meaning_of_topic, question = question.format(data2[4][topic]), topic = topic),
                    'temperature':0.2,
                },
            ])

        generated = response['message']['content']


        opinion = find_first_true_or_false(generated)
        if opinion == "the answer is true":
            num_of_true += 1
        elif opinion == "the answer is false":
            num_of_false += 1
        else:
            pass
    print('True article:')
    if num_of_false + num_of_true == len(LIST_OF_CHANGES):
        print(num_of_false)
        data_soft_comparison.append([[num_of_false, True]])
    else:
        print(num_of_false, "Missing!!!")
        data_soft_comparison.append([[num_of_false, num_of_false + num_of_true]])

    num_of_false = 0
    num_of_true = 0

    for  topic, meaning_of_topic, question in LIST_OF_CHANGES:
        response = ollama.chat(model='llama3.1:8b', messages=[
                {
                    'role': 'user',
                    'content': prompt_one_by_one.format(article = data2[1], meaning_of_topic = meaning_of_topic, question = question.format(data2[4][topic]), topic = topic),
                    'temperature':0.2,
                },
            ])

        generated = response['message']['content']
 
        opinion = find_first_true_or_false(generated)
        if opinion == "the answer is true":
            num_of_true += 1
        elif opinion == "the answer is false":
            num_of_false += 1
        else:
            pass
    print('False article:')
    if num_of_false + num_of_true == len(LIST_OF_CHANGES):
        print(num_of_false)
        data_soft_comparison[-1].append([num_of_false, True])
    else:
        print(num_of_false, "Missing!!!")
        data_soft_comparison[-1].append([num_of_false, num_of_false + num_of_true])

True article:
0
False article:
2
True article:
1
False article:
1


In [None]:
import random
import ollama
import json
import re
import pandas as pd

def extract_and_change_articles(true_articles, name_of_the_model, size_of_sample=60):
    """Extracts information from articles and applies two rounds of information changes.
    
    Args:
        true_articles (DataFrame): DataFrame containing the articles.
        name_of_the_model (str): The name of the language model to use.
        size_of_sample (int): Number of articles to process. Default is 60.
        
    Returns:
        list: A list of results containing the indices, changed articles, twice changed JSON, original articles, etc.
    """
    
    CHANGES3 = [
        # List of changes to apply
        ['actor', 'You need to choose different facts for the value "Actor". Changed facts must bear different meaning. Common actors of Syrian war are: rebel groups, Russian forces, ISIS, the Syrian army, USA army, etc.'],
        ['cause of death', 'You need to choose different facts for the value "Cause of death". Changed facts must bear different meaning. Examples are shooting, shelling, chemical weapons, explosions, etc.'],
        ['date of death', """You need to choose different dates for the value "Date of death". Changed facts must bear different meaning."""],
        ['Place of death', """You need to choose different facts for the value "Place of death". Changed facts must bear different meaning. Common places are Aleppo, Damascus, Homs, Idlib, Hasaka, Deir ez-Zor, Daraa, Qamishli or Tartus."""],
        ['name of casualty or group', 'You need to choose different facts for the value "Name of casualty or group". Changed facts must bear different meaning. Common examples are: Civilians (General category encompassing men, women, and children), Syrian Army (SAA), Free Syrian Army (FSA), National Liberation Front (NLF), Hay\'at Tahrir al-Sham (HTS), People\'s Protection Units (YPG), Syrian Democratic Forces (SDF), Islamic State (ISIS), Hezbollah,Russian Forces, The White Helmets (Syrian Civil Defence), Kurdish Female Fighters (YPJ), Foreign Fighters (joining various factions),Al-Nusra Front Commanders, Civilians in Refugee Camps']
    ]

    def find_json(text):
        # Function to find JSON-like content in the text
        content = []
        pattern0 = (
            r'.*Name of casualty or group.*:\s*(.*?),.*\n'
            r'.*Gender or age group.*:\s*(.*?),.*\n'
            r'.*Cause of death.*:\s*(.*?),.*\n'
            r'.*Type.*:\s*(.*?),.*\n'
            r'.*Actor.*:\s*(.*?),.*\n'
            r'.*Place of death.*:\s*(.*?),.*\n'
            r'.*Date of death.*:\s*(.*?)\s*\n'
        )
        pattern1 = (
            r'\{.*Name of casualty or group.*:\s*(.*?),.*'
            r'.*Gender or age group.*:\s*(.*?),.*'
            r'.*Cause of death.*:\s*(.*?),.*'
            r'.*Type.*:\s*(.*?),.*'
            r'.*Actor.*:\s*(.*?),.*'
            r'.*Place of death.*:\s*(.*?),.*'
            r'.*Date of death.*:\s*(.*?)\}.*'
        )
        
        matches = re.finditer(pattern0, text)

        for match in matches:
            content_dict = {
                "Name of casualty or group": match.group(1).replace('\"', ''),
                "Gender or age group": match.group(2).replace('\"', ''),
                "Cause of death": match.group(3).replace('\"', ''),
                "Type": match.group(4).replace('\"', ''),
                "Actor": match.group(5).replace('\"', ''),
                "Place of death": match.group(6).replace('\"', ''),
                "Date of death": match.group(7).replace('\"', ''),
            }
            content.append(content_dict)

        # Second pattern matching
        matches = re.finditer(pattern1, text)

        for match in matches:
            content_dict = {
                "Name of casualty or group": match.group(1).replace('\"', ''),
                "Gender or age group": match.group(2).replace('\"', ''),
                "Cause of death": match.group(3).replace('\"', ''),
                "Type": match.group(4).replace('\"', ''),
                "Actor": match.group(5).replace('\"', ''),
                "Place of death": match.group(6).replace('\"', ''),
                "Date of death": match.group(7).replace('\"', ''),
            }
            content.append(content_dict)

        return content

    def extract_last_article(text):
        # Function to extract the last article
        pattern = r"BEGINNING OF THE ARTICLE(.*?)END OF THE ARTICLE"
        matches = list(re.finditer(pattern, text, re.DOTALL))
        if matches:
            last_match = matches[-1]
            return last_match.group(1).strip()
        return None

    def print_readable_dict(data):
        # Function to print data in a readable JSON-like format
        print(json.dumps(data, indent=4, ensure_ascii=False))
    
    prompt1 = """..."""  # Truncated for brevity; fill with the full prompt content.

    indices = random.sample(list(range(len(true_articles))), size_of_sample)
    results = []
    for index in indices:

        article_content = list(true_articles['article_content'])[index]
        print(article_content)
        
        response = ollama.chat(model=name_of_the_model, messages=[
            {'role': 'user', 'content': prompt1.format(article=article_content), 'temperature': 0.2}
        ])

        generated = response['message']['content']
        json_list = find_json(generated)

        if json_list == []:
            print("No JSON found. This is generated text: \n", generated)
        else:
            for json_dict in json_list[:2]:
                print("THIS IS DICTIONARY THAT WAS EXTRACTED")
                print_readable_dict(json_dict)
                topic_to_change = random.sample(CHANGES3, 2)
                print("We will change topics:", topic_to_change[0][0], " and ", topic_to_change[1][0])
                
                response = ollama.chat(model=name_of_the_model, messages=[
                    {
                        'role': 'user',
                        'content': change_one_information_prompt.format(
                            article=article_content,
                            extracted_data=json.dumps(json_dict, indent=4, ensure_ascii=False),
                            change_topic_1=topic_to_change[0][0],
                            change_data_1=topic_to_change[0][1]
                        ),
                        'temperature': 0.2,
                    }
                ])

                generated = response['message']['content']
                changed_json = find_json(generated)
                changed_json = changed_json[0] if changed_json else None
                
                changed_article = extract_last_article(generated)

                if not changed_article or not changed_json:
                    print("We did not get through the first round!")
                    print("THIS IS THE FIRST GENERATED TEXT \n\n", generated)
                    print("EXTRACTED ARTICLE: ", changed_article)
                    print("EXTRACTED JSON: ", changed_json)
                    results.append((index, None, None, article_content, json_dict))
                else:
                    response = ollama.chat(model=name_of_the_model, messages=[
                        {
                            'role': 'user',
                            'content': change_one_information_prompt.format(
                                article=changed_article,
                                extracted_data=json.dumps(changed_json, indent=4, ensure_ascii=False),
                                change_topic_1=topic_to_change[1][0],
                                change_data_1=topic_to_change[1][1]
                            ),
                            'temperature': 0.2,
                        }
                    ])

                    generated = response['message']['content']
                    twice_changed_json = find_json(generated)
                    twice_changed_json = twice_changed_json[0] if twice_changed_json else None
                    
                    twice_changed_article = extract_last_article(generated)
                        
                    print("THIS IS THE SECOND GENERATED TEXT \n\n", generated)
                    print("EXTRACTED ARTICLE: ", twice_changed_article)
                    print("EXTRACTED JSON: ", twice_changed_json)
                    results.append([index, twice_changed_article, twice_changed_json, article_content, json_dict, topic_to_change[0][0], topic_to_change[0][1]])
    
    return results

# Example usage:
# results = extract_and_change_articles(true_articles, 'language_model_name', 60)


In [None]:
import subprocess

def pull_model(model_name):
    try:
        # Run the Ollama pull command
        result = subprocess.run(
            ["ollama", "pull", model_name],
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        )
        print("Model pulled successfully:")
        print(result.stdout)
    except subprocess.CalledProcessError as e:
        print("An error occurred while pulling the model:")
        print(e.stderr)

# Example usage
pull_model("mistral:7b")




Model pulled successfully:



In [None]:
#This does not work!!!!!!
import ollama

response = ollama.chat(model="mistral:7b", messages=[
                    {
                        'role': 'user',
                        'content': "Hello how are you",
                        'temperature':0.2,
                    },
                ])

generated = response['message']['content']
print(generated)

In [5]:
import ollama

response = ollama.chat(model="Gemma2:9b", messages=[
                    {
                        'role': 'user',
                        'content': "Hello how are you",
                        'temperature':0.2,
                    },
                ])

generated = response['message']['content']
print(generated)

As an AI, I don't have feelings or experiences like humans do. But thanks for asking!

How can I help you today? 😊


In [1]:

# Facts for fake article generation
#_________________________________________________________________
CHANGES3 = [
        #['actor', 'The actor is the person or group responsible for the attack. You need to choose different facts for the value "Actor". Changed facts must bear different meaning. Common actors of Syrian war are: rebel groups, Russian forces, ISIS, the Syrian army, USA army, etc.'],
        ['cause of death', 'Cause of death is the weapon used in the attack. You need to choose different facts for the value "Cause of death". Changed facts must bear different meaning. Examples are shooting, shelling, chemical weapons, explosions, etc.'],
        ['date of death', 'The date of death refers to time where the attack happened in the article. You need to choose different dates for the value "Date of death". Changed facts must bear different meaning.'],
        ['Place of death', 'Place of death refers to the cities or areas where the attacks happened. You need to choose different facts for the value "Place of death". Changed facts must bear different meaning. Common places are Aleppo, Damascus, Homs, Idlib, Hasaka, Deir ez-Zor, Daraa, Qamishli or Tartus.'],
        ['name of casualty or group', 'Name of casualty or group is the name of the casualty or the name of the group associated with the casualty. You need to choose different facts for the value "Name of casualty or group". Changed facts must bear different meaning. Common examples are: Civilians (General category encompassing men, women, and children), Syrian Army (SAA), Free Syrian Army (FSA), National Liberation Front (NLF), Hay\'at Tahrir al-Sham (HTS), People\'s Protection Units (YPG), Syrian Democratic Forces (SDF), Islamic State (ISIS), Hezbollah, Russian Forces, The White Helmets (Syrian Civil Defence), Kurdish Female Fighters (YPJ), Foreign Fighters (joining various factions), Al-Nusra Front Commanders, Civilians in Refugee Camps.'],
    ]
CHANGES_AGGRESSIVE = [
        ['actor', 'The actor is the person or group responsible for the attack. You need to choose totally different facts for the values of "Actor". Changed values must bear different meaning. Common actors of Syrian war are: rebel groups, Russian forces, ISIS, the Syrian army, USA army, etc.'],
        ['cause of death', 'Cause of death is the weapon used in the attack. You need to choose totally different facts for the values of "Cause of death". Changed values must bear different meaning. Examples are shooting, shelling, chemical weapons, explosions, etc.'],
        ['date of death', 'The date of death refers to time where the attack happened in the article. You need to choose totally different dates for the values of "Date of death". Changed values must bear different meaning. Use date that precedes the date for up to 1 year.'],
        ['Place of death', 'Place of death refers to the cities or areas where the attacks happened. You need to choose totally different facts for the values of "Place of death". Changed values must bear different meaning. Common places are Aleppo, Damascus, Homs, Idlib, Hasaka, Deir ez-Zor, Daraa, Qamishli or Tartus.'],
        ['name of casualty or group', 'Name of casualty or group is the name of the casualty or the name of the group associated with the casualty. You need to choose totally different facts for the values of "Name of casualty or group". Changed values must bear different meaning. Common examples are: Civilians (General category encompassing men, women, and children), Syrian Army (SAA), Free Syrian Army (FSA), National Liberation Front (NLF), Hay\'at Tahrir al-Sham (HTS), People\'s Protection Units (YPG), Syrian Democratic Forces (SDF), Islamic State (ISIS), Hezbollah, Russian Forces, The White Helmets (Syrian Civil Defence), Kurdish Female Fighters (YPJ), Foreign Fighters (joining various factions), Al-Nusra Front Commanders, Civilians in Refugee Camps.'],
    ]

TOPICS = [["Name of casualty or group", " represents the casualties' names or the names of the groups associated with the casualties."],
                ["Gender or age group", " of casualty indicates if the casualties are male or female, or specify their age group (e.g., child, adult, senior)."],
                ["Cause of death", " specifies the weapons used by the aggressor (e.g., shooting, shelling, chemical weapons, etc.)"],
                ["Type", " of casualty classifies the casualties as a civilian or non-civilian (e.g., military personnel are non-civilians)."],
                ["Actor", " identifies the actors responsible for the incident, such as rebel groups, Russian forces, ISIS, the Syrian army, U.S. military, etc."],
                ["Place of death", " specifies the locations where the attacks occurred (e.g., Aleppo, Damascus, Homs, Idlib, Raqqa, Daraa, Deir ez-Zor, Qamishli, Palmyra, etc.)."],
                ["Date of death", " provides the dates when the attacks occurred."]]

prompt_for_one_by_one_fact_extraction_concise_version = """You are a journalist tasked with analyzing an article that reports on casualties related to the war in Syria. Your goal is to extract specific information regarding attacks and casualties mentioned in the article.

    BEGINNING OF THE ARTICLE
    {article}
    END OF THE ARTICLE

    Please extract the facts about the {topic} from the article. {topic}{topic_content}. Be as concise as you can and do not output information that does not relate to {topic}, and if information is not present in the article, output "No information". Output should not exceed 15 words and should be written in the following format:
    BEGINNING OF FACTS
    <Display the extracted facts.>
    END OF FACTS
    """

light_paraphrase_prompt = """
    Below is an article about an event during the Syrian war from 2015 to 2017:

    {article}

    Here is the related data extracted from the article in JSON format:

    {extracted_data}

    Please lightly paraphrase the article while changing either one or two facts from the extracted data:

    {change}

    Use iformation from JSON format except for the changed indoemation to create lightly paraphrased article. Make sure that all the information in the article is included. Ensure that the generated article is coherent. Print the updated JSON data as well, using double quotes and not single quotes. Do not provide any additional information except for the changed article and the changed JSON dataset.

    The paraphrased article should begin and end with an asterisk sign '*'.
    """

change_one_information_prompt = """You are a scientist analyzing articles from the well known scientific dataset FA-KES. Here is an article:

    BEGINNING OF THE ARTICLE
    {article}
    END OF THE ARTICLE

    Here is the related data extracted from the article in JSON format:

    {extracted_data}

    Please, follow the instructions:
    Point 1: {change_data_1}
    Point 2: Create a new JSON file, which is the same as the old one, with the exception of {change_topic_1}, which is given new information. JSON file should be displayed in standard notation, with use of double and not single quotes.
    Point 3: Paraphrase a new article in which you will change the information for {change_topic_1} according to decision made in Point 1. Ensure that all occurrences of {change_topic_1} are changed. You must preserve all other information from article. This article should begin with the phrase "BEGINNING OF THE ARTICLE" and end with "END OF THE ARTICLE". Make sure you include those phrases.
    """

change_one_information = {"fact_transformation_abdul": """ You have a text as follows:  
BEGINNING OF THE ARTICLE
    {article}
END OF THE ARTICLE

The following facts have already been extracted from this article: {facts}. 

1. Please replace the fact {change_topic_1} in this article with a different value. 
2. Provide me with the same JSON file, but ensure that the value of {change_topic_1} is updated to the new value you used.
3. Please give me the news article with replaced value and include the phrases "BEGINNING OF THE ARTICLE" and "END OF THE ARTICLE" at the beginning and end of the paraphrased version, respectively."""

, "paraphrase_abdul": """You have a text as follows:  
BEGINNING OF THE ARTICLE
    {article}
END OF THE ARTICLE

The following facts have already been extracted from this article: {facts}. 

1. Please replace the fact {change_topic_1} in this article with a different value. 
2. Provide me with the same JSON file, but ensure that the value of {change_topic_1} is updated to the new value you used.
3. After making the replacement, please paraphrase the news article and include the phrases "BEGINNING OF THE ARTICLE" and "END OF THE ARTICLE" at the beginning and end of the paraphrased version, respectively."""

, "summarize_abdul": """You have a text as follows:  
BEGINNING OF THE ARTICLE
    {article}
END OF THE ARTICLE

The following facts have already been extracted from this article: {facts}. 

1. Please replace the fact {change_topic_1} in this article with a different value. 
2. Provide me with the same JSON file, but ensure that the value of {change_topic_1} is updated to the new value you used.
3. After making the replacement, please summarize the news article and include the phrases "BEGINNING OF THE ARTICLE" and "END OF THE ARTICLE" at the beginning and end of the paraphrased version, respectively."""
, "paraphrase": """You are a scientist analyzing articles from the well known scientific dataset FA-KES. Here is an article:

    BEGINNING OF THE ARTICLE
    {article}
    END OF THE ARTICLE

    Here is the related data extracted from the article in JSON format:

    {facts}

    Please, follow the instructions:
    Point 1: {change_data_1}
    Point 2: Create a new JSON file, which is the same as the old one, with the exception of {change_topic_1}, which is given new information given in Point 1. JSON file should be displayed in standard notation, with use of double and not single quotes, and should contain all the key values as the original one.
    Point 3: Paraphrase a new article in which you will change the information for {change_topic_1} according to decision made in Point 1. Ensure that all occurrences of {change_topic_1} are changed. You must preserve all other information from article. This article should begin with the phrase "BEGINNING OF THE ARTICLE" and end with "END OF THE ARTICLE". Make sure you include those phrases.
    """
, "paraphrase_aggressive": """You are a scientist analyzing articles from the well known scientific dataset FA-KES which is a historical document about Syrian war from 2013 to 2017. Here is an article:

    BEGINNING OF THE ARTICLE
    {article}
    END OF THE ARTICLE

    Here is the related data extracted from the article in JSON format:

    {facts}

    Please, follow the instructions:
    Point 1: {change_data_1}
    Point 2: Create a new JSON file, which is the same as the old one, with the exception of {change_topic_1}, which is given new information given in Point 1. JSON file should be displayed in standard notation, with use of double and not single quotes, and should contain all the key values as the original one.
    Point 3: Paraphrase a new article in which you will change the information for {change_topic_1} according to decision made in Point 1. Ensure that all occurrences of {change_topic_1} are changed and included in new article. You must preserve all other information from article. This article should begin with the phrase "BEGINNING OF THE ARTICLE" and end with "END OF THE ARTICLE". Make sure you include those phrases.
    Point 4: Check again the newly created article. All occurrences of {change_topic_1} must be changed, and details from original article must be preserved in a consistent way. If you spot any problem paraphrase article once more.
    """
}

prompt_fact_extraction = """You are a journalist tasked with analyzing an article that reports on casualties related to the war in Syria. Your goal is to extract specific information regarding casualties mentioned in the article.

    Please extract the following details of casualties in the news in JSON format.
    {{
        "Name of casualty or group": The individual's names or the names of the groups associated with the casualties.,
        "Gender or age group": Indicates if the persons is male or female, or specify their age groups (e.g., child, adult, senior).,
        "Cause of death": (e.g., shooting, shelling, chemical weapons, etc.),
        "Type": Classify the casualties as a civilian or non-civilian (e.g., military personnel are civilian).,
        "Actor": Identify the actors involved in the incidents, such as rebel groups, Russian forces, ISIS, the Syrian army, U.S. military, etc.,
        "Place of death": Specify the locations of the attack (e.g., Aleppo, Damascus, Homs, Idlib, Raqqa, Daraa, Deir ez-Zor, Qamishli, Palmyra, etc.).,
        "Date of death": Provide the dates when the attacks occurred.
    }}

    EXAMPLE of an output:
    {{
        "Name of casualty or group": Not specifically named; referred to casualties at the airbase,
        "Gender or age group": Not specified in the article.,
        "Cause of death": explosion,
        "Type": Non-civilian (military personnel at the airbase).,
        "Actor": U.S. military (responsible for the missile strike). Syrian Arab Army (the airbase was providing support for army operations).,
        "Place of death": Near Homs, Syria (specific airbase mentioned).,
        "Date of death": April 7, 2017
    }}

    BEGINNING OF THE ARTICLE
    {article}
    END OF THE ARTICLE

    Ensure that the extracted information is as accurate and detailed as possible. Take context into account, and if certain data points are not available or mentioned in the article, output "Not available". Try to incorporate all casualties in one file.
    """

prompt_fact_extraction_abdul = """You are a journalist assigned to analyze a war-related article that reports on casualties. Your task is to identify specific events, facts, or information regarding casualties mentioned in the article.

Please extract the following details about the casualties mentioned in the news article, ensuring that the information is as accurate and detailed as possible. If certain details are missing or not provided in the article, write "Not available". Finally, compile the extracted information into one JSON file, which contains informtion about all casualties in the article.

Details to extract:
{{
    "Name of casualty or group": The names of the individuals or groups involved with casualties.
    "Gender or age group": Specifie if the persons is male or female, or mention their age group (e.g., child, adult, senior).
    "Cause of death": (e.g., shooting, shelling, chemical weapons, etc.)
    "Type": Determine whether the casualties are civilians or non-civilians (e.g., military personnel).
    "Actor": Identify the parties involved in the incident, such as rebel groups, Russian forces, ISIS, the Syrian army, U.S. military, etc.
    "Place of death": Indicate the locations where the casualties occurred (e.g., Aleppo, Damascus, Homs, Idlib, Raqqa, Daraa, Deir ez-Zor, Qamishli, Palmyra, etc.).
    "Date of death": Provide the date when the attacks accured.
}}
Example JSON output:
{{
    "Name of casualty or group": "Abu Abdelrahman Salqeen, Ahrar al-Sham members, Hassan Abboud, Islamic State leaders, Maher Meshaal (Abu Hajar al-Hadrami), Hizbollah fighters",
    "Gender or age group": "males",
    "Cause of death": "suicide bombing, bombing, airstrikes, combat",
    "Type": "non-civilian",
    "Actor": "Ahrar al-Sham, U.S. military, Syrian Army, Hizbollah",
    "Place of death": "Idlib, Hasakeh, Zabadani",
    "Date of death": "07/14/2015, 09/2014, 07/2015"
}}

{article}
"""

#Prompts for fake detector
#_____________________________________________________________________

LIST_OF_CHANGES = [
    ["Name of casualty or group", "Name of casualty or group is the name of the casualty or the name of the group associated with the casualty.", """Name of casualty or group is the name of the casualty or the name of the group associated with the casualty. Is the "Name of casualty or group" in the article approximately coherent with this description: {}? If the description comes from the article, output "The answer is true" and otherwise output "The answer is false". In addition to "The answer is true" or "The answer is false" label provide short explanation. Do not be to specific, but consider if the information matches overall."""],
    ["Gender or age group", "Gender or age group indicates if the casualty is male or female, or specifies their age group.", """Gender or age group indicates if the casualty is male or female, or specifies their age group. Is the "Gender or age group" of the casualty in the article approximately coherent with this description: {}? If the description comes from the article, output "The answer is true" and otherwise output "The answer is false". In addition to "The answer is true" or "The answer is false" label provide short explanation. Do not be to specific, but consider if the information matches overall."""],
    ["Cause of death", "Cause of death is the weapon used in the attack (e.g., shooting, shelling, chemical weapons, etc.).", """Cause of death is the weapon used in the attack (e.g., shooting, shelling, chemical weapons, etc.). Is the "Cause of death" in the article approximately coherent with this description: {}? If the description comes from the article, output "The answer is true" and otherwise output "The answer is false". In addition to "The answer is true" or "The answer is false" label provide short explanation. Do not be to specific, but consider if the information matches overall."""],
    ["Type", "Type is the information if the casualty is civilian or non-civilian.", """Type is the information if the casualty is civilian or non-civilian. Is the "Type" (civilian or non-civilian) in the article approximately coherent with this description: {}? If the description comes from the article, output "The answer is true" and otherwise output "The answer is false". In addition to "The answer is true" or "The answer is false" label provide short explanation. Do not be to specific, but consider if the information matches overall."""],
    ["Actor", "The actor is the person or group responsible for the attack.", """The actor is the person or group responsible for the attack. Is the "Actor" (group responsible for the attack) in the article approximately coherent with this description: {}? If the description comes from the article, output "The answer is true" and otherwise output "The answer is false". In addition to "The answer is true" or "The answer is false" label provide short explanation. Do not be to specific, but consider if the information matches overall."""],
    ["Place of death", "Place of death refers to the cities or areas where the attacks happened.", """Place of death refers to the cities or areas where the attacks happened. Is the "Place of death" in the article approximately coherent with this description: {}? If the description comes from the article, output "The answer is true" and otherwise output "The answer is false". In addition to "The answer is true" or "The answer is false" label provide short explanation. Do not be to specific, but consider if the information matches overall."""],
    ["Date of death", "The date of death refers to thime where the attack happened in the article.", """The date of death refers to when the attack happened in the article. Is the "Date of death" in the article approximately coherent with this description: {}? If the description comes from the article, output "The answer is true" and otherwise output "The answer is false". In addition to "The answer is true" or "The answer is false" label provide short explanation. Do not be to specific, but consider if the information matches overall."""]
]


prompt = """You will be given an event in Syrian war dated from 2013 to 2017. Pleas read and understand the event that is stored in JSON format:

{events}

You must check that the event presented in the article is from among previously red events. Try to check that all the information matches. That means that "Name of casualty or group" "Gender or age group", "Cause of death", "Type", "Actor", "Place of death" and "Date of death" must match.

{article}

If the article matches some event print 'true', else print 'false'. In addition to 'true' or 'false' provide explanation.
"""

prompt_one_by_one = """Carefully read throug the article and try to understand its {topic}. {meaning_of_topic}
{article}

{question}
"""


# imports 

import random
import ollama
import json
import re
import pandas as pd

# Function used for article generation

def find_json(text):
        """Function to find JSON-like content in the text."""
        content = []
        pattern0 = (
            r'.*Name of casualty or group.*:\s*(.*),.*\n'
            r'.*Gender or age group.*:\s*(.*),.*\n'
            r'.*Cause of death.*:\s*(.*),.*\n'
            r'.*Type.*:\s*(.*),.*\n'
            r'.*Actor.*:\s*(.*),.*\n'
            r'.*Place of death.*:\s*(.*),.*\n'
            r'.*Date of death.*:\s*(.*)\s*\n'
        )
        pattern1 = (
            r'\{.*Name of casualty or group.*:\s*(.*),.*'
            r'.*Gender or age group.*:\s*(.*),.*'
            r'.*Cause of death.*:\s*(.*),.*'
            r'.*Type.*:\s*(.*),.*'
            r'.*Actor.*:\s*(.*),.*'
            r'.*Place of death.*:\s*(.*),.*'
            r'.*Date of death.*:\s*(.*)\}.*'
        )
        
        
        pattern2 = r'(?:"Name of casualty or group": \[([^\]]*?)\],?\n\s*"Gender or age group": \[([^\]]*?)\],?\n\s*"Cause of death": \[([^\]]*?)\],?\n\s*"Type": \[([^\]]*?)\],?\n\s*"Actor": \[([^\]]*?)\],?\n\s*"Place of death": \[([^\]]*?)\],?\n\s*"Date of death": \[([^\]]*?)\])'

        matches = re.finditer(pattern2, text)

        for match in matches:
            content_dict = {
                "Name of casualty or group": match.group(1).replace('\"', ''),
                "Gender or age group": match.group(2).replace('\"', ''),
                "Cause of death": match.group(3).replace('\"', ''),
                "Type": match.group(4).replace('\"', ''),
                "Actor": match.group(5).replace('\"', ''),
                "Place of death": match.group(6).replace('\"', ''),
                "Date of death": match.group(7).replace('\"', ''),
            }
            content.append(content_dict)

        
        matches = re.finditer(pattern0, text)

        for match in matches:
            content_dict = {
                "Name of casualty or group": match.group(1).replace('\"', ''),
                "Gender or age group": match.group(2).replace('\"', ''),
                "Cause of death": match.group(3).replace('\"', ''),
                "Type": match.group(4).replace('\"', ''),
                "Actor": match.group(5).replace('\"', ''),
                "Place of death": match.group(6).replace('\"', ''),
                "Date of death": match.group(7).replace('\"', ''),
            }
            content.append(content_dict)

        matches = re.finditer(pattern1, text)

        for match in matches:
            content_dict = {
                "Name of casualty or group": match.group(1).replace('\"', ''),
                "Gender or age group": match.group(2).replace('\"', ''),
                "Cause of death": match.group(3).replace('\"', ''),
                "Type": match.group(4).replace('\"', ''),
                "Actor": match.group(5).replace('\"', ''),
                "Place of death": match.group(6).replace('\"', ''),
                "Date of death": match.group(7).replace('\"', ''),
            }
            content.append(content_dict)

        return content

def extract_last_article(text):
    """Function to extract the last article."""
    pattern = r"BEGINNING OF THE ARTICLE(.*?)END OF THE ARTICLE"
    matches = list(re.finditer(pattern, text, re.DOTALL))
    if matches:
        last_match = matches[-1]
        return last_match.group(1).strip()
    return None

def print_readable_dict(data):
    """Prints a dictionary in a readable JSON-like format."""
    print(json.dumps(data, indent=4, ensure_ascii=False))

def extract_fact_from_text(text):
    pattern = r"BEGINNING OF FACTS(.*?)END OF FACTS"
    match = re.search(pattern, text, re.DOTALL)
    if match:
        return match.group(1).strip()
    return None


#Generation of text
def generate_facts(article_content, name_of_the_model, print_generated_text = False):
        
    response = ollama.chat(model=name_of_the_model, messages=[
        {'role': 'user', 'content': prompt_fact_extraction_abdul.format(article=article_content), 'temperature': 0.2}
    ])

    generated = response['message']['content']
    if print_generated_text == True:
        print("Facts generated in normal fact_generator:\n ", generated)

    json_list = find_json(generated)
    if json_list == []:
        json_list  = None
  
    return json_list

def generate_facts_one_by_one(article_content, name_of_the_model, print_generated_text = False):
    json_dict = {}    
    for topic in TOPICS:
        response = ollama.chat(
            model=name_of_the_model,
            messages=[
                {
                    'role': 'user',
                    'content': prompt_for_one_by_one_fact_extraction_concise_version.format(
                        article=article_content,
                        topic=topic[0],
                        topic_content=topic[1]
                    ),
                    'temperature': 0.2,
                }
            ])
    
        generated = response['message']['content']
        if print_generated_text == True:
            print("Generated fact on topic {topic[0]}:", generated)
        json_dict[topic[0]] = extract_fact_from_text(generated)
        if None in list(json_dict.values()):
            json_list = None
        else: json_list = [json_dict]
    return json_list

def generate(true_articles, name_of_the_model, size_of_sample, type_of_generation = "normal", print_comments=False):
    indices = random.sample(list(range(len(true_articles))), size_of_sample)
    results = []
    for index in indices:
        article_content = list(true_articles['article_content'])[index]

        if type_of_generation == "normal":
            json_list = generate_facts(article_content, name_of_the_model, print_comments)
        elif type_of_generation == "one_by_one":
            json_list = generate_facts_one_by_one(article_content, name_of_the_model, print_comments)
        else:
            raise ValueError("argument 'type of generation' is not a valid string!")

        if json_dict == None:
            print("No JSON found in first step of extracting facts!")
        else:
            for json_dict in json_list:
                results.append(index, article_content, json_dict)
    
    column_names = ["index", "article_content", "json_dict"]
    results = pd.DataFrame(results, columns = column_names)
    return results





import random
import ollama
import json
import re
import pandas as pd

def extract_and_change_articles(true_articles, name_of_the_model, size_of_sample=5, type_of_generation = "normal", change_of_article = "paraphrase_abdul", print_comments = False):
    """Extracts information from articles and applies two rounds of information changes.

    Args:
        true_articles (DataFrame): DataFrame containing the articles.
        name_of_the_model (str): The name of the language model to use.
        size_of_sample (int): Number of articles to process. Default is 60.

    Returns:
        list: A list of results containing the indices, changed articles, changed JSON, original articles, etc.
    """
   
    already_made =  [293, 38, 167, 202, 406, 355, 132, 342, 56, 361, 248, 120, 114, 403, 391, 185, 15, 272, 186, 358,414, 139, 145, 269, 295, 158, 209, 17, 316, 160, 130, 182, 88, 320, 401, 257, 42, 125, 18, 261, 215, 343, 26, 368]

    indices = random.sample([n for n in range(len(true_articles)) if n not in already_made], size_of_sample)
    results = []
    for index in indices:
        article_content = list(true_articles['article_content'])[index]
        if print_comments:
            print("Original article: ", article_content)

            
        if type_of_generation == "normal":
            json_list = generate_facts(article_content, name_of_the_model, print_comments)
        elif type_of_generation == "one_by_one":
            json_list = generate_facts_one_by_one(article_content, name_of_the_model, print_comments)
        else:
            raise ValueError("argument 'type of generation' is not a valid string!")



        if json_list == None:
            print("No JSON found in first step of extracting facts!")
        else:
            for json_dict in json_list:
        
                print("THIS IS DICTIONARY THAT WAS EXTRACTED")
                print_readable_dict(json_dict)
                topic_to_change = random.sample(CHANGES3, 2)
                print("We will change topics:", topic_to_change[0][0], " and ", topic_to_change[1][0])

                response = ollama.chat(model=name_of_the_model, messages=[
                    {
                        'role': 'user',
                        'content': change_one_information[change_of_article].format(
                            article=article_content,
                            facts =json.dumps(json_dict, indent=4, ensure_ascii=False),
                            change_topic_1=topic_to_change[0][0],
                            change_data_1=topic_to_change[0][1],
                        ),
                        'temperature': 0.2,
                    }
                ])
                generated = response['message']['content']
                changed_json = find_json(generated)
                changed_json = changed_json[0] if changed_json else None

                changed_article = extract_last_article(generated)
                if not changed_article or not changed_json:
                    print("We did not get through the first round!")
                    if print_comments == True:
                        print("THIS IS THE FIRST GENERATED TEXT \n\n", generated)
                        print("EXTRACTED ARTICLE: ", changed_article)
                        print("EXTRACTED JSON: ", changed_json)
                    results.append((index, None, None, article_content, json_dict, topic_to_change[0][0], topic_to_change[1][0]))
                else:
                    response = ollama.chat(model=name_of_the_model, messages=[
                        {
                            'role': 'user',
                            'content': change_one_information[change_of_article].format(
                                article=changed_article,
                                facts =json.dumps(changed_json, indent=4, ensure_ascii=False),
                                change_topic_1=topic_to_change[1][0],
                                change_data_1=topic_to_change[1][1],
                            ),
                            'temperature': 0.2,
                        }
                    ])
                    generated = response['message']['content']
                    twice_changed_json = find_json(generated)
                    twice_changed_json = twice_changed_json[0] if twice_changed_json else None

                    twice_changed_article = extract_last_article(generated)
                    if print_comments == True:
                        print("THIS IS THE SECOND GENERATED TEXT \n\n", generated)
                        print("TWICE EXTRACTED ARTICLE: ", twice_changed_article)
                        print("TWICE EXTRACTED JSON: ", twice_changed_json)
                    results.append([index, twice_changed_article, twice_changed_json, article_content, json_dict, topic_to_change[0][0], topic_to_change[1][0]])

    columns_names = ["index", "transformed_article_text", "changed_facts","article_text", "facts", "first_changed_topic", "second_changed_topic"]
    results = pd.DataFrame(results, columns = columns_names)
    return results

def change_articles(articles_with_facts, name_of_the_model, size_of_sample=5, change_of_article = "paraphrase", print_comments = False):
    """Applies two rounds of information changes.

    Args:
        articles_with_facts: DataFrame containing the articles and facts.
        name_of_the_model (str): The name of the language model to use.
        size_of_sample (int): Number of articles to process. Default is 60.

    Returns:
        list: A list of results containing the indices, changed articles, changed JSON, original articles, etc.
    """
    
    indices = random.sample(list(range(len(articles_with_facts))), size_of_sample)
    results = []
    for index in indices:
        article_content = list(articles_with_facts['article_content'])[index]
        json_dict = list(articles_with_facts['article_content'])[index]

        
        print("THIS IS DICTIONARY THAT WAS EXTRACTED")
        print_readable_dict(json_dict)
        topic_to_change = random.sample(CHANGES3, 2)
        print("We will change topics:", topic_to_change[0][0], " and ", topic_to_change[1][0])
        response = ollama.chat(model=name_of_the_model, messages=[
            {
                'role': 'user',
                'content': change_one_information[change_of_article].format(
                    article=article_content,
                    facts =json.dumps(json_dict, indent=4, ensure_ascii=False),
                    change_topic_1=topic_to_change[0][0],
                    change_data_1=topic_to_change[0][1]
                ),
                'temperature': 0.2,
            }
        ])
        generated = response['message']['content']
        changed_json = find_json(generated)
        
        changed_json = changed_json[0] if changed_json else None
        changed_article = extract_last_article(generated)
        if not changed_article or not changed_json:
            print("We did not get through the first round!")
            if print_comments == True:
                print("THIS IS THE FIRST GENERATED TEXT \n\n", generated)
                print("EXTRACTED ARTICLE: ", changed_article)
                print("EXTRACTED JSON: ", changed_json)
            results.append((index, None, None, article_content, json_dict, topic_to_change[0][0], topic_to_change[1][0]))
        else:
            response = ollama.chat(model=name_of_the_model, messages=[
                {
                    'role': 'user',
                    'content': change_one_information[change_of_article].format(
                        article=changed_article,
                        facts =json.dumps(changed_json, indent=4, ensure_ascii=False),
                        change_topic_1=topic_to_change[1][0],
                        change_data_1=topic_to_change[1][1]
                    ),
                    'temperature': 0.2,
                }
            ])
            generated = response['message']['content']
            twice_changed_json = find_json(generated)
            if twice_changed_json:
                twice_changed_json = twice_changed_json[0]
            
            twice_changed_article = extract_last_article(generated)
            if print_comments == True:
                print("THIS IS THE SECOND GENERATED TEXT \n\n", generated)
                print("TWICE EXTRACTED ARTICLE: ", twice_changed_article)
                print("TWICE EXTRACTED JSON: ", twice_changed_json)
            results.append([index, twice_changed_article, twice_changed_json, article_content, json_dict, topic_to_change[0][0], topic_to_change[1][0]])
    
    columns_names = ["index", "transformed_article_text", "changed_facts","article_text", "facts", "first_changed_topic", "second_changed_topic"]
    results = pd.DataFrame(results, columns = columns_names)
    
    return results
# Read CSV file with specified encoding
df = pd.read_csv('FA-KES-Dataset.csv', encoding='ISO-8859-1')  # or use 'latin1'

# Filter data where 'labels' column is equal to 1
true_articles = df[df['labels'] == 1]

# Display the filtered data
print(len(true_articles))

# Example usage:
results = extract_and_change_articles(true_articles,'llama3.1:8b',50 , print_comments=True, change_of_article="paraphrase_aggressive")


  from pandas.core.computation.check import NUMEXPR_INSTALLED


426
Original article:  Date of publication 30 September 2017 Four children are among the 28 dead in overnight airstrikes on the town of Armanaz in Idlib province purportedly carried out by Syrian regime or Russian jets. Tags Syria Russia Idlib safe zone Turkey Assad SOHR At least 28 civilians were killed in airstrikes on northwestern Syria despite a planned safe zone. Four children were among the dead in the overnight strikes on the town of Armanaz in Idlib province near the Turkish border the Syrian Observatory for Human Rights said. The Britain-based watchdog had earlier reported 12 dead in the strikes on the town in Harem district around 20 kilometres northwest of the provincial capital Idlib. Entire apartment blocks had been flattened by the bombardment an AFP correspondent said. The Observatory said it could not immediately determine whether the strikes had been carried out by jets of the Syrian regime or its ally Russia. But they are the latest in an intensifying air campaign car

In [3]:
import csv
import json
import os
import numpy as np
folder_name = "changed_articles"
# Define the CSV file name
csv_file_name = 'fake_articles_one_by_one_generation_aggresive.csv'

path_string = os.path.join(folder_name, csv_file_name)

def filter_tuples(tuples_list):
    """Filter out tuples that contain None."""
    return [tup for tup in tuples_list if None not in tup and np.nan not in tup and '' not in tup]

def append_to_csv(data, file_name):
    """Append filtered data to a CSV file."""
    with open(file_name, mode='a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        # Convert dictionaries in the tuples to JSON strings
        for row in data:
            converted_row = [json.dumps(item) if isinstance(item, dict) else item for item in row]
            writer.writerow(converted_row)
        print(f'Data has been appended to {file_name}')

# Assuming results_new_prompt is already defined and populated
results.to_csv(path_string, index=False, mode='a')
filtered_data = filter_tuples(results.values.tolist())

print(f"Number of valid entries: {len(filtered_data)}")



Number of valid entries: 41


In [8]:
import ollama     

response = ollama.chat(
            model='mixtral:8x7b',
            messages=[
                {
                    'role': 'user',
                    'content': "Hello, how are you?"
                    ,
                    'temperature': 0.2,
                }
            ])
    
generated = response['message']['content']
print(generated)


 Hello! I'm just a computer program, so I don't have feelings, but I'm here to help you with any language-related questions you have. Is there something specific you would like to know or practice? I can assist with grammar, vocabulary, sentence structure, and more. Just let me know how I can help!
