In [None]:
import ollama
import os
import json
import sys
import re
import random

def extract_json(text):
    """
    Extracts the first JSON array of objects from a text string and parses it.

    The text is scanned for every position where an array of objects could
    start (a ``[`` followed by optional whitespace and ``{``). Each candidate is
    handed to ``json.JSONDecoder.raw_decode``, which parses exactly one JSON
    value starting at that position and ignores any text that follows it.
    Unlike a non-greedy regex match, this does not cut the array short when an
    object contains a nested array or when a string value contains ``}]``.

    Parameters:
    - text (str): The input string containing a JSON array along with additional text.

    Returns:
    - list: The extracted and parsed JSON array as a Python list.
    - None: If no JSON array is found or if parsing fails.
    """
    # Anything that is not a string cannot contain JSON text.
    if not isinstance(text, str):
        print("No JSON data found")
        return None

    decoder = json.JSONDecoder()
    last_error = None

    # Try every place where an array of objects could begin, in order.
    for candidate in re.finditer(r'\[\s*{', text):
        try:
            # raw_decode returns (value, end_index); only the value is needed.
            data, _end = decoder.raw_decode(text, candidate.start())
        except json.JSONDecodeError as e:
            # Remember the failure, but keep looking for a later valid array.
            last_error = e
            continue
        return data

    if last_error is not None:
        print("Failed to decode JSON:", last_error)
    else:
        print("No JSON data found")
    return None

# Resolve the Fakes700 corpus location: <parent of cwd>/ACL2019_Data/Fakes700
parent_directory = os.path.split(os.getcwd())[0]
fakes_directory = os.path.join(parent_directory, 'ACL2019_Data', 'Fakes700')

# Function to read and load contents of text files
def load_articles(file_paths):
    """
    Reads every file in ``file_paths`` as UTF-8 text.

    Parameters:
    - file_paths (iterable of str): Paths of the text files to read.

    Returns:
    - list: The full contents of each file, in the same order as ``file_paths``.
    """
    def _read_text(path):
        # The context manager closes the file even if reading fails.
        with open(path, 'r', encoding='utf-8') as handle:
            return handle.read()

    return [_read_text(path) for path in file_paths]


def generate_fake_news(number_of_news, number_of_facts, number_of_facts_changed):
    """
    Samples random articles, asks an LLM for fact topics, and manipulates each article.

    For every sampled article the function:
    1. sends a prompt to the ``Llama3.1:8B`` model through ``ollama.chat`` asking
       for ``number_of_facts`` fact topics in JSON form,
    2. prints the raw reply and the topics parsed from it by ``extract_json``,
    3. passes the article and topics to
       ``General_fact_extraction_and_manipulation.extract_and_change_articles_with_labeling_added``
       (imported from the ``Task2`` folder under the current working directory).

    Parameters:
    - number_of_news (int): How many articles to sample from ``fakes_directory``.
    - number_of_facts (int): How many fact topics the prompt asks the model for.
    - number_of_facts_changed (int): Forwarded to the manipulation function.

    Returns:
    - list: One entry per article, each being whatever the manipulation function returned.

    Raises:
    - ValueError: From ``random.sample`` if ``number_of_news`` is negative or
      larger than the index range sampled from.
    """
    # Pick `number_of_news` distinct indices; files are named article_<index>.txt.
    # NOTE(review): 705 presumably matches the number of files in Fakes700 — confirm.
    article_indices = random.sample(range(0, 705), number_of_news)
    article_paths = [
        os.path.join(fakes_directory, f"article_{index}.txt")
        for index in article_indices
    ]

    # Load articles
    articles = load_articles(article_paths)

    # Make the Task2 helper module importable. This is done once, before the
    # loop, and only if the path is not registered yet, so sys.path does not
    # gain a duplicate entry for every processed article.
    task2_directory = os.path.join(os.getcwd(), 'Task2')
    if task2_directory not in sys.path:
        sys.path.append(task2_directory)
    import General_fact_extraction_and_manipulation as generating

    # Prompt template (loop-invariant). Doubled braces are literal braces for
    # str.format. The example array must itself be valid JSON, so there is no
    # trailing comma after its last object.
    prompt = """
      You are a journalist working on world news. Extract {number_of_facts} topics of facts from the given article.

      BEGINNING OF THE ARTICLE
      {article}
      END OF THE ARTICLE

      Topics should be outputted in standard JSON form as follows:
      [
        {{
          "Name of fact": "name of type of fact",
          "Description of fact": "What information does the fact contain.",
          "Common examples": "Some examples of the facts of prescribed type."
        }}
      ]

      BEGINNING OF THE ARTICLE
      {article}
      END OF THE ARTICLE

      OUTPUT EXAMPLES OF TOPICS
      [
        {{
          "Name of fact": "Turnout",
          "Description of fact": "The number of people who take part in election.",
          "Common examples": "1000, 10k"
        }},
        {{
          "Name of fact": "Gender or age group",
          "Description of fact": " of casualty indicates if the casualties are male or female, or specify their age group.",
          "Common examples": "Male, Female, Child, Adult, Senior"
        }},
        {{
          "Name of fact": "Cause of death",
          "Description of fact": " specifies the weapons used by the aggressor (e.g., shooting, shelling, chemical weapons, etc.)",
          "Common examples": "Shooting, Shelling, Chemical weapons"
        }},
        {{
          "Name of fact": "Type",
          "Description of fact": " of casualty classifies the casualties as a civilian or non-civilian (e.g., military personnel are non-civilians).",
          "Common examples": "Civilian, Non-civilian"
        }},
        {{
          "Name of fact": "Actor",
          "Description of fact": " identifies the actors responsible for the incident, such as rebel groups, Russian forces, ISIS, the Syrian army, U.S. military, etc.",
          "Common examples": "Rebel groups, Russian forces, ISIS"
        }}
      ]
      END OF OUTPUT EXAMPLES
      Topics should be outputted in standard JSON form.
      """

    # Initialize an empty list to store results
    results = []

    # Iterate over the articles and process each one
    for article in articles:
        # Sampling settings such as temperature belong in `options`; a
        # 'temperature' key inside a message dict is not a sampling option.
        response = ollama.chat(
            model="Llama3.1:8B",
            messages=[
                {
                    'role': 'user',
                    'content': prompt.format(article=article, number_of_facts=number_of_facts),
                }
            ],
            options={'temperature': 0.2},
        )

        output = response['message']['content']
        print(output)

        # NOTE(review): extract_json returns None when no array can be parsed;
        # the value is forwarded unchanged — confirm the callee tolerates None.
        topics = extract_json(output)
        print(topics)

        # Process the article and topics
        # NOTE(review): `number_of_changed_facats` looks like a typo, but it is
        # kept because the callee's signature is not visible here — confirm.
        result = generating.extract_and_change_articles_with_labeling_added(
            [article],
            topics,
            'llama3.1:8b',
            number_of_changed_facats=number_of_facts_changed,
            type_of_generation="one_by_one",
            print_comments=True,
            change_of_article="paraphrase_aggressive",
            number_of_facts_changed=number_of_facts_changed,
        )

        results.append(result)

    # Output the list of all results
    return results



In [3]:
# Run the pipeline: sample 6 articles, request 3 fact topics per article,
# and pass 1 as the number of facts to change.
results = generate_fake_news(
    number_of_news=6,
    number_of_facts=3,
    number_of_facts_changed=1,
)
print(results)

Here are 3 topics of facts extracted from the article:

```
[
  {
    "Name of fact": "Actor",
    "Description of fact": "identifies the actors responsible for the incident, such as radical Muslims.",
    "Common examples": "Radical Muslims"
  },
  {
    "Name of fact": "Cause of death",
    "Description of fact": "specifies the methods used to kill a man.",
    "Common examples": "Sodomizing, Killing"
  },
  {
    "Name of fact": "Type of accusation",
    "Description of fact": "classifies the accusations made against individuals such as Bill Clinton or Donald Trump.",
    "Common examples": "Sexual assault, Sexual predator"
  }
]
```
[{'Name of fact': 'Actor', 'Description of fact': 'identifies the actors responsible for the incident, such as radical Muslims.', 'Common examples': 'Radical Muslims'}, {'Name of fact': 'Cause of death', 'Description of fact': 'specifies the methods used to kill a man.', 'Common examples': 'Sodomizing, Killing'}, {'Name of fact': 'Type of accusation', '