# Flexible Multi-Agent Propaganda Detection System

## Core Features

### ‚ú® Feature1: Automatically discover languages and techniques
- Add a new language by creating a folder
- Add a new technique by adding a file
- Zero code changes

### ‚ú® Feature2: Automatically append output-format instructions
- Prompt files only need the technique description
- The system automatically appends output-format requirements (by language)
- Supports multi-language instructions (Russian, Polish, English, etc.)

---

## Step1: Import required libraries

In [2]:
import os
import json
import glob
from typing import List, Dict, Optional, Set, Tuple
from dataclasses import dataclass
from pathlib import Path
import re
from collections import Counter

from ollama import Client

from typing import Dict, List, Union
import time
from tqdm import tqdm
# AutoGen imports
from autogen import ConversableAgent, GroupChat, GroupChatManager

print("‚úì TODO: translate to EnglishÔºÅ")

‚úì TODO: translate to EnglishÔºÅ


## Step2: Define data structures

In [3]:
@dataclass
class TechniquePrompt:
    """Persuasion-technique prompt data structure"""
    name: str                    # Technique name
    language: str                # Language code
    prompt_content: str          # Prompt content
    file_path: str              # TODO: translate to English
    category: Optional[str] = None  # TODO: translate to English

print("‚úì TODO: translate to EnglishÔºÅ")

‚úì TODO: translate to EnglishÔºÅ


## Step3: Create a flexible prompt loader

This loader automatically discovers all languages and techniques

In [4]:
class FlexiblePromptLoader:
    """TODO: translate to English - Automatically discover languages and techniques"""
    
    def __init__(
        self,
        base_dir: str,
        languages: Optional[List[str]] = None,
        techniques: Optional[List[str]] = None,
        exclude_languages: Optional[List[str]] = None,
        exclude_techniques: Optional[List[str]] = None,
        config_file: Optional[str] = None,
        prompt_file_suffix: str = "_prompt.md",
        verbose: bool = True
    ):
        """
        TODO: translate to English
        
        Args:
            base_dir: TODO: translate to English
            languages: TODO: translate to EnglishÔºàNoneTODO: translate to EnglishÔºâ
            techniques: TODO: translate to EnglishÔºàNoneTODO: translate to EnglishÔºâ
            exclude_languages: TODO: translate to EnglishExcludeTODO: translate to English
            exclude_techniques: TODO: translate to EnglishExcludeTODO: translate to English
            config_file: TODO: translate to English
            prompt_file_suffix: TODO: translate to English
            verbose: TODO: translate to English
        """
        self.base_dir = base_dir
        self.prompt_file_suffix = prompt_file_suffix
        self.verbose = verbose
        
        # TODO: translate to EnglishÔºåTODO: translate to English
        if config_file and os.path.exists(config_file):
            self._load_from_config(config_file)
        else:
            self.specified_languages = languages
            self.specified_techniques = techniques
            self.exclude_languages = exclude_languages or []
            self.exclude_techniques = exclude_techniques or []
        
        if not os.path.exists(base_dir):
            raise FileNotFoundError(f"TODO: translate to English: {base_dir}")
        
        # TODO: translate to English
        self.prompts = {}  # {(technique, language): TechniquePrompt}
        self.discovered_languages = set()
        self.discovered_techniques = set()
        
        # TODO: translate to English
        self._discover_and_load()
    
    def _load_from_config(self, config_file: str):
        """TODO: translate to English"""
        with open(config_file, 'r', encoding='utf-8') as f:
            config = json.load(f)
        
        self.specified_languages = config.get('languages')
        self.specified_techniques = config.get('techniques')
        self.exclude_languages = config.get('exclude_languages', [])
        self.exclude_techniques = config.get('exclude_techniques', [])
        
        if self.verbose:
            print(f"‚úì TODO: translate to English: {config_file}")
    
    def _discover_languages(self) -> List[str]:
        """TODO: translate to English"""
        languages = []
        for item in os.listdir(self.base_dir):
            item_path = os.path.join(self.base_dir, item)
            if os.path.isdir(item_path):
                # TODO: translate to English
                prompt_files = glob.glob(
                    os.path.join(item_path, f"*{self.prompt_file_suffix}")
                )
                if prompt_files:
                    languages.append(item.lower())
        return sorted(languages)
    
    def _discover_techniques_for_language(self, language: str) -> List[str]:
        """TODO: translate to English"""
        techniques = []
        lang_dir = os.path.join(self.base_dir, language)
        
        if not os.path.exists(lang_dir):
            return []
        
        pattern = os.path.join(lang_dir, f"*{self.prompt_file_suffix}")
        prompt_files = glob.glob(pattern)
        
        for filepath in prompt_files:
            filename = os.path.basename(filepath)
            technique_name = filename[:-len(self.prompt_file_suffix)]
            techniques.append(technique_name)
        
        return sorted(techniques)
    
    def _should_include_language(self, language: str) -> bool:
        """TODO: translate to English"""
        if language in self.exclude_languages:
            return False
        if self.specified_languages is not None:
            return language in self.specified_languages
        return True
    
    def _should_include_technique(self, technique: str) -> bool:
        """TODO: translate to Englishtechnique(s)"""
        if technique in self.exclude_techniques:
            return False
        if self.specified_techniques is not None:
            return technique in self.specified_techniques
        return True
    
    def _discover_and_load(self):
        """TODO: translate to English"""
        if self.verbose:
            print("\n" + "="*70)
            print("üîç TODO: translate to English")
            print("="*70)
            print(f"TODO: translate to English: {self.base_dir}")
            print(f"TODO: translate to English: {self.prompt_file_suffix}")
        
        # TODO: translate to English
        all_languages = self._discover_languages()
        
        if self.verbose:
            print(f"\nüìÇ TODO: translate to English: {', '.join(all_languages) if all_languages else 'TODO: translate to English'}")
        
        if not all_languages:
            print("‚ö†Ô∏è TODO: translate to English: TODO: translate to EnglishÔºÅ")
            return
        
        # TODO: translate to English
        languages_to_load = [
            lang for lang in all_languages 
            if self._should_include_language(lang)
        ]
        
        if self.verbose and languages_to_load:
            print(f"‚úì TODO: translate to English: {', '.join(languages_to_load)}")
            if set(all_languages) - set(languages_to_load):
                excluded = set(all_languages) - set(languages_to_load)
                print(f"‚äò ExcludeTODO: translate to English: {', '.join(excluded)}")
        
        loaded_count = 0
        total_techniques_set = set()
        
        # TODO: translate to English
        for language in languages_to_load:
            if self.verbose:
                print(f"\nüìÅ TODO: translate to English: {language.upper()}")
            
            all_techniques = self._discover_techniques_for_language(language)
            
            if self.verbose:
                print(f"  TODO: translate to English {len(all_techniques)} technique(s)TODO: translate to English")
            
            techniques_to_load = [
                tech for tech in all_techniques
                if self._should_include_technique(tech)
            ]
            
            if self.verbose and techniques_to_load:
                print(f"  TODO: translate to English {len(techniques_to_load)} technique(s)")
                if set(all_techniques) - set(techniques_to_load):
                    excluded_count = len(set(all_techniques) - set(techniques_to_load))
                    print(f"  Exclude {excluded_count} technique(s)")
            
            # TODO: translate to Englishtechnique(s)TODO: translate to English
            for technique in techniques_to_load:
                filename = f"{technique}{self.prompt_file_suffix}"
                filepath = os.path.join(self.base_dir, language, filename)
                
                try:
                    with open(filepath, 'r', encoding='utf-8') as f:
                        content = f.read().strip()
                    
                    if content:
                        self.prompts[(technique, language)] = TechniquePrompt(
                            name=technique,
                            language=language,
                            prompt_content=content,
                            file_path=filepath
                        )
                        loaded_count += 1
                        total_techniques_set.add(technique)
                        self.discovered_languages.add(language)
                        self.discovered_techniques.add(technique)
                    else:
                        if self.verbose:
                            print(f"  ‚ö†Ô∏è Skip empty file: {filename}")
                
                except Exception as e:
                    if self.verbose:
                        print(f"  ‚úó Read failed {filename}: {e}")
        
        if self.verbose:
            print("\n" + "="*70)
            print("üìä TODO: translate to English")
            print("="*70)
            print(f"‚úì TODO: translate to English: {len(self.discovered_languages)}")
            print(f"  {', '.join(sorted(self.discovered_languages))}")
            print(f"\n‚úì TODO: translate to English: {len(total_techniques_set)}")
            techniques_list = sorted(total_techniques_set)
            for i in range(0, len(techniques_list), 3):
                batch = techniques_list[i:i+3]
                print(f"  {', '.join(batch)}")
            print(f"\n‚úì TODO: translate to English: {loaded_count} TODO: translate to English")
            print("="*70 + "\n")
    
    def get_prompt(self, technique: str, language: str) -> Optional[TechniquePrompt]:
        """TODO: translate to English"""
        return self.prompts.get((technique, language))
    
    def get_available_techniques(self, language: str) -> List[str]:
        """TODO: translate to English"""
        return sorted([
            tech for (tech, lang) in self.prompts 
            if lang == language
        ])
    
    def get_all_techniques(self) -> List[str]:
        """TODO: translate to English"""
        return sorted(list(self.discovered_techniques))
    
    def get_languages(self) -> List[str]:
        """TODO: translate to English"""
        return sorted(list(self.discovered_languages))
    
    def get_stats(self) -> Dict:
        """TODO: translate to English"""
        stats = {
            'total_prompts': len(self.prompts),
            'total_languages': len(self.discovered_languages),
            'total_techniques': len(self.discovered_techniques),
            'languages': sorted(list(self.discovered_languages)),
            'techniques': sorted(list(self.discovered_techniques)),
            'coverage': {}
        }
        
        for lang in self.discovered_languages:
            stats['coverage'][lang] = len(self.get_available_techniques(lang))
        
        return stats
    
    def save_config(self, output_path: str):
        """TODO: translate to English"""
        config = {
            'base_dir': self.base_dir,
            'languages': sorted(list(self.discovered_languages)),
            'techniques': sorted(list(self.discovered_techniques)),
            'prompt_file_suffix': self.prompt_file_suffix,
            'stats': self.get_stats()
        }
        
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(config, f, ensure_ascii=False, indent=2)
        
        print(f"‚úì TODO: translate to English: {output_path}")

print("‚úì FlexiblePromptLoaderTODO: translate to EnglishÔºÅ")

‚úì FlexiblePromptLoaderTODO: translate to EnglishÔºÅ


## Step4: Create a flexible multi-agent system

**Key point**: This class automatically appends output-format instructions to promptsÔºÅ

In [5]:
class FlexibleTechniqueAgentSystem:
    """
    TODO: translate to English Multi-Agent TODO: translate to English
    
    Core Features:
    1. TODO: translate to English
    2. TODO: translate to English
    3. TODO: translate to EnglishÔºàRussian/Polish/EnglishÔºâ
    """
    
    def __init__(
        self,
        prompt_loader,
        language: str,
        api_key: str,
        model: str = "gpt-4o-mini",
        temperature: float = 0,
        use_ollama: bool = False,
        ollama_host: str = "https://ollama-ui.pagoda.liris.cnrs.fr/ollama"
    ):
        """
        TODO: translate to EnglishMulti-AgentTODO: translate to English
        
        Args:
            prompt_loader: TODO: translate to English
            language: TODO: translate to English
            api_key: OpenAI APITODO: translate to English
            model: TODO: translate to English
            temperature: TODO: translate to English
            use_ollama: TODO: translate to EnglishOllama
            ollama_host: OllamaTODO: translate to English
        """
        self.prompt_loader = prompt_loader
        self.language = language.lower()
        self.model = model
        self.temperature = temperature
        self.use_ollama = use_ollama
        self.ollama_host = ollama_host
        
        # ‚úÖ TODO: translate to English4TODO: translate to EnglishÔºàTODO: translate to English __init__ TODO: translate to EnglishÔºâ
        # TODO: translate to EnglishLLM
        if use_ollama:
            # OllamaTODO: translate to English
            self.ollama_client = Client(
                host=ollama_host,
                headers={'Authorization': f'Bearer {api_key}'}
            )
            self.llm_config = None
            self.api_key = api_key
            print(f"üîå TODO: translate to EnglishOllamaTODO: translate to English: {model}")
            print(f"üåê TODO: translate to English: {ollama_host}")
        else:
            # OpenAITODO: translate to English
            self.llm_config = {
                "model": model,
                "api_key": api_key,
                "temperature": temperature
            }
            self.ollama_client = None
            print(f"üîå TODO: translate to EnglishOpenAITODO: translate to English: {model}")
        
        # TODO: translate to English
        available_techniques = self.prompt_loader.get_available_techniques(self.language)
        
        if not available_techniques:
            raise ValueError(
                f"TODO: translate to English '{self.language}' TODO: translate to EnglishÔºÅ\n"
                f"TODO: translate to English: {', '.join(self.prompt_loader.get_languages())}"
            )
        
        self.techniques = available_techniques
        
        print(f"\nüöÄ TODO: translate to English {len(self.techniques)} technique(s)TODO: translate to EnglishAgentTODO: translate to English")
        print(f"TODO: translate to English: {self.language.upper()}")
        print(f"TODO: translate to English: {model}")
        
        # TODO: translate to Englishagents
        self.agents = {}
        self._initialize_agents()
        
        print(f"‚úì TODO: translate to EnglishagentsTODO: translate to EnglishÔºÅ\n")

    # ====================
    # TODO: translate to EnglishÔºåTODO: translate to EnglishÔºåTODO: translate to English
    # ====================
    # def _get_output_instruction(self, technique_name: str) -> str:
    #     """
    #     TODO: translate to EnglishÔºàEnglishTODO: translate to EnglishÔºâ
        
    #     Args:
    #         technique_name: Technique name
        
    #     Returns:
    #         TODO: translate to English
    #     """
    #     return f"""
    # {'='*70}
    # CRITICAL OUTPUT INSTRUCTIONS:
    # {'='*70}

    # Your ONLY task is to determine if the technique "{technique_name}" is present in the given text.

    # RESPONSE FORMAT (STRICT):
    # 1. You MUST respond with ONLY '1' or '0'
    # 2. '1' = technique "{technique_name}" IS present in the text
    # 3. '0' = technique "{technique_name}" IS NOT present in the text
    # 4. NO explanations, NO justifications, NO additional text whatsoever
    # 5. If uncertain, respond with '0'

    # Your response must be exactly one character: either '1' or '0'.

    # EXAMPLES OF CORRECT RESPONSES:
    # - 1
    # - 0

    # EXAMPLES OF INCORRECT RESPONSES:
    # - 1 (technique is present)  ‚Üê DO NOT add explanations
    # - Yes, it's present  ‚Üê ONLY '1' or '0'
    # - 0, because...  ‚Üê DO NOT add justification

    # RESPOND WITH ONLY '1' OR '0':
    # """

    def _get_output_instruction(self, technique_name: str) -> str:
        """TODO: translate to English"""
        return f"""
    {'='*70}
    CRITICAL OUTPUT INSTRUCTIONS:
    {'='*70}

    Your task is to determine if the technique "{technique_name}" is CLEARLY and SIGNIFICANTLY present in the TARGET PARAGRAPH.

    STRICT EVALUATION CRITERIA:
    1. The technique must be EXPLICIT and OBVIOUS in the target paragraph
    2. There must be CLEAR EVIDENCE, not just vague similarities
    3. The technique must be CENTRAL to the paragraph's message, not just tangentially related
    4. When in doubt or if the evidence is weak, respond with '0'
    5. Only respond '1' if you are HIGHLY CONFIDENT the technique is present

    RESPONSE FORMAT:
    - '1' = technique "{technique_name}" is CLEARLY and SIGNIFICANTLY present
    - '0' = technique is absent OR evidence is weak/uncertain

    Be CONSERVATIVE in your judgment. It's better to miss a technique than to falsely detect one.

    Your response must be exactly one character: either '1' or '0'.

    RESPOND WITH ONLY '1' OR '0':
    """
    
    def _create_agent_system_prompt(self, technique_name: str) -> str:
        """
        TODO: translate to EnglishagentTODO: translate to English
        
        TODO: translate to English:
        1. TODO: translate to English
        2. TODO: translate to English
        3. TODO: translate to English
        
        TODO: translate to EnglishÔºåTODO: translate to EnglishÔºÅ
        
        Args:
            technique_name: Technique name
        
        Returns:
            TODO: translate to EnglishÔºàTODO: translate to English + TODO: translate to EnglishÔºâ
        """
        # 1. TODO: translate to English
        technique_prompt = self.prompt_loader.get_prompt(technique_name, self.language)
        
        if not technique_prompt:
            raise ValueError(
                f"TODO: translate to English {technique_name} TODO: translate to English {self.language} TODO: translate to English"
            )
        
        # 2. TODO: translate to EnglishPrompt contentÔºàTODO: translate to EnglishÔºâ
        original_content = technique_prompt.prompt_content
        
        # 3. TODO: translate to English
        output_instruction = self._get_output_instruction(technique_name)
        
        # 4. TODO: translate to English
        # TODO: translate to English + TODO: translate to English + TODO: translate to English
        full_prompt = f"""{original_content}

{output_instruction}"""
        
        return full_prompt
    
    def _initialize_agents(self):
        """TODO: translate to Englishagents"""
        for technique in self.techniques:
            # TODO: translate to EnglishÔºàAutomatically append output-format instructionsÔºâ
            system_message = self._create_agent_system_prompt(technique)
            if self.use_ollama:
                # TODO: translate to EnglishOllamaÔºåTODO: translate to EnglishAutoGen agent
                # TODO: translate to Englishsystem messageTODO: translate to English
                self.agents[technique] = {
                    'system_message': system_message,
                    'type': 'ollama'
                }
            else:
                # Create a standard AutoGen agent
                agent = ConversableAgent(
                    name=f"{technique}_agent",
                    system_message=system_message,
                    llm_config=self.llm_config,
                    human_input_mode="NEVER",
                    max_consecutive_auto_reply=1
                )
                self.agents[technique] = agent
    
    def classify_text(self, text: str, verbose: bool = False) -> Tuple[List[str], Dict]:
        """
        TODO: translate to English
        
        Args:
            text: TODO: translate to English
            verbose: TODO: translate to English
        
        Returns:
            (detected_techniques, chat_history)
            - detected_techniques: DetectedTODO: translate to English
            - chat_history: TODO: translate to English
        """
        detected_techniques = []
        chat_history = {}
        
        if verbose:
            print(f"\n{'='*70}")
            print(f"TODO: translate to English ({len(self.techniques)} technique(s))")
            print(f"{'='*70}\n")
        
        for i, technique in enumerate(self.techniques, 1):
            if verbose:
                print(f"[{i}/{len(self.techniques)}] TODO: translate to English: {technique}...", end=" ")
            
            try:
                agent = self.agents[technique]

                if self.use_ollama:
                    system_message = agent['system_message']
            
                    response = self.ollama_client.chat(
                        model=self.model,
                        messages=[
                            {'role': 'system', 'content': system_message},
                            {'role': 'user', 'content': text}
                        ]
                    )
                    # Extract Ollama response
                    result = response['message']['content'].strip()
                else:
                    # Use an AutoGen agent
                    response = agent.generate_reply(
                        messages=[{"role": "user", "content": text}]
                    )
            
                    # üîß Fix: handle None responses
                    if response is None:
                        # generate_replyTODO: translate to EnglishNoneÔºåTODO: translate to English
                        result = '0'
                        if verbose:
                            print("‚ö†Ô∏è (AgentTODO: translate to EnglishNoneÔºåTODO: translate to English0) ", end="")
                    elif isinstance(response, str):
                        result = response.strip()
                    else:
                        result = str(response).strip()
                
                # üîß TODO: translate to EnglishÔºöTODO: translate to English
                # TODO: translate to English'0'/'1'ÔºåTODO: translate to English
                result_clean = result.strip()
                
                # Save history
                chat_history[technique] = {
                    'response': result_clean,
                    'detected': result_clean == '1'
                }

                # TODO: translate to EnglishDetected
                if result_clean == '1':
                    detected_techniques.append(technique)
                    if verbose:
                        print("‚úì Detected")
                elif result_clean == '0':
                    if verbose:
                        print("‚úó TODO: translate to EnglishDetected")
                else:
                    # Response format is invalid; attempting to extract
                    if '1' in result_clean and '0' not in result_clean:
                        detected_techniques.append(technique)
                        if verbose:
                            print(f"‚ö†Ô∏è Detected (TODO: translate to English: '{result_clean[:30]}')")
                    else:
                        if verbose:
                            print(f"‚ö†Ô∏è TODO: translate to EnglishDetected (TODO: translate to English: '{result_clean[:30]}')")

            except Exception as e:
                if verbose:
                    print(f"‚ùå Error: {e}")
                chat_history[technique] = {
                    'response': None,
                    'error': str(e),
                    'detected': False
                }
                
        if verbose:
            print(f"\n{'='*70}")
            print(f"TODO: translate to EnglishÔºÅDetected {len(detected_techniques)} technique(s)")
            if detected_techniques:
                print(f"DetectedTODO: translate to English: {', '.join(detected_techniques)}")
            print(f"{'='*70}\n")
        
        return detected_techniques, chat_history
    def classify_batch(
        self,
        fragments: List[Dict],
        text_key: str = 'text',
        verbose: bool = True
    ) -> List[Dict]:
        """
        TODO: translate to English
        
        Args:
            fragments: TODO: translate to EnglishÔºåTODO: translate to English
            text_key: TODO: translate to English
            verbose: TODO: translate to English
        
        Returns:
            TODO: translate to EnglishÔºåTODO: translate to English
        """
        results = []
        total = len(fragments)
        
        if verbose:
            print(f"\n{'='*70}")
            print(f"TODO: translate to English: {total} TODO: translate to English")
            print(f"{'='*70}\n")
        
        for i, fragment in enumerate(fragments, 1):
            if verbose:
                print(f"\nTODO: translate to English {i}/{total}...")
            
            text = fragment.get(text_key, '')
            
            if not text:
                if verbose:
                    print("  ‚ö†Ô∏è Skip empty text")
                results.append({
                    **fragment,
                    'detected_techniques': [],
                    'chat_history': {},
                    'error': 'Empty text'
                })
                continue
            
            try:
                detected, history = self.classify_text(text, verbose=False)
                
                results.append({
                    **fragment,
                    'detected_techniques': detected,
                    'chat_history': history
                })
                
                if verbose:
                    print(f"  ‚úì Detected {len(detected)} technique(s)")
                    if detected:
                        print(f"    {', '.join(detected)}")
            
            except Exception as e:
                if verbose:
                    print(f"  ‚úó Error: {e}")
                results.append({
                    **fragment,
                    'detected_techniques': [],
                    'chat_history': {},
                    'error': str(e)
                })
        
        if verbose:
            print(f"\n{'='*70}")
            print(f"TODO: translate to EnglishÔºÅ")
            print(f"{'='*70}\n")
        
        return results

print("‚úì FlexibleTechniqueAgentSystemTODO: translate to EnglishÔºÅ")

‚úì FlexibleTechniqueAgentSystemTODO: translate to EnglishÔºÅ


In [6]:
# ============================================================
# Step1: TODO: translate to Englishparagraph(s)TODO: translate to English
# ============================================================

from typing import List, Dict, Tuple

class ContextAwareParagraphDetector:
    """
    TODO: translate to Englishparagraph(s)TODO: translate to EnglishpropagandaTODO: translate to English
    
    Core Features:
    - TODO: translate to Englishparagraph(s)
    - TODO: translate to Englishparagraph(s)TODO: translate to EnglishÔºåTODO: translate to English
    - TODO: translate to Englishparagraph(s)ÔºåTODO: translate to EnglishLLMTODO: translate to Englishparagraph(s)
    """
    
# TODO: translate to EnglishÔºåTODO: translate to English __init__ TODO: translate to English

    def __init__(self, agent_system, language: str = None):
        self.agent_system = agent_system
        
        # TODO: translate to English
        if language is None and hasattr(agent_system, 'language'):
            self.language = agent_system.language.lower()
        else:
            self.language = language.lower() if language else 'en'
        
        print(f"‚úì TODO: translate to English (TODO: translate to English: {self.language.upper()})")
    
    def split_into_paragraphs(self, text: str, min_length: int = None):
        # TODO: translate to English
        if min_length is None:
            min_length = 1 if self.language == 'en' else 50

        paragraphs = []

        if self.language == 'en':
            raw_paragraphs = text.split('\n')  # EnglishÔºöTODO: translate to English
        else:
            raw_paragraphs = re.split(r'\n\s*\n', text)  # TODO: translate to EnglishÔºöTODO: translate to English
        
        # TODO: translate to English
        current_pos = 0
        # paragraph_id = 0
        
        for line_number, raw_para in enumerate(raw_paragraphs, start=1):
            raw_para = raw_para.strip()
            
            # TODO: translate to Englishparagraph(s)

            if len(raw_para) < min_length:
                current_pos = text.find(raw_para, current_pos)
                if current_pos != -1:
                    current_pos += len(raw_para)
                continue
            
            # TODO: translate to Englishparagraph(s)TODO: translate to EnglishPosition
            start_pos = text.find(raw_para, current_pos)
            if start_pos == -1:
                continue
            
            end_pos = start_pos + len(raw_para) - 1
            
            # paragraph_id += 1
            paragraphs.append({
                'paragraph_id': line_number,
                'start_pos': start_pos,
                'end_pos': end_pos,
                'text': raw_para,
                'char_count': len(raw_para),
                'word_count': len(raw_para.split())
            })
            
            current_pos = end_pos + 1
        
        return paragraphs
    
    def create_context_prompt(
        self, 
        full_article: str, 
        target_paragraph: str, 
        paragraph_id: int, 
        total_paragraphs: int
    ) -> str:
        """
        TODO: translate to English
        
        TODO: translate to Englishparagraph(s)ÔºåTODO: translate to EnglishLLMTODO: translate to Englishparagraph(s)
        
        Args:
            full_article: TODO: translate to English
            target_paragraph: TODO: translate to Englishparagraph(s)TODO: translate to English
            paragraph_id: paragraph(s)TODO: translate to English
            total_paragraphs: TODO: translate to Englishparagraph(s)TODO: translate to English
        
        Returns:
            TODO: translate to English
        """
        # TODO: translate to Englishparagraph(s)
        marked_article = full_article.replace(
            target_paragraph,
            f"\n{'='*70}\n>>> TARGET PARAGRAPH (paragraph(s) {paragraph_id}/{total_paragraphs}) <<<\n{'='*70}\n{target_paragraph}\n{'='*70}\n>>> END OF TARGET PARAGRAPH <<<\n{'='*70}\n",
            1  # TODO: translate to English
        )
        
        prompt = f"""CONTEXT: You are analyzing paragraph {paragraph_id} out of {total_paragraphs} paragraphs in the following article.

FULL ARTICLE (for context):
{'‚îÄ'*70}
{marked_article}
{'‚îÄ'*70}

IMPORTANT INSTRUCTIONS:
1. The article above is provided as CONTEXT to help you understand the overall narrative, tone, and argumentation strategy.
2. Your task is to determine if the propaganda technique is present in the marked TARGET PARAGRAPH ONLY (between the === markers).
3. You may use the full article context to better understand the TARGET PARAGRAPH, but you should ONLY evaluate whether the technique appears in the TARGET PARAGRAPH itself.
4. Consider the context when making your judgment, but base your decision on the TARGET PARAGRAPH content.

TARGET PARAGRAPH TO ANALYZE:
{target_paragraph}
"""
        return prompt
    
    def detect_paragraph_with_context(
        self,
        full_article: str,
        target_paragraph: str,
        paragraph_id: int,
        total_paragraphs: int,
        verbose: bool = False
    ) -> Tuple[List[str], Dict]:
        """
        TODO: translate to Englishparagraph(s)
        
        Args:
            full_article: TODO: translate to English
            target_paragraph: TODO: translate to Englishparagraph(s)TODO: translate to English
            paragraph_id: paragraph(s)TODO: translate to English
            total_paragraphs: TODO: translate to Englishparagraph(s)TODO: translate to English
            verbose: TODO: translate to English
        
        Returns:
            (detected_techniques, responses)
        """
        # TODO: translate to English
        context_prompt = self.create_context_prompt(
            full_article, 
            target_paragraph, 
            paragraph_id, 
            total_paragraphs
        )
        
        # TODO: translate to EnglishagentTODO: translate to English
        detected, responses = self.agent_system.classify_text(
            context_prompt,
            verbose=verbose
        )
        
        return detected, responses
    
    def detect_article_by_paragraphs(
        self, 
        article_id: str,
        full_text: str,
        min_paragraph_length: int = 50,
        verbose: bool = True
    ) -> Dict:
        """
        TODO: translate to Englishparagraph(s)TODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ
        
        Args:
            article_id: TODO: translate to EnglishIDÔºàTODO: translate to EnglishÔºâ
            full_text: TODO: translate to English
            min_paragraph_length: TODO: translate to Englishparagraph(s)TODO: translate to English
            verbose: TODO: translate to English
        
        Returns:
            TODO: translate to EnglishÔºåTODO: translate to Englishparagraph(s)TODO: translate to English
        """
        if verbose:
            print(f"\n{'='*70}")
            print(f"üìÑ TODO: translate to English: {article_id}")
            print(f"{'='*70}")
        
        # 1. TODO: translate to Englishparagraph(s)
        paragraphs = self.split_into_paragraphs(full_text, min_paragraph_length)
        
        if verbose:
            print(f"üìä TODO: translate to English:")
            print(f"  - TODO: translate to English: {len(full_text):,}")
            print(f"  - paragraph(s)TODO: translate to English: {len(paragraphs)}")
            if paragraphs:
                avg_para_len = sum(p['char_count'] for p in paragraphs) / len(paragraphs)
                print(f"  - TODO: translate to Englishparagraph(s)TODO: translate to English: {avg_para_len:.0f} TODO: translate to English")
            print(f"\nüîç TODO: translate to Englishparagraph(s)TODO: translate to EnglishÔºàTODO: translate to Englishparagraph(s)TODO: translate to EnglishÔºâ...")
        
        # 2. TODO: translate to Englishparagraph(s)TODO: translate to English
        results = []
        total_paragraphs = len(paragraphs)
        
        for i, para in enumerate(paragraphs, 1):
            if verbose:
                print(f"\n{'‚îÄ'*70}")
                print(f"[paragraph(s) {i}/{total_paragraphs}]")
                print(f"  Position: {para['start_pos']}-{para['end_pos']}")
                print(f"  TODO: translate to English: {para['char_count']} TODO: translate to English")
                print(f"  TODO: translate to English: {para['text'][:100]}...")
            
            # TODO: translate to Englishparagraph(s)
            detected, responses = self.detect_paragraph_with_context(
                full_article=full_text,
                target_paragraph=para['text'],
                paragraph_id=para['paragraph_id'],
                total_paragraphs=total_paragraphs,
                verbose=False
            )
            
            result = {
                'article_id': article_id,
                'paragraph_id': para['paragraph_id'],
                'start_pos': para['start_pos'],
                'end_pos': para['end_pos'],
                'char_count': para['char_count'],
                'detected_techniques': detected,
                'num_techniques': len(detected),
                'text': para['text'],  # TODO: translate to Englishparagraph(s)TODO: translate to English
                'text_preview': para['text'][:150]
            }
            
            results.append(result)
            
            if verbose:
                if detected:
                    print(f"  ‚úì Detected {len(detected)} technique(s):")
                    for tech in detected:
                        print(f"    ‚Ä¢ {tech}")
                else:
                    print(f"  ‚óã TODO: translate to EnglishDetectedpropagandaTODO: translate to English")
        
        if verbose:
            print(f"\n{'='*70}")
            print(f"‚úÖ TODO: translate to English")
            total_techniques = sum(r['num_techniques'] for r in results)
            paras_with_tech = sum(1 for r in results if r['num_techniques'] > 0)
            print(f"  TODO: translate to Englishparagraph(s)TODO: translate to English: {len(results)}")
            print(f"  TODO: translate to Englishparagraph(s): {paras_with_tech} ({paras_with_tech/len(results)*100:.1f}%)")
            print(f"  TODO: translate to Englishtechnique instance(s)TODO: translate to English: {total_techniques}")
            print(f"{'='*70}\n")
        
        return {
            'article_id': article_id,
            'total_paragraphs': len(paragraphs),
            'full_text_length': len(full_text),
            'full_text': full_text,  # TODO: translate to English
            'paragraph_results': results
        }
    
    def format_output_tsv(self, detection_result: Dict) -> List[str]:
        """
        TODO: translate to EnglishTSVTODO: translate to English
        TODO: translate to English: article_id    start_pos    end_pos    technique1    technique2    ...
        
        Args:
            detection_result: detect_article_by_paragraphsTODO: translate to English
        
        Returns:
            TSVTODO: translate to English
        """
        lines = []
        article_id = detection_result['article_id']
        
        for para_result in detection_result['paragraph_results']:
            # TODO: translate to EnglishDetectedTODO: translate to Englishparagraph(s)
            if para_result['detected_techniques']:
                techniques_str = '\t'.join(para_result['detected_techniques'])
                line = f"{article_id}\t{para_result['start_pos']}\t{para_result['end_pos']}\t{techniques_str}"
                lines.append(line)
        
        return lines
    
    def save_results_tsv(self, detection_results: List[Dict], output_file: str):
        """
        TODO: translate to EnglishTSVTODO: translate to English
        
        Args:
            detection_results: TODO: translate to English
            output_file: TODO: translate to English
        """
        with open(output_file, 'w', encoding='utf-8') as f:
            # TODO: translate to English
            f.write("article_id\tstart_pos\tend_pos\ttechniques\n")
            
            # TODO: translate to English
            for result in detection_results:
                tsv_lines = self.format_output_tsv(result)
                for line in tsv_lines:
                    f.write(line + '\n')
        
        print(f"‚úì TSVTODO: translate to English: {output_file}")
    
    def save_results_detailed(self, detection_results: List[Dict], output_file: str):
        """
        TODO: translate to EnglishJSONTODO: translate to EnglishÔºàTODO: translate to Englishparagraph(s)TODO: translate to EnglishÔºâ
        
        Args:
            detection_results: TODO: translate to English
            output_file: TODO: translate to English
        """
        import json
        
        # TODO: translate to English
        total_articles = len(detection_results)
        total_paragraphs = sum(r['total_paragraphs'] for r in detection_results)
        total_paras_with_tech = sum(
            sum(1 for pr in r['paragraph_results'] if pr['num_techniques'] > 0)
            for r in detection_results
        )
        total_technique_instances = sum(
            sum(pr['num_techniques'] for pr in r['paragraph_results'])
            for r in detection_results
        )
        
        # TODO: translate to English
        technique_counter = {}
        for result in detection_results:
            for para_result in result['paragraph_results']:
                for tech in para_result['detected_techniques']:
                    technique_counter[tech] = technique_counter.get(tech, 0) + 1
        
        sorted_techniques = sorted(technique_counter.items(), key=lambda x: x[1], reverse=True)
        
        output_data = {
            'summary': {
                'total_articles': total_articles,
                'total_paragraphs': total_paragraphs,
                'paragraphs_with_techniques': total_paras_with_tech,
                'paragraphs_with_techniques_percentage': (total_paras_with_tech/total_paragraphs*100) if total_paragraphs > 0 else 0,
                'total_technique_instances': total_technique_instances,
                'avg_techniques_per_paragraph': total_technique_instances/total_paragraphs if total_paragraphs > 0 else 0,
                'technique_frequency': dict(sorted_techniques)
            },
            'detection_method': 'context-aware paragraph detection',
            'note': 'Each paragraph was analyzed with full article context',
            'articles': detection_results
        }
        
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(output_data, f, ensure_ascii=False, indent=2)
        
        print(f"‚úì TODO: translate to EnglishJSONTODO: translate to English: {output_file}")

print("‚úì ContextAwareParagraphDetectorTODO: translate to EnglishÔºÅ")

‚úì ContextAwareParagraphDetectorTODO: translate to EnglishÔºÅ


In [7]:
# ============================================================
# TODO: translate to EnglishContextAwareParagraphDetectorTODO: translate to English
# ============================================================


class VotingContextAwareParagraphDetector(ContextAwareParagraphDetector):
    """
    TODO: translate to Englishparagraph(s)TODO: translate to English
    """
    
    def __init__(
        self, 
        agent_system: FlexibleTechniqueAgentSystem,
        voting_rounds: int = 3,
        voting_threshold: float = 0.6,
        use_temperature: bool = True,
        temperature: float = 0.3
    ):
        super().__init__(agent_system)
        self.voting_rounds = voting_rounds
        self.voting_threshold = voting_threshold
        self.use_temperature = use_temperature
        self.temperature = temperature
        
        if use_temperature:
            self.original_temperature = agent_system.temperature
            agent_system.temperature = temperature
    
    def detect_paragraph_with_voting(
        self,
        full_article: str,
        target_paragraph: str,
        paragraph_id: int,
        total_paragraphs: int,
        verbose: bool = False
    ) -> Dict:
        """TODO: translate to Englishparagraph(s)"""
        if verbose:
            print(f"\nüó≥Ô∏è  TODO: translate to English ({self.voting_rounds}TODO: translate to English)...")
        
        all_round_results = []
        technique_vote_counts = Counter()
        
        # TODO: translate to English
        for round_num in range(self.voting_rounds):
            if verbose:
                print(f"  TODO: translate to English {round_num + 1}/{self.voting_rounds}...", end=" ")
            
            # TODO: translate to English
            detected_techniques, _ = self.detect_paragraph_with_context(
                full_article=full_article,
                target_paragraph=target_paragraph,
                paragraph_id=paragraph_id,
                total_paragraphs=total_paragraphs,
                verbose=False
            )
            
            all_round_results.append(detected_techniques)
            
            for tech in detected_techniques:
                technique_vote_counts[tech] += 1
            
            if verbose:
                print(f"Detected {len(detected_techniques)} technique(s)")
            
            if round_num < self.voting_rounds - 1:
                time.sleep(0.5)
        
        # TODO: translate to English
        min_votes = int(self.voting_rounds * self.voting_threshold)
        final_techniques = [
            tech for tech, count in technique_vote_counts.items()
            if count >= min_votes
        ]
        
        final_techniques.sort(
            key=lambda t: technique_vote_counts[t],
            reverse=True
        )
        
        if verbose and final_techniques:
            print(f"\n  üìä TODO: translate to English:")
            print(f"     TODO: translate to English: {min_votes}/{self.voting_rounds} TODO: translate to English")
            print(f"     TODO: translate to English: {len(final_techniques)}")
            for tech in final_techniques:
                votes = technique_vote_counts[tech]
                print(f"       ‚Ä¢ {tech}: {votes}/{self.voting_rounds} TODO: translate to English ({votes/self.voting_rounds:.0%})")
        
        return {
            'detected_techniques': final_techniques,
            'num_techniques': len(final_techniques),
            'voting_details': {
                'rounds': self.voting_rounds,
                'threshold': self.voting_threshold,
                'all_round_results': all_round_results,
                'vote_counts': dict(technique_vote_counts),
                'passed_techniques': final_techniques
            }
        }
    
    def detect_article_by_paragraphs(
        self,
        article_id: str,
        full_text: str,
        min_paragraph_length: int = 50,
        verbose: bool = False
    ) -> Dict:
        """TODO: translate to English"""
        if verbose:
            print(f"\n{'='*70}")
            print(f"üìÑ TODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ: {article_id}")
            print(f"{'='*70}")
            print(f"‚öôÔ∏è  TODO: translate to English:")
            print(f"   - TODO: translate to English: {self.voting_rounds}")
            print(f"   - TODO: translate to English: {self.voting_threshold} ({int(self.voting_rounds * self.voting_threshold)}/{self.voting_rounds}TODO: translate to English)")
            if self.use_temperature:
                print(f"   - TODO: translate to English: {self.temperature}")
        
        # TODO: translate to Englishparagraph(s)
        paragraphs = self.split_into_paragraphs(full_text, min_paragraph_length)
        total_paragraphs = len(paragraphs)
        
        if verbose:
            print(f"üìä TODO: translate to English:")
            print(f"  - TODO: translate to English: {len(full_text):,}")
            print(f"  - paragraph(s)TODO: translate to English: {total_paragraphs}")
            if paragraphs:
                avg_len = sum(p['char_count'] for p in paragraphs) // total_paragraphs
                print(f"  - TODO: translate to Englishparagraph(s)TODO: translate to English: {avg_len} TODO: translate to English")
            print(f"\nüîç TODO: translate to Englishparagraph(s)TODO: translate to English...")
        
        paragraph_results = []
        
        for i, para_info in enumerate(paragraphs, 1):
            if verbose:
                print(f"\n{'‚îÄ'*70}")
                print(f"[paragraph(s) {i}/{total_paragraphs}]")
                print(f"  Position: {para_info['start_pos']}-{para_info['end_pos']}")
                print(f"  TODO: translate to English: {para_info['char_count']} TODO: translate to English")
                print(f"  TODO: translate to English: {para_info['text'][:70]}...")
            
            # TODO: translate to English
            result = self.detect_paragraph_with_voting(
                full_article=full_text,
                target_paragraph=para_info['text'],
                paragraph_id=para_info['paragraph_id'],
                total_paragraphs=total_paragraphs,
                verbose=verbose
            )
            
            # TODO: translate to EnglishPositionTODO: translate to English
            result['article_id'] = article_id
            result['start_pos'] = para_info['start_pos']
            result['end_pos'] = para_info['end_pos']
            result['paragraph_text'] = para_info['text']
            
            paragraph_results.append(result)
            
            if result['num_techniques'] > 0:
                if verbose:
                    print(f"  ‚úì Final confirmation {result['num_techniques']} technique(s):")
                    for tech in result['detected_techniques']:
                        votes = result['voting_details']['vote_counts'][tech]
                        print(f"    ‚Ä¢ {tech} ({votes}/{self.voting_rounds})")
            else:
                if verbose:
                    print(f"  ‚óã TODO: translate to EnglishDetectedpropagandaTODO: translate to English")
        
        total_techniques = sum(r['num_techniques'] for r in paragraph_results)
        paragraphs_with_techniques = sum(1 for r in paragraph_results if r['num_techniques'] > 0)
        
        if verbose:
            print(f"\n{'='*70}")
            print(f"‚úÖ TODO: translate to English")
            print(f"  TODO: translate to Englishparagraph(s)TODO: translate to English: {len(paragraph_results)}")
            print(f"  TODO: translate to Englishparagraph(s): {paragraphs_with_techniques} ({paragraphs_with_techniques/len(paragraph_results):.1%})")
            print(f"  TODO: translate to Englishtechnique instance(s)TODO: translate to English: {total_techniques}")
            print(f"{'='*70}")
        
        return {
            'article_id': article_id,
            'full_text': full_text,
            'num_paragraphs': len(paragraph_results),
            'paragraphs_with_techniques': paragraphs_with_techniques,
            'total_techniques': total_techniques,
            'paragraph_results': paragraph_results,
            'voting_config': {
                'rounds': self.voting_rounds,
                'threshold': self.voting_threshold,
                'temperature': self.temperature if self.use_temperature else 0.0
            }
        }
    
    def __del__(self):
        if hasattr(self, 'use_temperature') and self.use_temperature:
            if hasattr(self, 'original_temperature'):
                self.agent_system.temperature = self.original_temperature


print("‚úì VotingContextAwareParagraphDetector TODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ")

‚úì VotingContextAwareParagraphDetector TODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ


## Step5: TODO: translate to EnglishAPITODO: translate to English

**TODO: translate to EnglishÔºÅ**

In [8]:
# ========================================
# TODO: translate to English
# ========================================

# TODO: translate to English
USE_OLLAMA = False  # True=Ollama, False=OpenAI

# TODO: translate to English
API_CONFIG_PATH = "your_api_config_file_location"
PROMPTS_BASE_DIR = "your_autogen_propaganda_analysis_results_directory"
# DATA_BASE_DIR = "your_trial_annotated_directory"
DATA_CONFIGS = {
    'checkthat_base': 'your_data_directory',
    'train_base': 'your_all_original_articlestrain_version_directory',
    'trial_base': 'your_trial_annotated_directory'  # TODO: translate to English
}

# TODO: translate to EnglishAPITODO: translate to English
print("TODO: translate to EnglishAPITODO: translate to English...")
try:
    with open(API_CONFIG_PATH, 'r') as f:
        api_config = json.load(f)
    
    # ‚úÖ TODO: translate to English api_keys TODO: translate to English
    api_keys = api_config.get('api_keys', {})
    
    if USE_OLLAMA:
        # TODO: translate to EnglishOllama
        api_key = api_keys.get('ollama_api_key')
        
        # TODO: translate to Englishollama_api_keyÔºåTODO: translate to Englishopenai_api_key
        if not api_key:
            api_key = api_keys.get('openai_api_key')
            print("‚ö†Ô∏è TODO: translate to Englishollama_api_keyÔºåTODO: translate to Englishopenai_api_keyTODO: translate to English")
        
        model = 'llama3:70b'  # TODO: translate to EnglishOllamaTODO: translate to English
        print("‚úì TODO: translate to EnglishOllamaTODO: translate to English")
        print(f"  TODO: translate to English: {model}")
    else:
        # TODO: translate to EnglishOpenAI
        api_key = api_keys.get('openai_api_key')
        model = 'gpt-4o-mini'
        print("‚úì TODO: translate to EnglishOpenAITODO: translate to English")
        print(f"  TODO: translate to English: {model}")
    
    if not api_key:
        raise ValueError(f"TODO: translate to English{'Ollama' if USE_OLLAMA else 'OpenAI'}TODO: translate to EnglishAPITODO: translate to EnglishÔºÅ")
    
    print("‚úì APITODO: translate to EnglishÔºÅ")
    
except Exception as e:
    print(f"‚úó TODO: translate to EnglishAPITODO: translate to English: {e}")
    print("TODO: translate to EnglishÔºÅ")
    api_key = None
    model = None

TODO: translate to EnglishAPITODO: translate to English...
‚úì TODO: translate to EnglishOpenAITODO: translate to English
  TODO: translate to English: gpt-4o-mini
‚úì APITODO: translate to EnglishÔºÅ


## Step6: TODO: translate to English

TODO: translate to English

In [9]:
# TODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ
loader = FlexiblePromptLoader(
    base_dir=PROMPTS_BASE_DIR,
    languages=['ru', 'po','en'],  # TODO: translate to EnglishRussianTODO: translate to EnglishPolish
    # exclude_techniques=['Slogans'],  # TODO: translate to EnglishÔºöExcludeTODO: translate to English
    verbose=True
)


üîç TODO: translate to English
TODO: translate to English: /home/jovyan/TRLAL/Total_work/research_projects/disinformation_detection/notebooks/data_analy/keyword_find/autogen_propaganda_analysis_results
TODO: translate to English: _prompt.md

üìÇ TODO: translate to English: en, po, ru
‚úì TODO: translate to English: en, po, ru

üìÅ TODO: translate to English: EN
  TODO: translate to English 19 technique(s)TODO: translate to English
  TODO: translate to English 19 technique(s)

üìÅ TODO: translate to English: PO
  TODO: translate to English 23 technique(s)TODO: translate to English
  TODO: translate to English 23 technique(s)

üìÅ TODO: translate to English: RU
  TODO: translate to English 23 technique(s)TODO: translate to English
  TODO: translate to English 23 technique(s)

üìä TODO: translate to English
‚úì TODO: translate to English: 3
  en, po, ru

‚úì TODO: translate to English: 23
  Appeal_to_Authority, Appeal_to_Fear_Prejudice, Appeal_to_Hypocrisy
  Appeal_to_Popularity, A

## Step7: TODO: translate to English

In [10]:
# TODO: translate to English
stats = loader.get_stats()

print("\nüìä TODO: translate to English:")
print(f"TODO: translate to English: {stats['total_prompts']}")
print(f"TODO: translate to English: {stats['total_languages']}")

print(f"TODO: translate to English: {stats['total_techniques']}")

print("\nüìÇ TODO: translate to English:")
for lang, count in stats['coverage'].items():
    print(f"  {lang.upper()}: {count} technique(s)")


üìä TODO: translate to English:
TODO: translate to English: 65
TODO: translate to English: 3
TODO: translate to English: 23

üìÇ TODO: translate to English:
  RU: 23 technique(s)
  PO: 23 technique(s)
  EN: 19 technique(s)


## Step8: TODO: translate to English

**TODO: translate to English**: TODO: translate to EnglishAutomatically append output-format instructionsÔºÅ

In [11]:
# TODO: translate to Englishtechnique(s)TODO: translate to English

import warnings
import logging

# TODO: translate to EnglishAutoGenTODO: translate to EnglishAPITODO: translate to English
warnings.filterwarnings('ignore', message='The API key specified is not a valid OpenAI format')

# TODO: translate to EnglishautogenTODO: translate to EnglishloggingTODO: translate to English
logging.getLogger('autogen.oai.client').setLevel(logging.ERROR)

prompt = loader.get_prompt('Straw_Man', 'po')
# print(prompt.prompt_content)  # ‚Üê TODO: translate to English.mdTODO: translate to English

# TODO: translate to English(TODO: translate to English)
agent_system = FlexibleTechniqueAgentSystem(
    prompt_loader=loader,
    language='po',
    api_key=api_key,
    model=model
)
full_prompt = agent_system._create_agent_system_prompt('Straw_Man')
print(full_prompt)  # ‚Üê TODO: translate to English + TODO: translate to English

üîå TODO: translate to EnglishOpenAITODO: translate to English: gpt-4o-mini

üöÄ TODO: translate to English 23 technique(s)TODO: translate to EnglishAgentTODO: translate to English
TODO: translate to English: PO
TODO: translate to English: gpt-4o-mini
‚úì TODO: translate to EnglishagentsTODO: translate to EnglishÔºÅ

# Straw_Man - po

**TODO: translate to English**: 8
**TODO: translate to English**: success

---

## LLM Classification Prompt

The 'Straw Man' fallacy is a common rhetorical technique used in arguments where one party misrepresents an opponent's position to make it easier to attack or refute. In essence, the 'Straw Man' distorts the original argument, creating a false version of it that can be easily dismantled. This technique is particularly prevalent in political discourse and can manifest in various forms, such as oversimplifying complex arguments, exaggerating them, or attributing extreme views to opponents that they do not actually hold.

When training a language m

## Step9: TODO: translate to EnglishAgentTODO: translate to English

TODO: translate to Englishtechnique(s)TODO: translate to EnglishÔºÅ

In [12]:

import logging

# TODO: translate to EnglishautogenTODO: translate to EnglishERRORÔºåTODO: translate to EnglishWARNING
logging.getLogger("autogen.oai.client").setLevel(logging.ERROR)


#TODO: translate to EnglishPolish
# TODO: translate to EnglishAPITODO: translate to English
if api_key is None:
    print("‚ö†Ô∏è APITODO: translate to EnglishÔºåTODO: translate to EnglishÔºÅ")
    print("TODO: translate to EnglishStep5TODO: translate to EnglishAPITODO: translate to English„ÄÇ")
else:
    # TODO: translate to EnglishPolishTODO: translate to English
    system_po = FlexibleTechniqueAgentSystem(
        prompt_loader=loader,
        language='po',  # Polish
        api_key=api_key,
        model='gpt-4o-mini',
        temperature=0
    )

if api_key:
    # TODO: translate to EnglishRussianTODO: translate to English
    system_ru = FlexibleTechniqueAgentSystem(
        prompt_loader=loader,
        language='ru',  # Russian
        api_key=api_key,
        model='gpt-4o-mini',
        temperature=0
    )

    system_en = FlexibleTechniqueAgentSystem(
        prompt_loader=loader,
        language='en',  # English (English)
        api_key=api_key,
        model='gpt-4o-mini',
        temperature=0
    )
    # TODO: translate to EnglishRussianTODO: translate to English
    # ru_prompt = system_ru._create_agent_system_prompt("Straw_Man")
    
    # print("\nRussianTODO: translate to EnglishÔºàTODO: translate to English500TODO: translate to EnglishÔºâ:")
    # print("="*70)
    # print(ru_prompt[-500:])
    # print("\n‚úì TODO: translate to EnglishRussianTODO: translate to EnglishPolishTODO: translate to EnglishÔºÅ")
else:
    print("‚ö†Ô∏è TODO: translate to EnglishAPITODO: translate to EnglishÔºàTODO: translate to EnglishStep5Ôºâ")

üîå TODO: translate to EnglishOpenAITODO: translate to English: gpt-4o-mini

üöÄ TODO: translate to English 23 technique(s)TODO: translate to EnglishAgentTODO: translate to English
TODO: translate to English: PO
TODO: translate to English: gpt-4o-mini
‚úì TODO: translate to EnglishagentsTODO: translate to EnglishÔºÅ

üîå TODO: translate to EnglishOpenAITODO: translate to English: gpt-4o-mini

üöÄ TODO: translate to English 23 technique(s)TODO: translate to EnglishAgentTODO: translate to English
TODO: translate to English: RU
TODO: translate to English: gpt-4o-mini
‚úì TODO: translate to EnglishagentsTODO: translate to EnglishÔºÅ

üîå TODO: translate to EnglishOpenAITODO: translate to English: gpt-4o-mini

üöÄ TODO: translate to English 19 technique(s)TODO: translate to EnglishAgentTODO: translate to English
TODO: translate to English: EN
TODO: translate to English: gpt-4o-mini
‚úì TODO: translate to EnglishagentsTODO: translate to EnglishÔºÅ



In [13]:
# TODO: translate to English12
print("="*70)
print("üöÄ TODO: translate to English")
print("="*70)

# RussianTODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ
context_detector_ru = ContextAwareParagraphDetector(system_ru, language='ru')
print("‚úì RussianTODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ")

# PolishTODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ
context_detector_po = ContextAwareParagraphDetector(system_po, language='po')
print("‚úì PolishTODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ")

# EnglishTODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ
context_detector_en = ContextAwareParagraphDetector(system_en, language='en')
print("‚úì EnglishTODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ")

üöÄ TODO: translate to English
‚úì TODO: translate to English (TODO: translate to English: RU)
‚úì RussianTODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ
‚úì TODO: translate to English (TODO: translate to English: PO)
‚úì PolishTODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ
‚úì TODO: translate to English (TODO: translate to English: EN)
‚úì EnglishTODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ


In [14]:
test_article_path = "your_article813494037_file_location"

with open(test_article_path, 'r', encoding='utf-8') as f:
    text = f.read()

paragraphs = context_detector_en.split_into_paragraphs(text)

print(f"üìä TODO: translate to English:")
print(f"  paragraph(s)TODO: translate to English: {len(paragraphs)}")
print(f"  paragraph(s)IDTODO: translate to English: {paragraphs[0]['paragraph_id']} - {paragraphs[-1]['paragraph_id']}")

# TODO: translate to English
para_ids = [p['paragraph_id'] for p in paragraphs]
print(f"  TODO: translate to English10TODO: translate to EnglishID: {para_ids[:10]}")

üìä TODO: translate to English:
  paragraph(s)TODO: translate to English: 32
  paragraph(s)IDTODO: translate to English: 1 - 33
  TODO: translate to English10TODO: translate to EnglishID: [1, 3, 4, 5, 6, 7, 8, 9, 10, 11]


In [15]:
import logging

# TODO: translate to EnglishautogenTODO: translate to EnglishERROR,TODO: translate to EnglishWARNING
logging.getLogger("autogen.oai.client").setLevel(logging.ERROR)

# ============================================================
# TODO: translate to English
# ============================================================

print("\n" + "="*70)
print("üó≥Ô∏è  TODO: translate to English(TODO: translate to English)")
print("="*70)

# ============================================================
# RussianTODO: translate to English
# ============================================================

print("\n[1/3] Create a Russian voting detector...")

# TODO: translate to English
voting_detector_ru_balanced = VotingContextAwareParagraphDetector(
    agent_system=system_ru,
    voting_rounds=3,
    voting_threshold=0.67,
    use_temperature=True,
    temperature=0.3
)

# TODO: translate to English
voting_detector_ru_conservative = VotingContextAwareParagraphDetector(
    agent_system=system_ru,
    voting_rounds=5,
    voting_threshold=0.8,
    use_temperature=True,
    temperature=0.3
)

# TODO: translate to English
voting_detector_ru_aggressive = VotingContextAwareParagraphDetector(
    agent_system=system_ru,
    voting_rounds=3,
    voting_threshold=0.34,
    use_temperature=True,
    temperature=0.4
)

print("‚úì RussianTODO: translate to English(3TODO: translate to English)")

# ============================================================
# PolishTODO: translate to English
# ============================================================

print("\n[2/3] Create a Polish voting detector...")

# TODO: translate to English
voting_detector_po_balanced = VotingContextAwareParagraphDetector(
    agent_system=system_po,
    voting_rounds=3,
    voting_threshold=0.67,
    use_temperature=True,
    temperature=0.3
)

# TODO: translate to English
voting_detector_po_conservative = VotingContextAwareParagraphDetector(
    agent_system=system_po,
    voting_rounds=5,
    voting_threshold=0.8,
    use_temperature=True,
    temperature=0.3
)

# TODO: translate to English
voting_detector_po_aggressive = VotingContextAwareParagraphDetector(
    agent_system=system_po,
    voting_rounds=3,
    voting_threshold=0.34,
    use_temperature=True,
    temperature=0.4
)

print("‚úì PolishTODO: translate to English(3TODO: translate to English)")

# ============================================================
# EnglishTODO: translate to English
# ============================================================

print("\n[3/3] Create an English voting detector...")

# TODO: translate to English
voting_detector_en_balanced = VotingContextAwareParagraphDetector(
    agent_system=system_en,
    voting_rounds=3,
    voting_threshold=0.67,
    use_temperature=True,
    temperature=0.3
)

# TODO: translate to English
voting_detector_en_conservative = VotingContextAwareParagraphDetector(
    agent_system=system_en,
    voting_rounds=5,
    voting_threshold=0.8,
    use_temperature=True,
    temperature=0.3
)

# TODO: translate to English
voting_detector_en_aggressive = VotingContextAwareParagraphDetector(
    agent_system=system_en,
    voting_rounds=3,
    voting_threshold=0.34,
    use_temperature=True,
    temperature=0.4
)

print("‚úì EnglishTODO: translate to English(3TODO: translate to English)")

# TODO: translate to English(TODO: translate to English)
voting_detector_balanced = voting_detector_po_balanced
voting_detector_conservative = voting_detector_po_conservative
voting_detector_aggressive = voting_detector_po_aggressive

print("\n‚úì TODO: translate to English!")
print("\nTODO: translate to English:")
print("  Russian: voting_detector_ru_{balanced/conservative/aggressive}")
print("  Polish: voting_detector_po_{balanced/conservative/aggressive}")
print("  English: voting_detector_en_{balanced/conservative/aggressive}")
print("="*70)


üó≥Ô∏è  TODO: translate to English(TODO: translate to English)

[1/3] Create a Russian voting detector...
‚úì TODO: translate to English (TODO: translate to English: RU)
‚úì TODO: translate to English (TODO: translate to English: RU)
‚úì TODO: translate to English (TODO: translate to English: RU)
‚úì RussianTODO: translate to English(3TODO: translate to English)

[2/3] Create a Polish voting detector...
‚úì TODO: translate to English (TODO: translate to English: PO)
‚úì TODO: translate to English (TODO: translate to English: PO)
‚úì TODO: translate to English (TODO: translate to English: PO)
‚úì PolishTODO: translate to English(3TODO: translate to English)

[3/3] Create an English voting detector...
‚úì TODO: translate to English (TODO: translate to English: EN)
‚úì TODO: translate to English (TODO: translate to English: EN)
‚úì TODO: translate to English (TODO: translate to English: EN)
‚úì EnglishTODO: translate to English(3TODO: translate to English)

‚úì TODO: translate to Englis

##  TODO: translate to English

In [16]:
import os
from typing import List, Dict, Set, Tuple
from collections import defaultdict

# ============================================================
# TODO: translate to English
# ============================================================

class PropagandaEvaluator:
    """PropagandaTODO: translate to English"""
    
    def __init__(self):
        # Technique nameTODO: translate to English
        self.technique_normalization = {
            'Appeal_to_Fear-Prejudice': 'Appeal_to_Fear_Prejudice',
            'Repetitions': 'Repetition',
            'Name_Calling-Labeling': 'Name_Calling_Labeling',
            'Exaggeration-Minimisation': 'Exaggeration_Minimisation',
            # TODO: translate to English
        }
    
    def normalize_technique_name(self, technique: str) -> str:
        """
        TODO: translate to EnglishTechnique name
        
        Args:
            technique: TODO: translate to EnglishTechnique name
        
        Returns:
            TODO: translate to EnglishTechnique name
        """
        # TODO: translate to English
        if technique in self.technique_normalization:
            return self.technique_normalization[technique]
        
        # TODO: translate to English
        technique = technique.replace('-', '_')
        
        return technique
    
    def load_gold_annotations(self, annotation_file: str) -> Dict[str, List[Dict]]:
        """
        TODO: translate to English
        
        Args:
            annotation_file: TODO: translate to English
        
        Returns:
            TODO: translate to EnglishÔºö{article_id: [annotation1, annotation2, ...]}
        """
        annotations = defaultdict(list)
        
        if not os.path.exists(annotation_file):
            print(f"‚úó TODO: translate to English: {annotation_file}")
            return {}
        
        with open(annotation_file, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                
                parts = line.split('\t')
                if len(parts) < 4:  # TODO: translate to EnglishÔºöarticle_id, start, end, technique
                    continue
                
                article_id = parts[0]
                start_pos = int(parts[1])
                end_pos = int(parts[2])
                techniques = [self.normalize_technique_name(t) for t in parts[3:]]
                
                annotations[article_id].append({
                    'article_id': article_id,
                    'start_pos': start_pos,
                    'end_pos': end_pos,
                    'techniques': techniques,
                    'num_techniques': len(techniques)
                })
        
        print(f"‚úì TODO: translate to English {len(annotations)} TODO: translate to English")
        total_spans = sum(len(spans) for spans in annotations.values())
        print(f"‚úì TODO: translate to English: {total_spans}")
        
        return dict(annotations)
    
    def calculate_overlap(self, span1: Tuple[int, int], span2: Tuple[int, int]) -> float:
        """
        TODO: translate to English
        
        Args:
            span1: (start, end)
            span2: (start, end)
        
        Returns:
            TODO: translate to English (0.0 TODO: translate to English 1.0)
        """
        start1, end1 = span1
        start2, end2 = span2
        
        # Compute overlap region
        overlap_start = max(start1, start2)
        overlap_end = min(end1, end2)
        
        if overlap_start >= overlap_end:
            return 0.0  # No overlap
        
        overlap_length = overlap_end - overlap_start
        
        # Compute overlap ratio relative to the shorter span
        span1_length = end1 - start1
        span2_length = end2 - start2
        min_length = min(span1_length, span2_length)
        
        if min_length == 0:
            return 0.0
        
        return overlap_length / min_length
    
    def match_spans(
        self, 
        detected_spans: List[Dict], 
        gold_spans: List[Dict],
        overlap_threshold: float = 0.5
    ) -> Dict:
        """
        TODO: translate to English
        
        Args:
            detected_spans: TODO: translate to English
            gold_spans: TODO: translate to English
            overlap_threshold: TODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ
        
        Returns:
            TODO: translate to English
        """
        matched_pairs = []
        unmatched_detected = []
        unmatched_gold = []
        
        # TODO: translate to Englishgold spansTODO: translate to English
        gold_matched = [False] * len(gold_spans)
        
        # TODO: translate to EnglishÔºåTODO: translate to Englishgold span
        for detected in detected_spans:
            detected_span = (detected['start_pos'], detected['end_pos'])
            
            best_overlap = 0.0
            best_gold_idx = -1
            
            for idx, gold in enumerate(gold_spans):
                if gold_matched[idx]:
                    continue  # TODO: translate to English
                
                gold_span = (gold['start_pos'], gold['end_pos'])
                overlap = self.calculate_overlap(detected_span, gold_span)
                
                if overlap > best_overlap:
                    best_overlap = overlap
                    best_gold_idx = idx
            
            # TODO: translate to English
            if best_overlap >= overlap_threshold and best_gold_idx >= 0:
                matched_pairs.append({
                    'detected': detected,
                    'gold': gold_spans[best_gold_idx],
                    'overlap': best_overlap
                })
                gold_matched[best_gold_idx] = True
            else:
                unmatched_detected.append(detected)
        
        # TODO: translate to Englishgold spans
        for idx, gold in enumerate(gold_spans):
            if not gold_matched[idx]:
                unmatched_gold.append(gold)
        
        return {
            'matched_pairs': matched_pairs,
            'unmatched_detected': unmatched_detected,
            'unmatched_gold': unmatched_gold
        }
    
    def evaluate_techniques(self, matched_pairs: List[Dict]) -> Dict:
        """
        TODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ
        
        Args:
            matched_pairs: TODO: translate to English
        
        Returns:
            TODO: translate to English
        """
        total_detected = 0
        total_gold = 0
        total_correct = 0
        
        technique_stats = defaultdict(lambda: {'tp': 0, 'fp': 0, 'fn': 0})
        
        for pair in matched_pairs:
            detected_techs = set(pair['detected']['detected_techniques'])
            gold_techs = set(pair['gold']['techniques'])
            
            total_detected += len(detected_techs)
            total_gold += len(gold_techs)
            
            # TODO: translate to EnglishÔºöTODO: translate to EnglishÔºåTODO: translate to English
            correct = detected_techs & gold_techs
            total_correct += len(correct)
            
            # TODO: translate to EnglishÔºöTODO: translate to EnglishÔºåTODO: translate to English
            false_positive = detected_techs - gold_techs
            
            # TODO: translate to EnglishÔºöTODO: translate to EnglishÔºåTODO: translate to English
            false_negative = gold_techs - detected_techs
            
            # TODO: translate to Englishtechnique(s)TODO: translate to English
            for tech in correct:
                technique_stats[tech]['tp'] += 1
            
            for tech in false_positive:
                technique_stats[tech]['fp'] += 1
            
            for tech in false_negative:
                technique_stats[tech]['fn'] += 1
        
        # Compute overall metrics
        precision = total_correct / total_detected if total_detected > 0 else 0.0
        recall = total_correct / total_gold if total_gold > 0 else 0.0
        f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0
        
        return {
            'total_detected': total_detected,
            'total_gold': total_gold,
            'total_correct': total_correct,
            'precision': precision,
            'recall': recall,
            'f1': f1,
            'technique_stats': dict(technique_stats)
        }
    
    def evaluate_article(
        self,
        detected_result: Dict,
        gold_annotations: List[Dict],
        overlap_threshold: float = 0.5
    ) -> Dict:
        """
        TODO: translate to English
        
        Args:
            detected_result: TODO: translate to EnglishÔºàTODO: translate to Englishdetect_article_by_paragraphsÔºâ
            gold_annotations: TODO: translate to English
            overlap_threshold: TODO: translate to English
        
        Returns:
            TODO: translate to English
        """
        article_id = detected_result['article_id']
        
        # TODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ
        detected_spans = [
            span for span in detected_result['paragraph_results']
            if span['num_techniques'] > 0
        ]
        
        # TODO: translate to English
        matching_result = self.match_spans(detected_spans, gold_annotations, overlap_threshold)
        
        # TODO: translate to English
        technique_eval = self.evaluate_techniques(matching_result['matched_pairs'])
        
        # TODO: translate to English
        num_detected_spans = len(detected_spans)
        num_gold_spans = len(gold_annotations)
        num_matched_spans = len(matching_result['matched_pairs'])
        
        span_precision = num_matched_spans / num_detected_spans if num_detected_spans > 0 else 0.0
        span_recall = num_matched_spans / num_gold_spans if num_gold_spans > 0 else 0.0
        span_f1 = 2 * span_precision * span_recall / (span_precision + span_recall) if (span_precision + span_recall) > 0 else 0.0
        
        return {
            'article_id': article_id,
            'span_level': {
                'detected': num_detected_spans,
                'gold': num_gold_spans,
                'matched': num_matched_spans,
                'precision': span_precision,
                'recall': span_recall,
                'f1': span_f1
            },
            'technique_level': technique_eval,
            'matching_details': matching_result
        }
    
    def print_evaluation_report(self, eval_result: Dict, detailed: bool = True):
        """
        TODO: translate to English
        
        Args:
            eval_result: TODO: translate to English
            detailed: TODO: translate to English
        """
        article_id = eval_result['article_id']
        
        print(f"\n{'='*70}")
        print(f"TODO: translate to English: {article_id}")
        print(f"{'='*70}")
        
        # TODO: translate to English
        span = eval_result['span_level']
        print(f"\nüìç TODO: translate to English:")
        print(f"  TODO: translate to English: {span['detected']}")
        print(f"  TODO: translate to English: {span['gold']}")
        print(f"  TODO: translate to English: {span['matched']}")
        print(f"  TODO: translate to English: {span['precision']:.2%}")
        print(f"  TODO: translate to English: {span['recall']:.2%}")
        print(f"  F1TODO: translate to English: {span['f1']:.2%}")
        
        # TODO: translate to English
        tech = eval_result['technique_level']
        print(f"\nüéØ TODO: translate to English:")
        print(f"  TODO: translate to English: {tech['total_detected']}")
        print(f"  TODO: translate to English: {tech['total_gold']}")
        print(f"  TODO: translate to English: {tech['total_correct']}")
        print(f"  TODO: translate to English: {tech['precision']:.2%}")
        print(f"  TODO: translate to English: {tech['recall']:.2%}")
        print(f"  F1TODO: translate to English: {tech['f1']:.2%}")
        
        # TODO: translate to English
        if detailed and tech['technique_stats']:
            print(f"\nüìä TODO: translate to English:")
            print(f"{'Technique name':<35} {'TP':>4} {'FP':>4} {'FN':>4} {'TODO: translate to English':>7} {'TODO: translate to English':>7} {'F1':>7}")
            print(f"{'-'*70}")
            
            for tech_name, stats in sorted(tech['technique_stats'].items()):
                tp = stats['tp']
                fp = stats['fp']
                fn = stats['fn']
                
                prec = tp / (tp + fp) if (tp + fp) > 0 else 0.0
                rec = tp / (tp + fn) if (tp + fn) > 0 else 0.0
                f1 = 2 * prec * rec / (prec + rec) if (prec + rec) > 0 else 0.0
                
                print(f"{tech_name:<35} {tp:>4} {fp:>4} {fn:>4} {prec:>6.1%} {rec:>6.1%} {f1:>6.1%}")
        
        # ErrorTODO: translate to English
        if detailed:
            matching = eval_result['matching_details']
            
            if matching['unmatched_detected']:
                print(f"\n‚ö†Ô∏è  TODO: translate to English ({len(matching['unmatched_detected'])} TODO: translate to English):")
                for span in matching['unmatched_detected'][:3]:  # TODO: translate to English3TODO: translate to English
                    print(f"  Position {span['start_pos']}-{span['end_pos']}: {', '.join(span['detected_techniques'][:3])}")
            
            if matching['unmatched_gold']:
                print(f"\n‚ö†Ô∏è  TODO: translate to English ({len(matching['unmatched_gold'])} TODO: translate to English):")
                for span in matching['unmatched_gold'][:3]:  # TODO: translate to English3TODO: translate to English
                    print(f"  Position {span['start_pos']}-{span['end_pos']}: {', '.join(span['techniques'][:3])}")
        
        print(f"{'='*70}\n")


# ============================================================
# TODO: translate to English
# ============================================================

print("="*70)
print("üéØ TODO: translate to English")
print("="*70)

evaluator = PropagandaEvaluator()

print("‚úì TODO: translate to English\n")


# ============================================================
# TODO: translate to English
# ============================================================

print("="*70)
print("üìÇ TODO: translate to English")
print("="*70)

# PolishTODO: translate to English
PL_ANNOTATION_FILE = "your_subtask_2_annotations_file_location"

pl_gold_annotations = evaluator.load_gold_annotations(PL_ANNOTATION_FILE)

if pl_gold_annotations:
    total_spans = sum(len(spans) for spans in pl_gold_annotations.values())
    total_techniques = sum(
        sum(len(span['techniques']) for span in spans)
        for spans in pl_gold_annotations.values()
    )
    
    print(f"\nüìä TODO: translate to English:")
    print(f"  TODO: translate to English: {len(pl_gold_annotations)}")
    print(f"  TODO: translate to English: {total_spans}")
    print(f"  technique instance(s)TODO: translate to English: {total_techniques}")
    print(f"  TODO: translate to English: {total_spans/len(pl_gold_annotations):.1f} TODO: translate to English")
    print(f"  TODO: translate to English: {total_techniques/total_spans:.1f} technique(s)")

print("="*70)


# # ============================================================
# # TODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ
# # ============================================================

# if 'result' in locals() and pl_gold_annotations:
#     article_id = result['article_id']
    
#     if article_id in pl_gold_annotations:
#         print("\n" + "="*70)
#         print("üß™ TODO: translate to English")
#         print("="*70)
        
#         eval_result = evaluator.evaluate_article(
#             detected_result=result,
#             gold_annotations=pl_gold_annotations[article_id],
#             overlap_threshold=0.5
#         )
        
#         evaluator.print_evaluation_report(eval_result, detailed=True)
#     else:
#         print(f"\n‚ö†Ô∏è  TODO: translate to English {article_id} TODO: translate to English")
#         print(f"   TODO: translate to English: {list(pl_gold_annotations.keys())[:5]}...")

üéØ TODO: translate to English
‚úì TODO: translate to English

üìÇ TODO: translate to English
‚úì TODO: translate to English 15 TODO: translate to English
‚úì TODO: translate to English: 195

üìä TODO: translate to English:
  TODO: translate to English: 15
  TODO: translate to English: 195
  technique instance(s)TODO: translate to English: 609
  TODO: translate to English: 13.0 TODO: translate to English
  TODO: translate to English: 3.1 technique(s)


## TODO: translate to English

In [17]:
# ============================================================
# TODO: translate to English batch_detect_unified - TODO: translate to English
# ============================================================

import os
from pathlib import Path

def batch_detect_unified(
    articles_dict: Dict[str, Dict[str, str]],
    detector,
    detector_name: str = "TODO: translate to English",
    languages_to_test: List[str] = None,
    num_articles_per_language: int = None,
    min_paragraph_length: int = 50,
    # TODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ
    evaluator = None,
    gold_annotations: Dict[str, List] = None,
    overlap_threshold: float = 0.5,
    # TODO: translate to English
    output_dir: str = None,  # üÜï TODO: translate to EnglishÔºöTODO: translate to English
    save_tsv: bool = True,
    save_json: bool = True,
    verbose: bool = True
) -> Dict:
    """
    TODO: translate to English - TODO: translate to English
    
    Args:
        articles_dict: TODO: translate to English
        detector: TODO: translate to English
        detector_name: TODO: translate to English
        languages_to_test: TODO: translate to English
        num_articles_per_language: TODO: translate to English
        min_paragraph_length: TODO: translate to Englishparagraph(s)TODO: translate to English
        
        evaluator: TODO: translate to English
        gold_annotations: TODO: translate to English
        overlap_threshold: TODO: translate to English
        
        output_dir: TODO: translate to EnglishÔºàNoneTODO: translate to EnglishÔºâ
        save_tsv: TODO: translate to EnglishTSVTODO: translate to English
        save_json: TODO: translate to EnglishJSONTODO: translate to English
        verbose: TODO: translate to English
    
    Returns:
        TODO: translate to English
    """
    import time
    import json
    
    # üÜï TODO: translate to English
    if output_dir:
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)
        print(f"\nüìÅ TODO: translate to English: {output_dir}")
    else:
        output_path = Path(".")
    
    # TODO: translate to English
    evaluation_mode = (evaluator is not None and gold_annotations is not None)
    
    # TODO: translate to English
    language_display = {
        'ru': {'name': 'Russian', 'flag': 'üá∑üá∫'},
        'po': {'name': 'Polish', 'flag': 'üáµüá±'},
        'pl': {'name': 'Polish', 'flag': 'üáµüá±'},
        'en': {'name': 'English', 'flag': 'üá¨üáß'},
        'bg': {'name': 'TODO: translate to English', 'flag': 'üáßüá¨'},
    }
    
    if languages_to_test is None:
        languages_to_test = list(articles_dict.keys())
    
    all_results = {}
    start_time = time.time()
    
    print("\n" + "="*70)
    mode_str = "TODO: translate to English" if evaluation_mode else "TODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ"
    print(f"üöÄ {mode_str}: {detector_name}")
    print("="*70)
    print(f"TODO: translate to English: {', '.join(languages_to_test)}")
    print(f"TODO: translate to English: {'TODO: translate to English' if num_articles_per_language is None else num_articles_per_language}")
    if save_tsv:
        print(f"‚úì TODO: translate to EnglishTSVTODO: translate to English")
    if save_json:
        print(f"‚úì TODO: translate to EnglishJSONTODO: translate to English")
    print("="*70 + "\n")
    
    for idx, lang_code in enumerate(languages_to_test, 1):
        lang_info = language_display.get(lang_code, {'name': lang_code.upper(), 'flag': 'üåê'})
        
        try:
            print(f"\n{'='*70}")
            print(f"{lang_info['flag']} [{idx}/{len(languages_to_test)}] TODO: translate to English {lang_info['name']} ({lang_code.upper()})")
            print(f"{'='*70}\n")
            
            lang_start_time = time.time()
            
            # TODO: translate to English
            if lang_code not in articles_dict:
                print(f"‚ùå TODO: translate to English '{lang_code}' TODO: translate to English")
                all_results[lang_code] = {'error': f"TODO: translate to English '{lang_code}' TODO: translate to English"}
                continue
            
            articles = articles_dict[lang_code]
            
            # TODO: translate to English
            if isinstance(articles, list):
                print(f"  ‚ÑπÔ∏è  DetectedTODO: translate to EnglishÔºåTODO: translate to English...")
                articles_dict_converted = {}
                for item in articles:
                    if isinstance(item, dict):
                        article_id = item.get('filename') or item.get('id') or item.get('article_id')
                        article_text = item.get('text') or item.get('content') or item.get('body')
                        if article_id and article_text:
                            articles_dict_converted[article_id] = article_text
                articles = articles_dict_converted
                print(f"  ‚úì TODO: translate to English: {len(articles)} TODO: translate to English")
            
            # TODO: translate to English
            if num_articles_per_language is not None and isinstance(articles, dict):
                articles_items = list(articles.items())[:num_articles_per_language]
                articles = dict(articles_items)
            
            if not articles:
                print(f"‚ùå TODO: translate to English")
                all_results[lang_code] = {'error': 'TODO: translate to English'}
                continue
            
            print(f"‚úì TODO: translate to English {len(articles)} TODO: translate to English")
            
            # TODO: translate to English
            print(f"\nüîç TODO: translate to English...")
            
            article_results = []
            tsv_outputs = []
            
            for i, (article_id, article_text) in enumerate(articles.items(), 1):
                if verbose:
                    print(f"\nProcessing article {i}/{len(articles)}: {article_id}")
                
                try:
                    result = detector.detect_article_by_paragraphs(
                        article_id=article_id,
                        full_text=article_text,
                        min_paragraph_length=min_paragraph_length,
                        verbose=False
                    )
                    
                    article_results.append(result)
                    
                    if save_tsv:
                        tsv_lines = detector.format_output_tsv(result)
                        tsv_outputs.extend(tsv_lines)
                    
                    if verbose:
                        num_paragraphs = len(result['paragraph_results'])
                        num_with_tech = sum(1 for p in result['paragraph_results'] if p['num_techniques'] > 0)
                        total_techniques = sum(p['num_techniques'] for p in result['paragraph_results'])
                        print(f"  ‚úì TODO: translate to English: {num_with_tech}/{num_paragraphs} paragraph(s)TODO: translate to English, total {total_techniques} TODO: translate to Englishtechnique instance(s)")
                
                except Exception as e:
                    print(f"  ‚ùå Error: {e}")
                    continue
            
            # TODO: translate to EnglishTSV üÜï TODO: translate to English
            if save_tsv and tsv_outputs:
                detector_name_safe = detector_name.replace(' ', '_').replace(':', '').replace('(', '').replace(')', '')
                tsv_filename = output_path / f"results_{lang_code}_{detector_name_safe}.tsv"
                
                with open(tsv_filename, 'w', encoding='utf-8') as f:
                    f.write("article_id\tstart_pos\tend_pos\ttechniques\n")
                    for line in tsv_outputs:
                        f.write(line + '\n')
                
                print(f"\nüíæ TSVTODO: translate to English: {tsv_filename}")
            
            # TODO: translate to EnglishJSON üÜï TODO: translate to English
            if save_json and article_results:
                detector_name_safe = detector_name.replace(' ', '_').replace(':', '').replace('(', '').replace(')', '')
                json_filename = output_path / f"results_{lang_code}_{detector_name_safe}_detailed.json"
                
                total_paragraphs = sum(len(r['paragraph_results']) for r in article_results)
                total_paras_with_tech = sum(
                    sum(1 for p in r['paragraph_results'] if p['num_techniques'] > 0)
                    for r in article_results
                )
                total_technique_instances = sum(
                    sum(p['num_techniques'] for p in r['paragraph_results'])
                    for r in article_results
                )
                
                technique_counter = {}
                for result in article_results:
                    for para_result in result['paragraph_results']:
                        for tech in para_result['detected_techniques']:
                            technique_counter[tech] = technique_counter.get(tech, 0) + 1
                
                json_data = {
                    'metadata': {
                        'language': lang_info['name'],
                        'language_code': lang_code,
                        'detector_name': detector_name,
                        'total_articles': len(article_results),
                        'total_paragraphs': total_paragraphs,
                        'paragraphs_with_techniques': total_paras_with_tech,
                        'total_technique_instances': total_technique_instances,
                        'technique_frequency': dict(sorted(technique_counter.items(), key=lambda x: x[1], reverse=True))
                    },
                    'articles': article_results
                }
                
                with open(json_filename, 'w', encoding='utf-8') as f:
                    json.dump(json_data, f, ensure_ascii=False, indent=2)
                
                print(f"üíæ JSONTODO: translate to English: {json_filename}")
            
            # TODO: translate to English
            lang_elapsed = time.time() - lang_start_time
            
            if article_results:
                total_paragraphs = sum(len(r['paragraph_results']) for r in article_results)
                total_paras_with_tech = sum(
                    sum(1 for p in r['paragraph_results'] if p['num_techniques'] > 0)
                    for r in article_results
                )
                total_technique_instances = sum(
                    sum(p['num_techniques'] for p in r['paragraph_results'])
                    for r in article_results
                )
                
                technique_counter = {}
                for result in article_results:
                    for para_result in result['paragraph_results']:
                        for tech in para_result['detected_techniques']:
                            technique_counter[tech] = technique_counter.get(tech, 0) + 1
                
                summary = {
                    'language': lang_code,
                    'language_name': lang_info['name'],
                    'detector_name': detector_name,
                    'num_articles_processed': len(article_results),
                    'total_paragraphs': total_paragraphs,
                    'paragraphs_with_techniques': total_paras_with_tech,
                    'total_technique_instances': total_technique_instances,
                    'technique_frequency': dict(sorted(technique_counter.items(), key=lambda x: x[1], reverse=True)),
                    'elapsed_time': lang_elapsed
                }
                
                print(f"\n‚úì {lang_info['name']}TODO: translate to English (TODO: translate to English: {lang_elapsed:.1f}TODO: translate to English):")
                print(f"  Processing articleTODO: translate to English: {len(article_results)}")
                print(f"  TODO: translate to Englishparagraph(s)TODO: translate to English: {total_paragraphs}")
                print(f"  TODO: translate to Englishparagraph(s): {total_paras_with_tech} ({(total_paras_with_tech/total_paragraphs*100):.1f}%)")
                print(f"  TODO: translate to Englishtechnique instance(s)TODO: translate to English: {total_technique_instances}")
                
                if technique_counter:
                    print(f"\n  üîù Most common techniques (Top 5):")
                    for tech, count in list(sorted(technique_counter.items(), key=lambda x: x[1], reverse=True))[:5]:
                        percentage = (count / total_technique_instances) * 100 if total_technique_instances > 0 else 0
                        print(f"    {tech}: {count} ({percentage:.1f}%)")
            else:
                summary = {'language': lang_code, 'error': 'TODO: translate to English'}
            
            all_results[lang_code] = summary
            
        except Exception as e:
            print(f"\n‚ùå {lang_info['name']}TODO: translate to English: {e}")
            all_results[lang_code] = {'error': str(e)}
    
    total_elapsed = time.time() - start_time
    
    print("\n" + "="*70)
    print(f"üìä TODO: translate to English - {detector_name}")
    print("="*70)
    print(f"\n{'TODO: translate to English':<15} {'TODO: translate to English':<8} {'TODO: translate to Englishparagraph(s)':<10} {'TODO: translate to Englishparagraph(s)':<12} {'technique instance(s)':<10} {'TODO: translate to English':<10}")
    print("-"*70)
    
    for lang_code in languages_to_test:
        if lang_code not in all_results:
            continue
        
        result = all_results[lang_code]
        lang_info = language_display.get(lang_code, {'name': lang_code.upper()})
        lang_name = lang_info['name']
        
        if 'error' in result:
            error_msg = result['error'][:20]
            print(f"{lang_name:<15} {'N/A':<8} {'N/A':<10} {'N/A':<12} {'N/A':<10} {error_msg:<10}")
        else:
            num_articles = result['num_articles_processed']
            total_paras = result.get('total_paragraphs', 0)
            paras_with_tech = result.get('paragraphs_with_techniques', 0)
            tech_instances = result.get('total_technique_instances', 0)
            elapsed = result.get('elapsed_time', 0)
            print(f"{lang_name:<15} {num_articles:<8} {total_paras:<10} {paras_with_tech:<12} {tech_instances:<10} {elapsed:<9.1f}s")
    
    print("-"*70)
    print(f"{'TODO: translate to English':<15} {'':<8} {'':<10} {'':<12} {'':<10} {total_elapsed:<9.1f}s")
    print("="*70 + "\n")
    
    return all_results


print("‚úì TODO: translate to English batch_detect_unified TODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ!")

‚úì TODO: translate to English batch_detect_unified TODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ!


In [18]:
# ============================================================
# TODO: translate to English
# ============================================================

def compare_detectors(
    data_base_dir: str,
    language_configs: Dict,
    detectors: Dict,  # {'TODO: translate to English': TODO: translate to English}
    evaluator: PropagandaEvaluator,
    languages_to_test: List[str] = None,
    num_articles_per_language: int = None,
    min_paragraph_length: int = 50,
    overlap_threshold: float = 0.5,
    save_tsv: bool = False
) -> Dict:
    """
    TODO: translate to English
    
    Args:
        detectors: TODO: translate to EnglishÔºåTODO: translate to English {'TODO: translate to English': TODO: translate to English}
        TODO: translate to English batch_test_all_languages
    
    Returns:
        TODO: translate to English
    """
    
    all_detector_results = {}
    
    print("\n" + "="*70)
    print(f"üî¨ TODO: translate to English")
    print("="*70)
    print(f"TODO: translate to English: {len(detectors)}")
    print(f"TODO: translate to English: {', '.join(detectors.keys())}")
    print("="*70 + "\n")
    
    # TODO: translate to English
    for detector_name, detector in detectors.items():
        print(f"\n{'#'*70}")
        print(f"# TODO: translate to English: {detector_name}")
        print(f"{'#'*70}\n")
        
        results = batch_test_all_languages(
            data_base_dir=data_base_dir,
            language_configs=language_configs,
            detector=detector,
            evaluator=evaluator,
            detector_name=detector_name,
            languages_to_test=languages_to_test,
        
            num_articles_per_language=num_articles_per_language,
            min_paragraph_length=min_paragraph_length,
            overlap_threshold=overlap_threshold,
            verbose=False,  # TODO: translate to English
            save_tsv=save_tsv
        )
        
        all_detector_results[detector_name] = results
    
    # TODO: translate to English
    print("\n" + "="*70)
    print("üìä TODO: translate to English")
    print("="*70)
    
    if languages_to_test is None:
        languages_to_test = list(language_configs.keys())
    
    for lang_code in languages_to_test:
        lang_name = language_configs[lang_code]['name']
        print(f"\n{lang_name}:")
        print(f"{'TODO: translate to English':<25} {'TODO: translate to EnglishF1':<12} {'TODO: translate to EnglishF1':<12} {'TODO: translate to English/TODO: translate to English':<15}")
        print("-"*70)
        
        for detector_name in detectors.keys():
            result = all_detector_results[detector_name].get(lang_code, {})
            
            if 'error' in result:
                print(f"{detector_name:<25} {'N/A':<12} {'N/A':<12} {'N/A':<15}")
            else:
                span_f1 = result.get('avg_span_f1', 0)
                tech_f1 = result.get('avg_tech_f1', 0)
                detected = result.get('total_detected_spans', 0)
                gold = result.get('total_gold_spans', 0)
                
                print(f"{detector_name:<25} {span_f1:<11.1%} {tech_f1:<11.1%} {detected}/{gold:<13}")
    
    print("="*70 + "\n")
    
    return all_detector_results


print("‚úì TODO: translate to English!")

‚úì TODO: translate to English!


## TODO: translate to English

In [19]:

# Language codeTODO: translate to EnglishÔºöTODO: translate to English -> TODO: translate to English
LANGUAGE_CONFIGS = {
    'ru': {
        'name': 'Russian',
        'code': 'RU',
        'flag': 'üá∑üá∫',
        'annotation_file': 'subtask-2-annotations.txt'
    },
    'po': {
        'name': 'Polish', 
        'code': 'PL',
        'flag': 'üáµüá±',
        'annotation_file': 'subtask-2-annotations.txt'
    },
    'bg': {
        'name': 'TODO: translate to English',
        'code': 'BG', 
        'flag': 'üáßüá¨',
        'annotation_file': 'subtask-2-annotations.txt'
    },
    'en': {
        'name': 'English',
        'code': 'EN',
        'flag': 'üá∫üá∏',  # TODO: translate to English üá∫üá∏
        'annotation_file': 'subtask-2-annotations.txt'
    },
}

LANGUAGE_CODE_MAP = {lang: config['code'] for lang, config in LANGUAGE_CONFIGS.items()}


def load_articles_from_path(article_dir: str, language: str = None, label: str = None):
    """
    TODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ
    
    Args:
        article_dir: TODO: translate to English
        language: Language codeÔºàTODO: translate to EnglishÔºåTODO: translate to EnglishÔºâ
        label: TODO: translate to EnglishÔºàTODO: translate to EnglishÔºåTODO: translate to English 'train', 'dev', 'test' TODO: translate to EnglishÔºâ
    
    Returns:
        list: TODO: translate to English
    """
    if not os.path.exists(article_dir):
        print(f"‚úó TODO: translate to English: {article_dir}")
        return None
    
    print(f"\n{'='*70}")
    print(f"üìÇ TODO: translate to English: {article_dir}")
    if language:
        lang_info = LANGUAGE_CONFIGS.get(language, {})
        print(f"üåç TODO: translate to English: {lang_info.get('flag', '')} {lang_info.get('name', language)}")
    if label:
        print(f"üè∑Ô∏è  TODO: translate to English: {label}")
    print(f"{'='*70}")
    
    # TODO: translate to English .txt TODO: translate to English
    articles = []
    all_txt_files = list(Path(article_dir).glob('*.txt'))
    
    # TODO: translate to English
    txt_files = [
        f for f in all_txt_files 
        if not f.name.startswith('._') and not f.name.startswith('.')
    ]
    
    if not txt_files:
        print(f"‚úó TODO: translate to English .txt TODO: translate to English")
        if all_txt_files:
            print(f"  (TODO: translate to English {len(all_txt_files)} TODO: translate to EnglishÔºåTODO: translate to English)")
        return None
    
    print(f"üìÑ TODO: translate to English {len(txt_files)} TODO: translate to English", end='')
    if len(all_txt_files) > len(txt_files):
        print(f" (TODO: translate to English {len(all_txt_files) - len(txt_files)} TODO: translate to English)")
    else:
        print()
    
    failed_count = 0
    for txt_file in txt_files:
        try:
            # TODO: translate to English
            content = None
            for encoding in ['utf-8', 'latin-1', 'cp1252', 'iso-8859-1']:
                try:
                    with open(txt_file, 'r', encoding=encoding) as f:
                        content = f.read().strip()
                    break  # TODO: translate to EnglishÔºåTODO: translate to English
                except UnicodeDecodeError:
                    continue
            
            if content is None:
                print(f"  ‚ö†Ô∏è TODO: translate to English {txt_file.name}")
                failed_count += 1
                continue
            
            if content:  # TODO: translate to English
                articles.append({
                    'filename': txt_file.name,
                    'text': content,
                    'language': language if language else 'unknown',
                    'label': label if label else 'unlabeled',
                    'source_path': str(article_dir),
                    'char_count': len(content),
                    'word_count': len(content.split())
                })
        except Exception as e:
            print(f"  ‚ö†Ô∏è TODO: translate to English {txt_file.name}: {e}")
            failed_count += 1
            continue
    
    print(f"‚úì TODO: translate to English {len(articles)} TODO: translate to English", end='')
    if failed_count > 0:
        print(f" (TODO: translate to English: {failed_count})")
    else:
        print()
    
    # TODO: translate to English
    if articles:
        total_chars = sum(a['char_count'] for a in articles)
        total_words = sum(a['word_count'] for a in articles)
        avg_chars = total_chars / len(articles)
        avg_words = total_words / len(articles)
        
        print(f"\nüìä TODO: translate to English:")
        print(f"  - TODO: translate to English: {len(articles)}")
        print(f"  - TODO: translate to English: {total_chars:,}")
        print(f"  - TODO: translate to English: {total_words:,}")
        print(f"  - TODO: translate to English/TODO: translate to English: {avg_chars:.0f}")
        print(f"  - TODO: translate to English/TODO: translate to English: {avg_words:.0f}")
    
    return articles


def load_multiple_sources(sources: list):
    """
    TODO: translate to English
    
    Args:
        sources: TODO: translate to EnglishÔºåTODO: translate to EnglishÔºåTODO: translate to English:
                - 'path': TODO: translate to English
                - 'language': Language codeÔºàTODO: translate to EnglishÔºâ
                - 'label': TODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ
    
    Returns:
        dict: TODO: translate to EnglishÔºåTODO: translate to English
    """
    all_articles = {}
    
    for source in sources:
        path = source['path']
        language = source.get('language')
        label = source.get('label', 'default')
        
        articles = load_articles_from_path(path, language, label)
        
        if articles:
            all_articles[label] = articles
    
    return all_articles


def show_article_sample(articles, index=0):
    """TODO: translate to English"""
    if not articles or index >= len(articles):
        print("‚úó TODO: translate to English")
        return
    
    article = articles[index]
    print(f"\n{'='*70}")
    print(f"TODO: translate to English [{index+1}/{len(articles)}]")
    print(f"{'='*70}")
    print(f"üìÑ TODO: translate to English: {article['filename']}")
    print(f"üåç TODO: translate to English: {article['language']}")
    print(f"üè∑Ô∏è  TODO: translate to English: {article['label']}")
    print(f"üìÇ TODO: translate to English: {article['source_path']}")
    print(f"üìè TODO: translate to English: {article['char_count']} TODO: translate to English, {article['word_count']} TODO: translate to English")
    # print(f"{'‚îÄ'*70}")
    # print("TODO: translate to English300TODO: translate to English:")
    # print(f"{'‚îÄ'*70}")
    # print(article['text'][:300])
    # if len(article['text']) > 300:
    #     print(f"\n... (TODO: translate to English {len(article['text']) - 300} TODO: translate to English)")
    # print(f"{'='*70}\n")


def print_summary(all_articles: dict):
    """TODO: translate to English"""
    print("\n" + "="*70)
    print("üìä TODO: translate to English")
    print("="*70)
    
    total_articles = 0
    for label, articles in all_articles.items():
        print(f"‚úì {label}: {len(articles)} TODO: translate to English")
        total_articles += len(articles)
    
    print(f"{'‚îÄ'*70}")
    print(f"TODO: translate to English: {total_articles} TODO: translate to English")
    print("="*70)




## TODO: translate to English

In [None]:
# TODO: translate to English
# ============================================================
# TODO: translate to EnglishRussianTODO: translate to EnglishPolishÔºàTODO: translate to EnglishÔºâ
# ============================================================

# # TODO: translate to English
# test_data_sources = [
#     {
#         'path': 'your_dev_articles_subtask_3_directory__ru',
#         'language': 'ru',
#         'label': 'ru'
#     },
#     {
#         'path': 'your_dev_articles_subtask_3_directory',
#         'language': 'po',
#         'label': 'po'
#     }
# ]

# # TODO: translate to English
# test_articles = load_multiple_sources(test_data_sources)

# ============================================================
# TODO: translate to EnglishRussianÔºàTODO: translate to EnglishRussianTODO: translate to EnglishÔºâ
# ============================================================

print("\n" + "üá∑üá∫"*35)
print("TODO: translate to EnglishRussianTODO: translate to English - TODO: translate to EnglishRussianTODO: translate to English")
print("üá∑üá∫"*35 + "\n")

results_ru = batch_detect_unified(
    # ============================================================
    # TODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ
    # ============================================================
    
    articles_dict={'ru': test_articles['ru']},
    # üìö TODO: translate to English
    # TODO: translate to EnglishÔºö{Language code: TODO: translate to English/TODO: translate to English}
    
    detector=context_detector_ru,
    # üîç TODO: translate to English
    # TODO: translate to Englishpropaganda
    # TODO: translate to EnglishÔºÅRussianTODO: translate to EnglishRussianTODO: translate to English
    # TODO: translate to EnglishÔºö
    # context_detector_po                  # TODO: translate to English
    # voting_detector_po_balanced          # TODO: translate to English-TODO: translate to English
    # voting_detector_po_conservative      # TODO: translate to English-TODO: translate to English  
    # voting_detector_po_aggressive        # TODO: translate to English-TODO: translate to English

    # # TODO: translate to EnglishÔºàTODO: translate to EnglishPolishTODO: translate to EnglishÔºâ
    # context_detector                     # = context_detector_po
    # voting_detector_balanced             # = voting_detector_po_balanced
    # voting_detector_conservative         # = voting_detector_po_conservative
    # voting_detector_aggressive           # = voting_detector_po_aggressive
    
    # ============================================================
    # TODO: translate to English
    # ============================================================
    
    detector_name="results_po_checkthat2024_dev_Ordinary_strict",
    # - TODO: translate to English
    # TODO: translate to EnglishÔºöresults_ru_RussianTODO: translate to English.tsv
    
    languages_to_test=['ru'],
    # üåç TODO: translate to English
    # TODO: translate to English articles_dict TODO: translate to English
    
    num_articles_per_language=1,
    # üìä TODO: translate to English
    # - 10: TODO: translate to English10TODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ
    # - None: TODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ
    
    # ============================================================
    # TODO: translate to English
    # ============================================================
    
    save_tsv=True,
    # üíæ TODO: translate to EnglishTSVTODO: translate to English
    # True: TODO: translate to English results_ru_RussianTODO: translate to English.tsv
    # TSVTODO: translate to EnglishÔºöarticle_id    start_pos    end_pos    techniques
    
    save_json=True,
    # üíæ TODO: translate to EnglishJSONTODO: translate to English
    # True: TODO: translate to English results_ru_RussianTODO: translate to English_detailed.json
    # TODO: translate to EnglishÔºöTODO: translate to English„ÄÅTODO: translate to English„ÄÅTODO: translate to English
    
    verbose=True,
    # üì¢ TODO: translate to English
    # True: TODO: translate to English
    #   "Processing article 1/10: article3428.txt"
    #   "‚úì TODO: translate to English: 3/13 paragraph(s)TODO: translate to English, total 8 TODO: translate to Englishtechnique instance(s)"
    # False: TODO: translate to English
    
    # ============================================================
    # TODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ
    # ============================================================
    
    # evaluator=None,
    # üéØ TODO: translate to EnglishÔºàTODO: translate to English None = TODO: translate to EnglishÔºâ
    # TODO: translate to EnglishÔºöTODO: translate to EnglishF1/TODO: translate to English/TODO: translate to English
    # NoneÔºöTODO: translate to EnglishÔºåTODO: translate to English
    
    # gold_annotations=None,
    # üìã TODO: translate to EnglishÔºàTODO: translate to English NoneÔºâ
    # TODO: translate to EnglishÔºö{'ru': {article_id: [TODO: translate to English]}}
    # TODO: translate to English
    
    # min_paragraph_length=50,
    # üìè TODO: translate to Englishparagraph(s)TODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ
    # TODO: translate to Englishparagraph(s)TODO: translate to English
    # TODO: translate to EnglishÔºö50
    
    # overlap_threshold=0.5,
    # üéöÔ∏è TODO: translate to English
    # TODO: translate to English
    # TODO: translate to EnglishÔºö0.5
)


# # ============================================================
# # TODO: translate to English
# # ============================================================

# print("\n" + "="*70)
# print("üìä TODO: translate to English")
# print("="*70)

# print(f"\n{'TODO: translate to English':<15} {'TODO: translate to English':<10} {'technique instance(s)':<10} {'TODO: translate to Englishparagraph(s)':<15}")
# print("-"*50)

# if 'ru' in results_ru and 'error' not in results_ru['ru']:
#     r = results_ru['ru']
#     print(f"{'Russian':<15} {r['num_articles_processed']:<10} {r['total_technique_instances']:<10} {r['paragraphs_with_techniques']}/{r['total_paragraphs']}")

# if 'po' in results_po and 'error' not in results_po['po']:
#     r = results_po['po']
#     print(f"{'Polish':<15} {r['num_articles_processed']:<10} {r['total_technique_instances']:<10} {r['paragraphs_with_techniques']}/{r['total_paragraphs']}")

# print("="*70)

In [20]:
# TODO: translate to English
test_data_sources = [
    {
        'path': 'your_dev_articles_subtask_3_directory__ru_2',
        'language': 'ru',
        'label': 'ru'
    },
    {
        'path': 'your_dev_articles_subtask_3_directory__po',
        'language': 'po',
        'label': 'po'
    },

    {
        'path': 'your_dev_articles_subtask_3_directory__en',
        'language': 'en',
        'label': 'en'
    }
]

# TODO: translate to English
test_articles = load_multiple_sources(test_data_sources)


üìÇ TODO: translate to English: /home/jovyan/2023task3/data/ru/dev-articles-subtask-3
üåç TODO: translate to English: üá∑üá∫ Russian
üè∑Ô∏è  TODO: translate to English: ru
üìÑ TODO: translate to English 48 TODO: translate to English (TODO: translate to English 1 TODO: translate to English)
‚úì TODO: translate to English 48 TODO: translate to English

üìä TODO: translate to English:
  - TODO: translate to English: 48
  - TODO: translate to English: 162,025
  - TODO: translate to English: 22,383
  - TODO: translate to English/TODO: translate to English: 3376
  - TODO: translate to English/TODO: translate to English: 466

üìÇ TODO: translate to English: /home/jovyan/2023task3/data/po/dev-articles-subtask-3
üåç TODO: translate to English: üáµüá± Polish
üè∑Ô∏è  TODO: translate to English: po
üìÑ TODO: translate to English 49 TODO: translate to English (TODO: translate to English 1 TODO: translate to English)
‚úì TODO: translate to English 49 TODO: translate to English

üìä TO

In [21]:
# ============================================================
# TODO: translate to English - TODO: translate to EnglishID + TODO: translate to English
# ============================================================

import time
from datetime import datetime

# ============================================================
# üéØ TODO: translate to English!
# ============================================================

LANGUAGE = 'en'      # TODO: translate to English: 'ru', 'po', 'en'
DETECTOR_TYPE = 'voting_aggressive'  # TODO: translate to English: 'context', 'voting_aggressiveTODO: translate to English', 'voting_balanced', 'voting_conservative'
BATCH_SIZE = 24
NUM_BATCHES = 4  # TODO: translate to English
SLEEP_TIME = 1

# ============================================================
# TODO: translate to English(TODO: translate to English)
# ============================================================

# TODO: translate to English
DETECTOR_CONFIG = {
    'ru': {
        'context': context_detector_ru,
        'voting_aggressive': voting_detector_ru_aggressive,
        'voting_balanced': voting_detector_ru_balanced,
        'voting_conservative': voting_detector_ru_conservative
    },
    'po': {
        'context': context_detector_po,
        'voting_aggressive': voting_detector_po_aggressive,
        'voting_balanced': voting_detector_po_balanced,
        'voting_conservative': voting_detector_po_conservative
    },
    'en': {
        'context': context_detector_en,
        'voting_aggressive': voting_detector_en_aggressive,
        'voting_balanced': voting_detector_en_balanced,
        'voting_conservative': voting_detector_en_conservative
    }
}

# TODO: translate to English
LANGUAGE_INFO = {
    'ru': {'name': 'Russian', 'flag': 'üá∑üá∫'},
    'po': {'name': 'Polish', 'flag': 'üáµüá±'},
    'en': {'name': 'English', 'flag': 'üá∫üá∏'}  # TODO: translate to English üá∫üá∏
}

# TODO: translate to English
DETECTOR_INFO = {
    'context': {'name': 'TODO: translate to English', 'icon': 'üîç'},
    'voting_aggressive': {'name': 'TODO: translate to English(TODO: translate to English)', 'icon': 'üöÄ'},
    'voting_balanced': {'name': 'TODO: translate to English(TODO: translate to English)', 'icon': '‚öñÔ∏è'},
    'voting_conservative': {'name': 'TODO: translate to English(TODO: translate to English)', 'icon': 'üõ°Ô∏è'}
}

# TODO: translate to English
OUTPUT_DIR = "your_llm_tests_directory"

# ============================================================
# TODO: translate to English(TODO: translate to English)
# ============================================================

# TODO: translate to English
data_var = None
data_key = None

if 'all_data' in dir():
    data_var = all_data
    # all_data TODO: translate to English 'ru_dev', 'po_dev', 'en_dev' TODO: translate to English
    if f'{LANGUAGE}_dev' in all_data:
        data_key = f'{LANGUAGE}_dev'
    elif LANGUAGE in all_data:
        data_key = LANGUAGE
elif 'test_articles' in dir():
    data_var = test_articles
    # test_articles TODO: translate to English 'ru', 'po', 'en' TODO: translate to English
    data_key = LANGUAGE

if data_var is None or data_key is None:
    print("‚ùå Error: TODO: translate to English")
    print("\nTODO: translate to English:")
    if 'all_data' in dir():
        print(f"  all_data TODO: translate to English: {list(all_data.keys())}")
    if 'test_articles' in dir():
        print(f"  test_articles TODO: translate to English: {list(test_articles.keys())}")
    raise ValueError("TODO: translate to English")

In [23]:
# ============================================================
# TODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ
# ============================================================

# TODO: translate to English
if LANGUAGE not in DETECTOR_CONFIG:
    print(f"‚ùå Error: TODO: translate to English '{LANGUAGE}'")
    print(f"   TODO: translate to English: {list(DETECTOR_CONFIG.keys())}")
    raise ValueError(f"TODO: translate to English: {LANGUAGE}")

if DETECTOR_TYPE not in DETECTOR_CONFIG[LANGUAGE]:
    print(f"‚ùå Error: TODO: translate to English '{DETECTOR_TYPE}'")
    print(f"   TODO: translate to English: {list(DETECTOR_CONFIG[LANGUAGE].keys())}")
    raise ValueError(f"TODO: translate to English: {DETECTOR_TYPE}")

# TODO: translate to English
detector = DETECTOR_CONFIG[LANGUAGE][DETECTOR_TYPE]
lang_info = LANGUAGE_INFO[LANGUAGE]
detector_info = DETECTOR_INFO[DETECTOR_TYPE]

# TODO: translate to English
articles = data_var[data_key]
total = len(articles)

# TODO: translate to English
print("="*70)
print(f"üöÄ TODO: translate to English - TODO: translate to EnglishID")
print("="*70)
print(f"TODO: translate to English: {data_key} (total {total} TODO: translate to English)")
print(f"TODO: translate to English: {lang_info['flag']} {lang_info['name']} ({LANGUAGE})")
print(f"TODO: translate to English: {detector_info['icon']} {detector_info['name']}")
print(f"TODO: translate to English: {NUM_BATCHES} TODO: translate to English √ó {BATCH_SIZE} TODO: translate to English/TODO: translate to English")
print(f"TODO: translate to English: {OUTPUT_DIR}")
print("="*70 + "\n")

# TODO: translate to English
all_results = []
start_time = time.time()

# for batch in range(1, NUM_BATCHES + 1):
for batch in range(2, 3): 
    start_idx = (batch - 1) * BATCH_SIZE
    end_idx = min(batch * BATCH_SIZE, total)
    batch_articles = articles[start_idx:end_idx]
    
    print(f"\n{lang_info['flag']} TODO: translate to English {batch}/{NUM_BATCHES}: TODO: translate to English {start_idx+1}-{end_idx} (total{len(batch_articles)}TODO: translate to English)")
    print(f"‚è∞ TODO: translate to English: {datetime.now().strftime('%H:%M:%S')}")
    

    # üÜï TODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ
    print(f"\nüìã TODO: translate to English:")
    for i, article in enumerate(batch_articles):
        article_id = article.get('filename', article.get('id', f'article_{start_idx + i + 1}'))
        if article_id.endswith('.txt'):
            article_id = article_id[:-4]
        print(f"   {i+1:2d}. {article_id}")
    
    print(f"\nüöÄ TODO: translate to English...")
    
    # TODO: translate to English
    result = batch_detect_unified(
        articles_dict={LANGUAGE: batch_articles},
        detector=detector,
        detector_name=f"results_semeval_task3_dev_{LANGUAGE}_{DETECTOR_TYPE}_batch{batch}",
        languages_to_test=[LANGUAGE],
        output_dir=OUTPUT_DIR,
        save_tsv=True,
        save_json=True,
        verbose=False
    )
    
    all_results.append(result)
    
    # TODO: translate to English
    if LANGUAGE in result and 'error' not in result[LANGUAGE]:
        r = result[LANGUAGE]
        elapsed = time.time() - start_time
        avg_time_per_batch = elapsed / batch
        remaining_time = avg_time_per_batch * (NUM_BATCHES - batch)
        
        print(f"\n‚úÖ TODO: translate to English {batch} TODO: translate to English!")
        print(f"   üìä TODO: translate to English: {r['num_articles_processed']}, TODO: translate to English: {r['total_technique_instances']}, TODO: translate to Englishparagraph(s): {r['paragraphs_with_techniques']}/{r['total_paragraphs']}")
        print(f"   ‚è±Ô∏è  TODO: translate to English: {r['elapsed_time']/60:.1f} TODO: translate to English")
        print(f"   üìà TODO: translate to English: {batch}/{NUM_BATCHES} ({batch/NUM_BATCHES*100:.0f}%)")
        
        if batch < NUM_BATCHES:
            print(f"   üîÆ TODO: translate to English: {remaining_time/60:.1f} TODO: translate to English")
        
        # üÜï TODO: translate to English
        print(f"   ‚úÖ TODO: translate to English: {', '.join([article.get('filename', article.get('id', f'article_{i}')).replace('.txt', '') for i, article in enumerate(batch_articles)])}")
        
    else:
        print(f"\n‚ùå TODO: translate to English {batch} TODO: translate to English")
        if 'error' in result.get(LANGUAGE, {}):
            print(f"   Error: {result[LANGUAGE]['error']}")
    
    # TODO: translate to English
    if batch < NUM_BATCHES and SLEEP_TIME > 0:
        print(f"\n‚è∏Ô∏è  TODO: translate to English {SLEEP_TIME} TODO: translate to English...")
        for i in range(SLEEP_TIME, 0, -1):
            if i % 10 == 0 or i <= 5:
                print(f"   ‚è≥ TODO: translate to English {i} TODO: translate to English...")
            time.sleep(1)

# TODO: translate to English
total_time = time.time() - start_time
successful = [r for r in all_results if LANGUAGE in r and 'error' not in r[LANGUAGE]]

print("\n" + "="*70)
print(f"üéâ TODO: translate to English! {lang_info['flag']} {lang_info['name']} - {detector_info['icon']} {detector_info['name']}")
print("="*70)

if successful:
    total_articles = sum(r[LANGUAGE]['num_articles_processed'] for r in successful)
    total_tech = sum(r[LANGUAGE]['total_technique_instances'] for r in successful)
    
    print(f"\nüìä TODO: translate to English:")
    print(f"  ‚úÖ TODO: translate to English: {len(successful)}/{NUM_BATCHES}")
    print(f"  üìù Processing article: {total_articles} TODO: translate to English")
    print(f"  üéØ TODO: translate to English: {total_tech} TODO: translate to English")
    print(f"  ‚è±Ô∏è  TODO: translate to English: {total_time/60:.1f} TODO: translate to English ({total_time/3600:.2f} TODO: translate to English)")
    print(f"  üöÄ TODO: translate to English: {total_time/total_articles:.1f} TODO: translate to English/TODO: translate to English")
    
    print(f"\nüíæ TODO: translate to English: {OUTPUT_DIR}/")
    for i in range(1, len(successful) + 1):
        print(f"    üìÑ TODO: translate to English{i}: results_semeval_task3_dev_{LANGUAGE}_{DETECTOR_TYPE}_batch{i}.tsv / .json") # TODO: translate to English
else:
    print(f"\n‚ùå TODO: translate to English")
    print(f"   TODO: translate to English")

print("\n" + "="*70)

# ============================================================
# üìñ TODO: translate to English
# ============================================================

"""
üéØ TODO: translate to English - TODO: translate to EnglishÔºÅ

1. TODO: translate to English:
   LANGUAGE = 'ru'    # Russian
   LANGUAGE = 'po'    # Polish

2. TODO: translate to English:
   DETECTOR_TYPE = 'context'              # TODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ
   DETECTOR_TYPE = 'voting_aggressive'    # TODO: translate to English-TODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ
   DETECTOR_TYPE = 'voting_balanced'      # TODO: translate to English-TODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ
   DETECTOR_TYPE = 'voting_conservative'  # TODO: translate to English-TODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ
"""

üöÄ TODO: translate to English - TODO: translate to EnglishID
TODO: translate to English: en (total 90 TODO: translate to English)
TODO: translate to English: üá∫üá∏ English (en)
TODO: translate to English: üöÄ TODO: translate to English(TODO: translate to English)
TODO: translate to English: 4 TODO: translate to English √ó 24 TODO: translate to English/TODO: translate to English
TODO: translate to English: /home/jovyan/TRLAL/Total_work/research_projects/disinformation_detection/notebooks/results/LLM_tests


üá∫üá∏ TODO: translate to English 2/4: TODO: translate to English 25-48 (total24TODO: translate to English)
‚è∞ TODO: translate to English: 09:31:23

üìã TODO: translate to English:
    1. article833013834
    2. article817408115
    3. article813552066
    4. article832926076
    5. article832934428
    6. article817190270
    7. article833028146
    8. article824684605
    9. article833050243
   10. article814630609
   11. article829815104
   12. article824256050
   13. ar

"\nüéØ TODO: translate to English - TODO: translate to EnglishÔºÅ\n\n1. TODO: translate to English:\n   LANGUAGE = 'ru'    # Russian\n   LANGUAGE = 'po'    # Polish\n\n2. TODO: translate to English:\n   DETECTOR_TYPE = 'context'              # TODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ\n   DETECTOR_TYPE = 'voting_aggressive'    # TODO: translate to English-TODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ\n   DETECTOR_TYPE = 'voting_balanced'      # TODO: translate to English-TODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ\n   DETECTOR_TYPE = 'voting_conservative'  # TODO: translate to English-TODO: translate to EnglishÔºàTODO: translate to EnglishÔºâ\n"