In [1]:
# Core imports and functions
import json
from openai import OpenAI
from read_dataset import read_all_documents
import pandas as pd
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment
from difflib import SequenceMatcher

# Import API key
from apikeys import open_ai_api_key

# Set up OpenAI client
client = OpenAI(api_key=open_ai_api_key)

def extract_metadata_with_llm(document_content: str, filename: str) -> dict:
    """Extract metadata from document content using OpenAI GPT-4.1"""
    max_content_length = 100000
    truncated_content = document_content[:max_content_length]
    
    prompt = f"""
    You are analyzing a Slovak government document. Please extract the following information and return it as a JSON object in English:
    
    Document filename: {filename}
    Document content: {truncated_content}
    
    Please extract and return the following information in JSON format:
    {{
        "project_name": "English translation of the project name",
        "project_name_slovak": "Original Slovak project name",
        "document_type": "Type of document (e.g., feasibility study, evaluation, financial analysis, etc.)",
        "sector": "Sector of the project. The Sector of the project is one of the following: Building, Transport, IT, Defense, Other: xxx (Specify if Other)",
        "summary": "Brief summary of the document content in English"
    }}
    
    Important notes:
    - If the document is in Slovak, translate the project name to English but also keep the original Slovak version
    - Be specific about the document type based on the content
    - Identify the sector/domain the project belongs to
    - Provide a concise but informative summary
    - Return only valid JSON, no additional text
    """
    
    try:
        response = client.chat.completions.create(
            model="gpt-4.1",
            messages=[
                {"role": "system", "content": "You are an expert document analyst specializing in Slovak government documents. Extract metadata accurately and return only valid JSON."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.1,
            max_tokens=1000
        )
        
        content = response.choices[0].message.content.strip()
        
        try:
            metadata = json.loads(content)
            return metadata
        except json.JSONDecodeError:
            start_idx = content.find('{')
            end_idx = content.rfind('}') + 1
            if start_idx != -1 and end_idx != 0:
                json_str = content[start_idx:end_idx]
                metadata = json.loads(json_str)
                return metadata
            else:
                return {"error": "Could not parse JSON from LLM response", "raw_response": content}
                
    except Exception as e:
        return {"error": f"LLM processing failed: {str(e)}"}

def calculate_similarity(str1: str, str2: str) -> float:
    """Calculate similarity between two strings (0-1 scale) with null safety"""
    if not str1 or not str2:
        return 0.0
    return SequenceMatcher(None, str(str1).lower(), str(str2).lower()).ratio()

def find_fuzzy_matches(new_project_name: str, existing_projects: dict, threshold: float = 0.8) -> list:
    """Find existing projects that match the new project name above threshold"""
    if not new_project_name or not existing_projects:
        return []
    
    matches = []
    for canonical_name, project_data in existing_projects.items():
        if not canonical_name:
            continue
        similarity = calculate_similarity(new_project_name, canonical_name)
        if similarity >= threshold:
            matches.append({
                'canonical_name': canonical_name,
                'similarity': similarity,
                'project_data': project_data
            })
    return sorted(matches, key=lambda x: x['similarity'], reverse=True)

def ask_llm_for_project_matching(new_project_name: str, new_summary: str, existing_projects: dict) -> dict:
    """Ask LLM to determine if new project matches any existing projects"""
    if not new_project_name or not existing_projects:
        return {"is_match": False, "matched_project": None, "confidence": "low", "reasoning": "No project name or existing projects"}
    
    existing_projects_list = []
    for canonical_name, project_data in existing_projects.items():
        if not canonical_name:
            continue
        existing_projects_list.append({
            'name': canonical_name,
            'summary': project_data.get('summary', ''),
            'document_count': len(project_data.get('documents', []))
        })
    
    if not existing_projects_list:
        return {"is_match": False, "matched_project": None, "confidence": "low", "reasoning": "No valid existing projects"}
    
    prompt = f"""
    You are analyzing Slovak government projects. I have a new project and need to determine if it matches any existing projects.
    
    NEW PROJECT:
    Name: {new_project_name}
    Summary: {new_summary}
    
    EXISTING PROJECTS:
    {json.dumps(existing_projects_list, indent=2, ensure_ascii=False)}
    
    Please analyze if the new project is the same as any existing project. Consider:
    - Project names (even if slightly different)
    - Project content and scope
    - Government institutions involved
    - Geographic locations
    - Project phases/stages
    
    Return your analysis as JSON:
    {{
        "is_match": true/false,
        "matched_project": "canonical name if match found, null otherwise",
        "confidence": "high/medium/low",
        "reasoning": "brief explanation of your decision"
    }}
    
    Important: Projects can have different names but be the same project (e.g., different phases, updates, evaluations of the same initiative).
    """
    
    try:
        response = client.chat.completions.create(
            model="gpt-4.1",
            messages=[
                {"role": "system", "content": "You are an expert at analyzing Slovak government projects and identifying when documents belong to the same project despite different names."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.1,
            max_tokens=500
        )
        
        content = response.choices[0].message.content.strip()
        
        try:
            result = json.loads(content)
            return result
        except json.JSONDecodeError:
            start_idx = content.find('{')
            end_idx = content.rfind('}') + 1
            if start_idx != -1 and end_idx != 0:
                json_str = content[start_idx:end_idx]
                result = json.loads(json_str)
                return result
            else:
                return {"is_match": False, "matched_project": None, "confidence": "low", "reasoning": "Could not parse LLM response"}
                
    except Exception as e:
        return {"is_match": False, "matched_project": None, "confidence": "low", "reasoning": f"LLM error: {str(e)}"}

def save_to_excel(data: list, filename: str, sheet_name: str = "Data"):
    """Save data to Excel with formatting"""
    df = pd.DataFrame(data)
    
    with pd.ExcelWriter(filename, engine='openpyxl') as writer:
        df.to_excel(writer, sheet_name=sheet_name, index=False)
        
        workbook = writer.book
        worksheet = writer.sheets[sheet_name]
        
        # Format headers
        header_font = Font(bold=True, color="FFFFFF")
        header_fill = PatternFill(start_color="366092", end_color="366092", fill_type="solid")
        header_alignment = Alignment(horizontal="center", vertical="center")
        
        for cell in worksheet[1]:
            cell.font = header_font
            cell.fill = header_fill
            cell.alignment = header_alignment
        
        # Auto-adjust column widths
        for column in worksheet.columns:
            max_length = 0
            column_letter = column[0].column_letter
            
            for cell in column:
                try:
                    if len(str(cell.value)) > max_length:
                        max_length = len(str(cell.value))
                except:
                    pass
            
            adjusted_width = min(max_length + 2, 50)
            worksheet.column_dimensions[column_letter].width = adjusted_width
        
        # Format summary column to wrap text
        for row in worksheet.iter_rows(min_row=2, max_row=worksheet.max_row):
            if len(row) > 5:  # Summary column (0-indexed)
                summary_cell = row[5]
                summary_cell.alignment = Alignment(wrap_text=True, vertical="top")
    
    print(f"✅ Data saved to {filename}")

print("✅ All core functions loaded successfully!")
print("📋 Available functions:")
print("  - extract_metadata_with_llm()")
print("  - calculate_similarity()")
print("  - find_fuzzy_matches()")
print("  - ask_llm_for_project_matching()")
print("  - save_to_excel()")


✅ All core functions loaded successfully!
📋 Available functions:
  - extract_metadata_with_llm()
  - calculate_similarity()
  - find_fuzzy_matches()
  - ask_llm_for_project_matching()
  - save_to_excel()


In [2]:
# REAL-TIME document processing with immediate project consolidation
print("🚀 Starting REAL-TIME document processing with project consolidation...")
print("Using GPT-4.1 for analysis + immediate project matching")
print("Processing documents one by one with live consolidation")
print("=" * 60)

# Read all documents
documents = read_all_documents("Data")
all_metadata = []
canonical_projects = {}  # Running tally of consolidated projects

print(f"Processing {len(documents)} documents with real-time consolidation...")

for i, (filename, content) in enumerate(documents.items(), 1):
    print(f"\n📄 Processing Document {i}/{len(documents)}: {filename}")
    print(f"Content length: {len(content)} characters")
    
    # Extract metadata using LLM
    metadata = extract_metadata_with_llm(content, filename)
    metadata["filename"] = filename
    metadata["content_length"] = len(content)
    
    if 'error' in metadata:
        print(f"  ❌ Error: {metadata['error']}")
        all_metadata.append(metadata)
        continue
    
    project_name = metadata.get('project_name', '')
    summary = metadata.get('summary', '')
    
    print(f"  📋 Extracted Project: {project_name}")
    
    # REAL-TIME CONSOLIDATION: Check if this project matches existing ones
    print(f"  🔍 Checking for project matches...")
    
    # Step 1: Fuzzy matching
    fuzzy_matches = find_fuzzy_matches(project_name, canonical_projects, threshold=0.8)
    
    if fuzzy_matches:
        print(f"    🔍 Found {len(fuzzy_matches)} fuzzy matches:")
        for match in fuzzy_matches:
            print(f"      - {match['canonical_name']} (similarity: {match['similarity']:.2f})")
    
    # Step 2: LLM matching (compare against ALL existing projects)
    llm_result = ask_llm_for_project_matching(project_name, summary, canonical_projects)
    
    print(f"    🤖 LLM Analysis:")
    print(f"      - Match: {llm_result['is_match']}")
    print(f"      - Confidence: {llm_result['confidence']}")
    print(f"      - Reasoning: {llm_result['reasoning']}")
    
    # Step 3: Decision logic
    matched_project = None
    decision_reason = ""
    
    if llm_result['is_match'] and llm_result['matched_project']:
        # LLM found a match
        matched_project = llm_result['matched_project']
        decision_reason = f"LLM match (confidence: {llm_result['confidence']})"
    elif fuzzy_matches and fuzzy_matches[0]['similarity'] >= 0.9:
        # Very high fuzzy match
        matched_project = fuzzy_matches[0]['canonical_name']
        decision_reason = f"High fuzzy match ({fuzzy_matches[0]['similarity']:.2f})"
    else:
        # No clear match - create new project
        matched_project = None
        decision_reason = "No match found - creating new project"
    
    # Step 4: Update canonical projects
    if matched_project:
        # Add to existing project
        canonical_projects[matched_project]['documents'].append({
            'filename': filename,
            'project_name': project_name,
            'project_name_slovak': metadata.get('project_name_slovak', ''),
            'document_type': metadata.get('document_type', ''),
            'sector': metadata.get('sector', ''),
            'summary': summary,
            'content_length': metadata.get('content_length', 0)
        })
        
        # Update canonical name if new name is better (longer, more descriptive)
        if len(project_name) > len(matched_project) and 'update' not in project_name.lower():
            old_name = matched_project
            canonical_projects[project_name] = canonical_projects.pop(old_name)
            canonical_projects[project_name]['canonical_name'] = project_name
            print(f"    📝 Updated canonical name: '{old_name}' → '{project_name}'")
        
        print(f"    ✅ Added to existing project: {matched_project}")
    else:
        # Create new project
        canonical_projects[project_name] = {
            'canonical_name': project_name,
            'project_name_slovak': metadata.get('project_name_slovak', ''),
            'sector': metadata.get('sector', ''),
            'documents': [{
                'filename': filename,
                'project_name': project_name,
                'project_name_slovak': metadata.get('project_name_slovak', ''),
                'document_type': metadata.get('document_type', ''),
                'sector': metadata.get('sector', ''),
                'summary': summary,
                'content_length': metadata.get('content_length', 0)
            }]
        }
        print(f"    🆕 Created new project: {project_name}")
    
    print(f"    📊 Decision: {decision_reason}")
    
    # Add to all_metadata for compatibility
    all_metadata.append(metadata)
    
    # Show current consolidation status
    print(f"    📈 Current Status: {len(canonical_projects)} consolidated projects")
    print("-" * 60)

print(f"\n🎉 Real-time processing complete!")
print(f"Total documents processed: {len(all_metadata)}")
print(f"Total consolidated projects: {len(canonical_projects)}")

# Show summary
successful_extractions = [m for m in all_metadata if 'error' not in m]
failed_extractions = [m for m in all_metadata if 'error' in m]

print(f"\n📊 Summary:")
print(f"✅ Successful extractions: {len(successful_extractions)}")
print(f"❌ Failed extractions: {len(failed_extractions)}")
print(f"🔄 Documents consolidated: {len(all_metadata) - len(canonical_projects)}")

print(f"\n📋 Final Consolidated Projects:")
for i, (project_name, project_data) in enumerate(canonical_projects.items(), 1):
    doc_count = len(project_data['documents'])
    print(f"  {i}. {project_name}")
    print(f"     Documents: {doc_count}")
    print(f"     Sector: {project_data.get('sector', 'N/A')}")
    if doc_count > 1:
        print(f"     Files: {', '.join([doc['filename'] for doc in project_data['documents']])}")
    print()


INFO:read_dataset:Reading document: 164039.pdf
INFO:read_dataset:Reading document: hodnotenie_d1-bidovce-statna-hranica-sk_ua.pdf


🚀 Starting REAL-TIME document processing with project consolidation...
Using GPT-4.1 for analysis + immediate project matching
Processing documents one by one with live consolidation


INFO:read_dataset:Reading document: Inform cia o zmluvnej cene Ba-Tri.pdf
INFO:read_dataset:Reading document: C_1_10_2_Prinosy_stavby_final.pdf
INFO:read_dataset:Reading document: C_1_4_2_D_Kapacitne_posudenie.pdf
INFO:read_dataset:Reading document: E_1_1a_Stage_Summary_in_English_Language.pdf
INFO:read_dataset:Reading document: Hodnotenie_Najom_MZSR_20250702.pdf
INFO:read_dataset:Reading document: R1-BB-Slovenska-Lupca_Aktualizacia_20230302.pdf
INFO:read_dataset:Reading document: Príloha č. 3 - Podrobné informácie o analyzovaných administratívnych priestoroch.docx.pdf
INFO:read_dataset:Reading document: 2.3_Ortofotomapa_km_0,000-11,500_II._usek.pdf
INFO:read_dataset:Reading document: C_1_5_9_VYSLEDKY_LABORATORNYCH_SKUSOK_ZEMIN_A_HORNIN.pdf
INFO:read_dataset:Reading document: C_1_11_Hodnotenie_Natura_2000.pdf
INFO:read_dataset:Reading document: C_1_1_1_Zivotne_prostredie.pdf
INFO:read_dataset:Reading document: Súčasť prílohy č. 4 - Pôdorys jedáleň + suterén budovy na uli

Processing 103 documents with real-time consolidation...

📄 Processing Document 1/103: 164039.pdf
Content length: 11972 characters


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Reconstruction of the Hviezda Dormitory – Stage II
  🔍 Checking for project matches...
    🤖 LLM Analysis:
      - Match: False
      - Confidence: low
      - Reasoning: No project name or existing projects
    🆕 Created new project: Reconstruction of the Hviezda Dormitory – Stage II
    📊 Decision: No match found - creating new project
    📈 Current Status: 1 consolidated projects
------------------------------------------------------------

📄 Processing Document 2/103: hodnotenie_d1-bidovce-statna-hranica-sk_ua.pdf
Content length: 29963 characters


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Value for Money Assessment of the D1 Corridor Bidovce – State Border SK/UA
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: False
      - Confidence: high
      - Reasoning: The new project concerns a value for money assessment of the D1 motorway corridor from Bidovce to the Slovak-Ukrainian border, focusing on transport infrastructure, feasibility studies, and cost-benefit analysis. The only existing project is the 'Reconstruction of the Hviezda Dormitory – Stage II,' which is unrelated in terms of name, content, scope, institution, and geographic location. There is no overlap in project type (infrastructure vs. building renovation), location, or responsible ministry. Therefore, these are clearly distinct projects.
    🆕 Created new project: Value for Money Assessment of the D1 Corridor Bidovce – State Border SK/UA
    📊 Decision: No match found - creating new project
    📈 Current Status: 2 consolidated projects
------------------------------------------------------------

📄 Processing Document 3/103: Inform cia o zmluvnej cene Ba-Tri.pdf
Content length: 685 characters


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Extension of the D1 Motorway Bratislava - Triblavina
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: False
      - Confidence: high
      - Reasoning: The new project concerns the extension of the D1 motorway between Bratislava and Triblavina, focusing on contractual and construction details for this specific section. The existing projects are: (1) a reconstruction of a dormitory (unrelated in scope and location), and (2) a value for money assessment of a different D1 section (Bidovce – State Border SK/UA), which is geographically distant from Bratislava – Triblavina. There is no overlap in project scope, location, or phase. Therefore, the new project does not match any existing project.
    🆕 Created new project: Extension of the D1 Motorway Bratislava - Triblavina
    📊 Decision: No match found - creating new project
    📈 Current Status: 3 consolidated projects
------------------------------------------------------------

📄 Processing Document 4/103: C_1_10_2_Prinosy_stavby_final.pdf
Content length: 122877 characters


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: False
      - Confidence: high
      - Reasoning: The new project concerns the D3 motorway section between Žilina (Brodno) and Kysucké Nové Mesto, which is part of the D3 corridor in northern Slovakia. None of the existing projects reference the D3 motorway or the relevant geographic locations (Žilina, Kysucké Nové Mesto, Čadca). The existing projects are focused on the D1 motorway (different corridor and locations), a dormitory reconstruction, and a value-for-money assessment for the D1 corridor near the Slovak-Ukrainian border. There is no overlap in project name, content, scope, or geographic area.
    🆕 Created new project: D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto
    📊 Decision: No match found - creating new project
    📈 Current Status: 4 consolidated projects
------------------------------------------------------------

📄 Processing Document 5/103: C_1_4_2_D_Kapacitne_posudenie.pdf
Content length: 19579

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto
  🔍 Checking for project matches...
    🔍 Found 1 fuzzy matches:
      - D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto (similarity: 1.00)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project name exactly matches one of the existing projects, including the specific section (Žilina (Brodno) – Kysucké Nové Mesto) and stage (Stage I). The content of the new project (capacity assessment study for this motorway section) aligns with the scope and geographic location of the existing project. There are no discrepancies in naming, location, or phase, indicating they are the same project.
    ✅ Added to existing project: D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto
    📊 Decision: LLM match (confidence: high)
    📈 Current Status: 4 consolidated projects
------------------------------------------------------------

📄 Processing Document 6/103: E_1_1a_Stage_Summary_in_English_Language.pdf
Content length: 30199 characters


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Motorway D3 Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto
  🔍 Checking for project matches...
    🔍 Found 1 fuzzy matches:
      - D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto (similarity: 0.96)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project name exactly matches one of the existing projects, including the specific section (Žilina (Brodno) – Kysucké Nové Mesto) and stage (Stage I). The summary confirms the scope is a feasibility study for this section of the D3 motorway, which aligns with the existing project's geographic location, phase, and infrastructure focus. There is no indication of a different phase, location, or scope that would distinguish it from the existing project.
    ✅ Added to existing project: D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto
    📊 Decision: LLM match (confidence: high)
    📈 Current Status: 4 consolidated projects
------------------------------------------------------------

📄 Processing Document 7/103: Hodnotenie_Najom_MZSR_20250702.pdf
Content length: 26749 characters


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Optimization of Spatial Arrangements in the Health Sector
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: False
      - Confidence: high
      - Reasoning: The new project concerns the optimization and consolidation of office space for the Ministry of Health and the General Health Insurance Company in Bratislava, focusing on office leases and spatial efficiency. All existing projects are infrastructure-related (dormitory reconstruction, motorway construction or extension) and do not involve the health sector, office space, or the Bratislava administrative area in the described context. There is no overlap in project content, institutions, or geographic focus.
    🆕 Created new project: Optimization of Spatial Arrangements in the Health Sector
    📊 Decision: No match found - creating new project
    📈 Current Status: 5 consolidated projects
------------------------------------------------------------

📄 Processing Document 8/103: R1-BB-Slovenska-Lupca_Aktualizacia_20230302.pdf
Content length: 2749 characters


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: R1 Banská Bystrica – Slovenská Ľupča - update before public procurement
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: False
      - Confidence: high
      - Reasoning: The new project concerns the R1 expressway section between Banská Bystrica and Slovenská Ľupča, focusing on an updated economic evaluation before procurement. None of the existing projects reference the R1 expressway, Banská Bystrica, or Slovenská Ľupča. The existing projects are about different highways (D1, D3), a dormitory, and health sector arrangements, with no overlap in geographic location, project scope, or naming. Therefore, there is no match.
    🆕 Created new project: R1 Banská Bystrica – Slovenská Ľupča - update before public procurement
    📊 Decision: No match found - creating new project
    📈 Current Status: 6 consolidated projects
------------------------------------------------------------

📄 Processing Document 9/103: Príloha č. 3 - Podrobné informácie o analyzovaných administratívnych priestoroch.docx.pdf
Content length: 6823 characters


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Annex No. 3 - Detailed Information on Analyzed Administrative Premises
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: False
      - Confidence: high
      - Reasoning: The new project focuses on the detailed evaluation and future use recommendations for several administrative buildings in Bratislava, including technical, investment, and market analyses. None of the existing projects reference administrative premises, Bratislava buildings, or property management. The existing projects are related to motorway construction, dormitory reconstruction, and spatial arrangements in the health sector, none of which align in content, scope, or geographic focus with the new project. There is no indication that the new project is a phase or component of any listed project.
    🆕 Created new project: Annex No. 3 - Detailed Information on Analyzed Administrative Premises
    📊 Decision: No match found - creating new project
    📈 Current Status: 7 consolidated projects
------------------------------------------------------------

📄 Processing Document 10/103: 2.3_Ortofotomapa_km_0

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Orthophotomap km 0.000-11.500, Section II
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: False
      - Confidence: medium
      - Reasoning: The new project concerns an orthophotomap for kilometers 0.000–11.500 of 'Section II' of a transport infrastructure project, likely a road or railway. Among the existing projects, only those related to transport infrastructure (D1, D3, R1) are potentially relevant. However, the new project does not specify a route name (e.g., D1, D3, R1) or geographic location, and none of the existing projects explicitly mention a 'Section II' or a matching kilometer range. Without more specific geographic or route information, there is insufficient evidence to confidently match the new project to any existing one.
    🆕 Created new project: Orthophotomap km 0.000-11.500, Section II
    📊 Decision: No match found - creating new project
    📈 Current Status: 8 consolidated projects
------------------------------------------------------------

📄 Processing Document 11/103: C_1_5_9_VYSLEDKY_LABORATORNYCH_SKUSOK_ZEMIN_A

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: D3 Motorway Žilina (Brodno) – Čadca – Stage I
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project and the existing project both concern the D3 Motorway section from Žilina (Brodno) to Čadca, specifically Stage I. The new project summary describes geotechnical investigations for this section, while the existing project name specifies Stage I and the segment Žilina (Brodno) – Kysucké Nové Mesto, which is a town located between Žilina (Brodno) and Čadca. The overlap in geographic location, motorway section, and stage strongly indicate these documents are part of the same project, possibly focusing on different sub-segments or aspects (e.g., feasibility vs. design), but within the same overall infrastructure initiative.
    ✅ Added to existing project: D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto
    📊 Decision: LLM match (confidence: high)
    📈 Current Status: 8 consolidated projects
------------------------------------------------------------

📄 P

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Assessment of Impacts on Natura 2000 Sites – D3 Motorway Žilina (Brodno) – Kysucké Nové Mesto
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project evaluates the environmental impacts of the D3 motorway section between Žilina (Brodno) and Kysucké Nové Mesto, which exactly matches the geographic scope and section described in the existing project 'D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto'. The focus on Natura 2000 sites and environmental assessment is a typical sub-activity or documentation phase within the broader motorway project. The naming conventions differ slightly due to the focus (EIA vs. construction phase), but both refer to the same infrastructure section and project phase. No other existing project matches in location or scope.
    📝 Updated canonical name: 'D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto' → 'Assessment of Impacts on Natura 2000 Sites – D3 Motorway Žilina (Brodno) – Kysucké Nové Mesto'
    ✅ Added to existing project: D3 Motorway

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Feasibility Study for the Construction of the D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: Both the new project and the existing project focus on the D3 motorway section between Žilina (Brodno) and Kysucké Nové Mesto. The new project is a feasibility study for Stage I of this section, while the existing project is an assessment of environmental impacts (specifically on Natura 2000 sites) for the same section. Although the document types differ (feasibility study vs. environmental assessment), both are part of the same overarching infrastructure initiative concerning the D3 motorway between these two locations. The geographic scope, infrastructure type, and project phase alignment strongly indicate they are documents from the same project pipeline.
    📝 Updated canonical name: 'Assessment of Impacts on Natura 2000 Sites – D3 Motorway Žilina (Brodno) – Kysucké Nové Mesto' → 'Feasibility Study for the Construction of the D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké No

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Floor Plan of the Canteen and Basement of the Building at Limbová 2
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: False
      - Confidence: medium
      - Reasoning: The new project concerns architectural floor plans for the canteen and basement of a building at Limbová 2. None of the existing projects explicitly reference Limbová 2, canteens, or building floor plans. The closest possible match could be 'Annex No. 3 - Detailed Information on Analyzed Administrative Premises' or 'Optimization of Spatial Arrangements in the Health Sector', as these might involve building layouts or administrative spaces, potentially in the health sector. However, there is insufficient information in the existing project summaries to confirm a direct connection to Limbová 2 or the specific canteen/basement floor plan. Therefore, based on available data, the new project does not match any existing project.
    🆕 Created new project: Floor Plan of the Canteen and Basement of the Building at Limbová 2
    📊 Decision: No match found - creating new project
    📈 Current Status: 9 consoli

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Motorway D3 Žilina (Brodno) - Čadca
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project and the existing project both concern the D3 motorway section from Žilina (Brodno) to Čadca. The new project describes a feasibility study for Stage I of this section, which aligns with the existing project's name and scope. The mention of 'Stage I' and the geographic endpoints (Žilina (Brodno) – Čadca) are consistent. The existing project further specifies 'Kysucké Nové Mesto' as an intermediate point, which is located between Žilina (Brodno) and Čadca, indicating that both projects refer to the same motorway development phase. Therefore, these documents are part of the same overarching project.
    ✅ Added to existing project: Feasibility Study for the Construction of the D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto
    📊 Decision: LLM match (confidence: high)
    📈 Current Status: 9 consolidated projects
-------------------------------------------

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: D1 Motorway Bratislava – Senec, Expansion to 6 Lanes, Section 1 km 0.000 – 3.638 (13.600 – 17.238 D1)
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: False
      - Confidence: high
      - Reasoning: The new project concerns the expansion of the D1 motorway between Bratislava and Senec to six lanes, specifically Section 1 (km 0.000 – 3.638). Among the existing projects, only one references the D1 motorway: 'Extension of the D1 Motorway Bratislava - Triblavina.' However, Triblavina is a different section/interchange further northeast from Senec, and there is no mention of the Bratislava–Senec segment or a six-lane expansion in the existing project names. No other existing project references the D1 corridor in the Bratislava–Senec area. Therefore, there is no match.
    🆕 Created new project: D1 Motorway Bratislava – Senec, Expansion to 6 Lanes, Section 1 km 0.000 – 3.638 (13.600 – 17.238 D1)
    📊 Decision: No match found - creating new project
    📈 Current Status: 10 consolidated projects
------------------------------------------------------------

📄 Processing Document 17/103: Príloha č. 7 Ži

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Motorway D3 Žilina, Brodno - Kysucké Nové Mesto
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project and the existing project both concern the D3 motorway section from Žilina (Brodno) to Kysucké Nové Mesto. The new document is an update to the feasibility study for this section, prepared before contract signing, and covers technical parameters, investment costs, and recommendations from relevant ministries. The existing project explicitly refers to the feasibility study for the same section and stage. The geographic locations, project scope, and phase (feasibility/update before procurement) all align, indicating these documents are part of the same project initiative.
    ✅ Added to existing project: Feasibility Study for the Construction of the D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto
    📊 Decision: LLM match (confidence: high)
    📈 Current Status: 10 consolidated projects
------------------------------------------------------------

📄 Proces

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Eastern Slovak Oncology Institute – Construction of a New Surgical and Outpatient Pavilion
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: False
      - Confidence: high
      - Reasoning: The new project concerns the construction of a new surgical and outpatient pavilion at the Eastern Slovak Oncology Institute (VOÚ), focused on healthcare infrastructure in eastern Slovakia. None of the existing projects reference oncology, the VOÚ, healthcare facility construction, or related investment in the Košice region. The existing projects are primarily related to transportation infrastructure (motorways, corridors), administrative premises, or unrelated buildings (e.g., dormitory, canteen). There is no overlap in project name, content, scope, institution, or location. Therefore, the new project does not match any existing project.
    🆕 Created new project: Eastern Slovak Oncology Institute – Construction of a New Surgical and Outpatient Pavilion
    📊 Decision: No match found - creating new project
    📈 Current Status: 11 consolidated projects
-------------------------------------------------

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: D3 Motorway Žilina (Brodno) – Kysucké Nové Mesto
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project concerns the D3 Motorway section between Žilina (Brodno) and Kysucké Nové Mesto, which exactly matches the geographic scope and section described in the existing project 'Feasibility Study for the Construction of the D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto.' The new document is a supplementary engineering-geological and hydrogeological survey, which is a typical follow-up or supporting document for a feasibility study or subsequent project phases. Both projects refer to the same motorway section and likely involve the same government stakeholders. The difference in document type (feasibility study vs. survey report) reflects different project phases or supporting studies within the same overall project.
    ✅ Added to existing project: Feasibility Study for the Construction of the D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – K

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: None
  🔍 Checking for project matches...
    🤖 LLM Analysis:
      - Match: False
      - Confidence: low
      - Reasoning: No project name or existing projects
    🆕 Created new project: None
    📊 Decision: No match found - creating new project
    📈 Current Status: 12 consolidated projects
------------------------------------------------------------

📄 Processing Document 21/103: 1.11.3 Intenzity dopravy.pdf
Content length: 0 characters


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Traffic Intensities
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: False
      - Confidence: high
      - Reasoning: The new project 'Traffic Intensities' is a general analysis of traffic flows and patterns, supporting transport planning and infrastructure development. The existing projects are either specific infrastructure projects (e.g., motorway extensions, feasibility studies for particular road sections), building reconstructions, or health sector projects. None of the existing projects are described as general traffic analysis or data collection initiatives. There is no indication that 'Traffic Intensities' is a phase, update, or evaluation of any of the listed projects. The scope and content do not overlap directly with any existing project.
    🆕 Created new project: Traffic Intensities
    📊 Decision: No match found - creating new project
    📈 Current Status: 13 consolidated projects
------------------------------------------------------------

📄 Processing Document 22/103: C_1_3_Emisna_studia.pdf
Content 

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Highway D3 Žilina (Brodno) – Čadca, Stage I – Žilina – Kysucké Nové Mesto
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project and the existing project both concern the D3 highway section from Žilina (Brodno) to Čadca, specifically Stage I between Žilina (Brodno) and Kysucké Nové Mesto. The new document is an emission dispersion study, which is a typical component of the environmental impact assessment process for large infrastructure projects like highways. The existing project is a feasibility study for the same section and stage. Both projects share the same geographic scope, infrastructure type, and likely involve the same government institutions. The difference in document type (feasibility study vs. emission study) reflects different aspects or phases of the same overall project. Therefore, the new project document should be grouped under the existing project.
    ✅ Added to existing project: Feasibility Study for the Construction of the D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysuc

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Update: Development Concept of the F. D. Roosevelt Faculty Hospital with Polyclinic Banská Bystrica
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: False
      - Confidence: high
      - Reasoning: None of the existing projects reference the F. D. Roosevelt Faculty Hospital with Polyclinic in Banská Bystrica, nor do they involve hospital construction or reconstruction in Banská Bystrica. The only other healthcare-related project is the 'Eastern Slovak Oncology Institute – Construction of a New Surgical and Outpatient Pavilion,' which is a different institution in a different region (Eastern Slovakia, not Banská Bystrica). No existing project matches the name, location, institution, or scope of the new project.
    🆕 Created new project: Update: Development Concept of the F. D. Roosevelt Faculty Hospital with Polyclinic Banská Bystrica
    📊 Decision: No match found - creating new project
    📈 Current Status: 14 consolidated projects
------------------------------------------------------------

📄 Processing Document 24/103: 2.4_Ortofotomapa_km_11,500-16,772_II.III.IV._usek.pdf
Content length: 0 c

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Orthophotomap km 11,500-16,772, Sections II, III, IV
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project and the existing project both refer to orthophotomap (aerial imagery) documentation for sequential kilometer segments (0.000-11.500 and 11.500-16.772) and both mention Section II. This strongly suggests they are parts of the same overarching infrastructure project, likely related to the same transport corridor, with the new project covering the next segment and additional sections (II, III, IV). The naming convention, content, and scope are highly consistent, indicating these are sequential phases or deliverables within the same project.
    📝 Updated canonical name: 'Orthophotomap km 0.000-11.500, Section II' → 'Orthophotomap km 11,500-16,772, Sections II, III, IV'
    ✅ Added to existing project: Orthophotomap km 0.000-11.500, Section II
    📊 Decision: LLM match (confidence: high)
    📈 Current Status: 14 consolidated projects
----------------------------------------------------------

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Time Schedule
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project concerns the relocation and office space optimization for major Slovak health sector institutions (VŠZP, NCZI, MZ SR), including timelines and space requirements. The existing project 'Optimization of Spatial Arrangements in the Health Sector' is a canonical, broad-scope project likely encompassing such activities. The overlap in sector (health), institutions, and the focus on spatial/office arrangements strongly indicate these documents are part of the same initiative, possibly as different phases or supporting documents (e.g., schedules, analyses, implementation plans).
    ✅ Added to existing project: Optimization of Spatial Arrangements in the Health Sector
    📊 Decision: LLM match (confidence: high)
    📈 Current Status: 14 consolidated projects
------------------------------------------------------------

📄 Processing Document 26/103: Analėza hodnoty za peniaze projektu D4R7.pdf


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Value for Money Analysis of the D4/R7 PPP Project
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: False
      - Confidence: high
      - Reasoning: The new project concerns a Value for Money analysis of the D4/R7 PPP project (D4 motorway Jarovce – Rača and R7 expressway Bratislava Prievoz – Holice). None of the existing projects reference the D4 or R7 corridors, PPP models, or the relevant geographic locations. Existing projects focus on other motorways (D1, D3, R1), health sector, or unrelated infrastructure. There is no overlap in project name, content, or scope.
    🆕 Created new project: Value for Money Analysis of the D4/R7 PPP Project
    📊 Decision: No match found - creating new project
    📈 Current Status: 15 consolidated projects
------------------------------------------------------------

📄 Processing Document 27/103: C_1_5_2_1_SITUACIA_PRIESKUMNYCH_DIEL.pdf
Content length: 438 characters


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Overview of the Route Section D3 - Žilina (Brodno) - Kysucké Nové Mesto
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project and the existing project both focus on the D3 motorway section between Žilina (Brodno) and Kysucké Nové Mesto. The new project provides an overview and technical details of this section, while the existing project is a feasibility study for the same segment (Stage I). The geographic location, project scope, and phase (planning/analysis) all align. The difference in naming reflects a focus on different aspects (overview vs. feasibility study), but both clearly pertain to the same infrastructure project and phase.
    ✅ Added to existing project: Feasibility Study for the Construction of the D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto
    📊 Decision: LLM match (confidence: high)
    📈 Current Status: 15 consolidated projects
------------------------------------------------------------

📄 Processing Document 28/103: C_1_6_Vypocty_vedenia_tras.pdf
Conte

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: D3 Motorway Žilina (Brodno) - Čadca
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: medium
      - Reasoning: The new project and the existing project both concern the D3 motorway section between Žilina (Brodno) and Čadca in the Žilina region. The existing project specifically mentions 'Stage I: Žilina (Brodno) – Kysucké Nové Mesto', which is a subsection of the full Žilina (Brodno) – Čadca route. The new project appears to cover the entire section from Žilina (Brodno) to Čadca, possibly including or building upon the previously studied Stage I. Both projects involve technical documentation and feasibility/planning studies for the same motorway corridor. However, since the new project may encompass a broader scope or a later phase, the match is not certain but is likely, hence the medium confidence.
    ✅ Added to existing project: Feasibility Study for the Construction of the D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto
    📊 Decision: LLM match (confidence: medium)
    

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: D3 Motorway Žilina (Brodno) – Čadca
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project and the existing project both concern the D3 motorway section between Žilina (Brodno) and Čadca in the Žilina region. The existing project specifically references Stage I (Žilina (Brodno) – Kysucké Nové Mesto), which is a subsection of the full Žilina (Brodno) – Čadca route. The new project appears to be a broader feasibility study for the entire section, potentially encompassing multiple stages. Both projects involve similar technical, environmental, and planning considerations. The overlap in geographic location, infrastructure type, and project scope strongly indicate they are part of the same overall motorway initiative, possibly at different phases or levels of detail.
    ✅ Added to existing project: Feasibility Study for the Construction of the D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto
    📊 Decision: LLM match (confidence: high)
    📈 Curr

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto – Climate Change Risk Assessment
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project and the existing project both refer to the D3 motorway section from Žilina (Brodno) to Kysucké Nové Mesto, Stage I. The new document is a climate change risk assessment for this section, while the existing project is a feasibility study for its construction. Both documents concern the same infrastructure project, likely commissioned by the same government institutions, and cover the same geographic area and project phase. The climate risk assessment is a specialized study that would typically be included as part of the broader feasibility and planning process for such a motorway project.
    ✅ Added to existing project: Feasibility Study for the Construction of the D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto
    📊 Decision: LLM match (confidence: high)
    📈 Current Status: 15 consolidated projects
---------------------------------------------------

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Feasibility Study: Development Concept of the Eastern Slovak Oncology Institute in Košice
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project is a feasibility study for the development concept of the Eastern Slovak Oncology Institute (VOÚ) in Košice, focusing on the need for modernization and specifically recommending the construction of a new pavilion to improve oncology care. The existing project 'Eastern Slovak Oncology Institute – Construction of a New Surgical and Outpatient Pavilion' directly references the same institution, location, and the construction of a new pavilion. The feasibility study is a typical preparatory phase for such construction projects, indicating both documents are part of the same overall initiative to expand and modernize the VOÚ in Košice.
    ✅ Added to existing project: Eastern Slovak Oncology Institute – Construction of a New Surgical and Outpatient Pavilion
    📊 Decision: LLM match (confidence: high)
    📈 Current Status: 15 consolidated projects
---------------------------------------------

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Procurement of interoperable multi-system electric locomotives and trainsets for long-distance transport
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: False
      - Confidence: high
      - Reasoning: The new project concerns the procurement (via lease) of interoperable multi-system electric locomotives and trainsets for long-distance rail transport by Železničná spoločnosť Slovensko, a.s. (ZSSK). None of the existing projects reference rolling stock procurement, ZSSK, or related railway vehicle modernization. The existing projects focus on highways, hospital infrastructure, administrative buildings, and oncology pavilions, with no overlap in content, scope, or responsible institution. There is no indication that any of the existing projects are updates, phases, or evaluations of a rolling stock procurement project.
    🆕 Created new project: Procurement of interoperable multi-system electric locomotives and trainsets for long-distance transport
    📊 Decision: No match found - creating new project
    📈 Current Status: 16 consolidated projects
-------------------------------------------------------

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Value for Money Assessment of the Supreme Court of the Slovak Republic Project
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: False
      - Confidence: high
      - Reasoning: The new project concerns the value for money assessment and planned comprehensive reconstruction of the Supreme Court building at Župné námestie, Bratislava, led by the Ministry of Finance. None of the existing projects reference the Supreme Court, judicial buildings, or related infrastructure in Bratislava. Existing projects focus on highways, hospitals, dormitories, oncology institutes, and railway procurement, with no overlap in institution, location, or project scope. No existing project appears to be an earlier or related phase, evaluation, or update of the Supreme Court reconstruction.
    🆕 Created new project: Value for Money Assessment of the Supreme Court of the Slovak Republic Project
    📊 Decision: No match found - creating new project
    📈 Current Status: 17 consolidated projects
------------------------------------------------------------

📄 Processing Document 34/103: C_1_5_10_VYSLEDKY

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Motorway D3 Žilina (Brodno) – Čadca – Stage I
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project and the existing project both concern the D3 motorway section from Žilina (Brodno) to Čadca, Stage I. The new project focuses on laboratory tests and geotechnical data as part of the feasibility study for this section, which aligns with the scope of the existing project. The slight difference in naming (the existing project specifies 'Žilina (Brodno) – Kysucké Nové Mesto' as Stage I, while the new project says 'Žilina (Brodno) – Čadca – Stage I') is likely due to alternative naming conventions or sub-section focus, but both clearly refer to the same motorway construction initiative in the same geographic area and phase. Therefore, they are considered the same project.
    ✅ Added to existing project: Feasibility Study for the Construction of the D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto
    📊 Decision: LLM match (confidence: high)
    📈 Current St

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Orthophotomap km 12,500-23,500, Section I
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project and the existing project both involve orthophotomap imagery for specific kilometer sections of a transport infrastructure project. The existing project covers km 11,500-16,772 (Sections II, III, IV), while the new project covers km 12,500-23,500 (Section I). The overlap in kilometer markers (12,500-16,772) and the identical type of work (orthophotomap/geospatial data for infrastructure planning) strongly indicate they are part of the same overarching project, likely divided into sections or phases. The naming convention and technical scope are highly consistent.
    ✅ Added to existing project: Orthophotomap km 11,500-16,772, Sections II, III, IV
    📊 Decision: LLM match (confidence: high)
    📈 Current Status: 17 consolidated projects
------------------------------------------------------------

📄 Processing Document 36/103: C_1_10_3_Cena_verejnej_prace.pdf
Content length: 18637 charac

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Motorway D3 Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project and the existing project share nearly identical names, both referencing the D3 motorway between Žilina (Brodno) and Kysucké Nové Mesto, specifically Stage I. The content described in the new project (feasibility study, cost breakdown, socio-economic evaluation) matches the typical scope of a feasibility study as indicated in the existing project's canonical name. The geographic location and project phase are identical. There is no indication of a different institution or a different phase. Therefore, it is highly likely these documents belong to the same project.
    ✅ Added to existing project: Feasibility Study for the Construction of the D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto
    📊 Decision: LLM match (confidence: high)
    📈 Current Status: 17 consolidated projects
------------------------------------------------------------

📄 Processing D

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: D3 Motorway Žilina (Brodno) – Čadca
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project concerns a seismic survey report prepared as part of the feasibility study for the D3 motorway section between Žilina (Brodno) and Čadca. The existing project with the name 'Feasibility Study for the Construction of the D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto' covers the same motorway corridor and geographic area. The new document appears to be a technical annex or supporting study within the broader feasibility study, likely corresponding to the same overall project but focusing on a specific aspect (seismic risk). The difference in naming is due to the new document referencing the entire section (Žilina (Brodno) – Čadca), while the existing project specifies Stage I (Žilina (Brodno) – Kysucké Nové Mesto), which is a subsection of the full route. Given the overlap in location, scope, and project phase (feasibility study), these documents are pa

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: D3 Motorway Žilina (Brodno) - Čadca
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project and the existing project both concern the D3 Motorway section between Žilina (Brodno) and Čadca in the Žilina region. The existing project specifically mentions Stage I: Žilina (Brodno) – Kysucké Nové Mesto, which is a subsection of the broader Žilina (Brodno) – Čadca route. The new project appears to cover the entire section or a broader feasibility study, but both are clearly part of the same overall motorway initiative. The technical focus, geographic scope, and project naming conventions strongly indicate they are phases or components of the same infrastructure project.
    ✅ Added to existing project: Feasibility Study for the Construction of the D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto
    📊 Decision: LLM match (confidence: high)
    📈 Current Status: 17 consolidated projects
------------------------------------------------------------

📄 P

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Procurement of Interoperable Multi-system Electric Locomotives and Trainsets for Long-distance Transport
  🔍 Checking for project matches...
    🔍 Found 1 fuzzy matches:
      - Procurement of interoperable multi-system electric locomotives and trainsets for long-distance transport (similarity: 1.00)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project name is nearly identical to an existing project ('Procurement of interoperable multi-system electric locomotives and trainsets for long-distance transport'). The content matches exactly: both concern the procurement of interoperable multi-system electric locomotives and trainsets for long-distance passenger rail, prepared for Železničná spoločnosť Slovensko, a.s. (ZSSK). There are no indications of different phases, locations, or institutions. The minor difference in capitalization and phrasing does not indicate a separate project. Therefore, these are the same project.
    ✅ Added to existing project: Procurement of interoperable multi-system electric locomotives and trainsets for long-distance transport
    📊 Decision: LLM match (confidence: high)
    📈 Current Status: 17 consolidated projects
------------------------------------------------------------

📄 Processing Document 40/103: C

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Transport Model in the D3 Motorway Corridor
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project and the existing project both focus on the D3 motorway corridor in northern Slovakia, specifically the section Žilina (Brodno) – Čadca. The new project is a transport model and feasibility study for this corridor, which aligns with the scope of the existing project, even though the existing project's name specifies 'Stage I: Žilina (Brodno) – Kysucké Nové Mesto.' The new project covers the broader corridor and includes scenario analysis for various motorway variants, which likely encompasses the same section and purpose as the existing feasibility study. Both projects serve as analytical and decision-making bases for the D3 motorway's construction and operation. The overlap in geographic location, project type (feasibility/transport modeling), and purpose strongly indicate they are part of the same overall initiative, possibly as different phases or updates.
    ✅ Added to existing proje

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Financial Analysis of Relocation and Operational Costs for Selected Slovak Health Institutions
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project analyzes the financial and operational implications of relocating and optimizing premises for several Slovak health sector institutions, including the Ministry of Health and related agencies. This aligns closely with the existing project 'Optimization of Spatial Arrangements in the Health Sector,' which likely covers similar analyses of space usage, relocation, and cost optimization for health sector entities. The overlap in sector (health), institutions involved, and the focus on spatial/operational optimization strongly indicate these are documents from the same overarching project, possibly representing different phases or detailed analyses within it.
    📝 Updated canonical name: 'Optimization of Spatial Arrangements in the Health Sector' → 'Financial Analysis of Relocation and Operational Costs for Selected Slovak Health Institutions'
    ✅ Added to existing project: Optimization of

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Capacity Model – Detailed Tables
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: False
      - Confidence: medium
      - Reasoning: The new project, 'Capacity Model – Detailed Tables', focuses on quantitative modeling and projections for healthcare service capacity in the Košice Self-Governing Region (KSK), specifically for outpatient visits and hospital bed needs. Among the existing projects, the closest thematic matches are 'Eastern Slovak Oncology Institute – Construction of a New Surgical and Outpatient Pavilion' and 'Financial Analysis of Relocation and Operational Costs for Selected Slovak Health Institutions.' However, the Oncology Institute project is specific to a single facility and construction, while the financial analysis project is broader and focused on costs rather than capacity modeling or regional infrastructure planning. No existing project explicitly covers regional healthcare capacity modeling or infrastructure planning for KSK. Therefore, there is no clear match.
    🆕 Created new project: Capacity Model – D

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Lease of Electric Locomotives for Long-Distance Transport
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project and the existing project both concern the acquisition of electric locomotives for long-distance rail transport, submitted by Železničná spoločnosť Slovensko (ZSSK). The new project is an updated economic evaluation of leasing 15 (with an option for 15 more) electric locomotives, while the existing project refers to the procurement of interoperable multi-system electric locomotives and trainsets for long-distance transport. Both projects share the same scope (long-distance rail transport, electric locomotives), likely involve the same government actors, and address the same need (replacement of aging locomotives and compliance with international standards). The difference in naming reflects a focus on leasing versus general procurement, but the content and objectives align, indicating these are phases or evaluations of the same overarching project.
    ✅ Added to existing project: Procure

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Motorway D3 Žilina (Brodno) - Kysucké Nové Mesto
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project and the existing project both concern the D3 motorway section between Žilina (Brodno) and Kysucké Nové Mesto in northern Slovakia. The new project is a final report of an engineering-geological feasibility study for this section, which aligns with the scope of the existing project named 'Feasibility Study for the Construction of the D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto.' The geographic location, project phase (feasibility study), and content (geological and engineering assessments for motorway construction) all match. The difference in naming is due to the new project focusing specifically on the Žilina (Brodno) – Kysucké Nové Mesto segment, which is explicitly identified as Stage I in the existing project.
    ✅ Added to existing project: Feasibility Study for the Construction of the D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brod

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: D3 Žilina (Brodno) – Kysucké Nové Mesto, including the separate construction of the KNM feeder road
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project focuses on the D3 motorway section from Žilina (Brodno) to Kysucké Nové Mesto, which exactly matches the geographic scope and section described in the existing project 'Feasibility Study for the Construction of the D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto.' The new document is a noise study prepared as part of the feasibility study for this section, indicating it is a sub-document or component of the broader feasibility study project. The mention of the KNM feeder road is consistent with detailed studies often included in such feasibility studies. Therefore, these documents belong to the same overarching project.
    ✅ Added to existing project: Feasibility Study for the Construction of the D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto
    📊 Decision: LLM match (confidence: high)
    📈 Current Status: 18 conso

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Floor Plans of the Building at Limbová 2
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: Both the new project and the existing project reference architectural floor plans for the building at Limbová 2. The new project covers general floor plans as part of an annex, while the existing project specifically mentions the canteen and basement. The overlap in location, document type (floor plans), and likely institutional context strongly indicate these are part of the same broader project concerning the building at Limbová 2, possibly as different annexes or sections.
    ✅ Added to existing project: Floor Plan of the Canteen and Basement of the Building at Limbová 2
    📊 Decision: LLM match (confidence: high)
    📈 Current Status: 18 consolidated projects
------------------------------------------------------------

📄 Processing Document 47/103: Hodnotenie_GSM-R_Varin-KE-CnT_20230301.pdf
Content length: 3137 characters


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Implementation of GSM-R on the Varín – Košice – Čierna nad Tisou Railway Line (update)
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: False
      - Confidence: high
      - Reasoning: None of the existing projects reference the implementation of GSM-R mobile communication technology or specifically mention the Varín – Košice – Čierna nad Tisou railway line. The existing projects focus on different infrastructure (motorways, hospitals, dormitories, oncology institute, administrative premises, procurement of locomotives, etc.) and do not overlap in geographic scope, technology, or subject matter with the new project. No project relates to railway communication systems or modernization of this specific railway corridor.
    🆕 Created new project: Implementation of GSM-R on the Varín – Košice – Čierna nad Tisou Railway Line (update)
    📊 Decision: No match found - creating new project
    📈 Current Status: 19 consolidated projects
------------------------------------------------------------

📄 Processing Document 48/103: hodnotenie_prenajom-priestorov-msvvam.pdf
Content length: 27234 c

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Rental of Premises for the Ministry of Education, Research, Development and Youth of the Slovak Republic
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: medium
      - Reasoning: The new project concerns the economic evaluation and feasibility of relocating the Ministry of Education into rented premises (Spectrum Tower), consolidating multiple workplaces. Among the existing projects, 'Annex No. 3 - Detailed Information on Analyzed Administrative Premises' is the only one that plausibly relates to administrative premises, and its generic name suggests it could be a supporting or detailed document within the same broader project of evaluating and selecting administrative spaces for government use. Both projects likely involve analysis of office space options for government institutions in Bratislava. However, due to the lack of a detailed summary for 'Annex No. 3', the match cannot be confirmed with high confidence, but the overlap in subject matter and terminology makes a medium-confidence match appropriate.
    📝 Updated canonical name: 'Annex No. 3 - Detailed Information on A

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Reconstruction of the Hviezda Dormitory – Update before Contract Signing
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project and the existing project both concern the reconstruction of the Hviezda Dormitory (also known as Kukurica). The new project is an updated economic evaluation before contract signing, specifically focusing on the second phase (Stage II) of the reconstruction, which matches the phase indicated in the existing project name. Both projects involve converting the building for use by the Ministry of Defence, are located at the same site, and are co-financed by the Recovery and Resilience Plan and the state budget. The new document is an update or sub-document within the same overarching project as 'Reconstruction of the Hviezda Dormitory – Stage II.'
    ✅ Added to existing project: Reconstruction of the Hviezda Dormitory – Stage II
    📊 Decision: LLM match (confidence: high)
    📈 Current Status: 19 consolidated projects
------------------------------------------------------------

📄 Processi

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Motorway D3 Žilina (Brodno) - Čadca
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project and the existing project both concern the D3 motorway between Žilina (Brodno) and Čadca in the Žilina region. The new project is a feasibility study with technical details for this section, matching the scope and content of the existing project. The existing project specifies 'Stage I: Žilina (Brodno) – Kysucké Nové Mesto', which is a subsection of the full Žilina (Brodno) – Čadca route, suggesting both documents are part of the same overall motorway initiative, possibly covering different phases or updates. Both involve the National Motorway Company and reference similar technical documentation. The overlap in geographic location, project scope, and institutional involvement strongly indicate these are documents from the same project, possibly at different stages.
    ✅ Added to existing project: Feasibility Study for the Construction of the D3 Motorway Žilina (Brodno) – Čadca, Stage I:

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Motorway D3 Žilina (Brodno) - Čadca: Longitudinal Profiles Taken from Archival Sources
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project and the existing project both focus on the D3 motorway section between Žilina (Brodno) and Čadca. The new project is a feasibility study compiling technical and geotechnical data for this section, which aligns with the scope of the existing project. The existing project's name specifies 'Stage I: Žilina (Brodno) – Kysucké Nové Mesto', which is a subsection of the broader Žilina (Brodno) – Čadca route. Given the overlap in geographic location, project type (feasibility study), and technical focus, it is highly likely these documents belong to the same overarching project, possibly as different phases or technical components.
    ✅ Added to existing project: Feasibility Study for the Construction of the D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto
    📊 Decision: LLM match (confidence: high)
    📈 Current Status: 19 consolidated projects
--------------

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Purpose Map of Engineering-Geological Zoning
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: False
      - Confidence: high
      - Reasoning: The new project, 'Purpose Map of Engineering-Geological Zoning,' is a technical document focused on geological mapping and zoning for land use and construction planning. None of the existing projects have names or implied scopes related to geological mapping, zoning, or engineering-geological surveys. The existing projects are primarily infrastructure construction (motorways, hospitals, dormitories), financial analyses, procurement, or traffic studies, with no indication of geological mapping as a component. There is no evidence of overlap in project content, scope, or likely responsible institutions.
    🆕 Created new project: Purpose Map of Engineering-Geological Zoning
    📊 Decision: No match found - creating new project
    📈 Current Status: 20 consolidated projects
------------------------------------------------------------

📄 Processing Document 53/103: 665d6b9889bf2401837114.pdf
Content length

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Hospital of the Future Martin – St. Martin University Hospital
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: False
      - Confidence: high
      - Reasoning: None of the existing projects reference the construction or modernization of a university hospital in Martin, Slovakia, nor do they mention 'Hospital of the Future', 'St. Martin University Hospital', or any related healthcare facility in Martin. The only existing healthcare-related projects are for the Eastern Slovak Oncology Institute and the F. D. Roosevelt Faculty Hospital in Banská Bystrica, both of which are in different locations and have different scopes. No project in the list matches the geographic location, institution, or specific content of the new project.
    🆕 Created new project: Hospital of the Future Martin – St. Martin University Hospital
    📊 Decision: No match found - creating new project
    📈 Current Status: 21 consolidated projects
------------------------------------------------------------

📄 Processing Document 54/103: Hodnotenie_NDS_R2-Krivan-Tornala_20220727.pdf
Content le

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Expressway R2 Kriváň – Tornaľa
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: False
      - Confidence: high
      - Reasoning: The new project concerns the Expressway R2 section Kriváň – Tornaľa, focusing on an economic evaluation and feasibility assessment for this specific road infrastructure. None of the existing projects reference the R2 expressway, the Kriváň – Tornaľa section, or any related feasibility or value-for-money studies for this corridor. The existing projects related to road infrastructure focus on different highways (D1, D3, D4/R7, R1) or unrelated geographic locations. There is no overlap in project name, content, scope, or geographic area. Therefore, the new project does not match any existing project.
    🆕 Created new project: Expressway R2 Kriváň – Tornaľa
    📊 Decision: No match found - creating new project
    📈 Current Status: 22 consolidated projects
------------------------------------------------------------

📄 Processing Document 55/103: Štúdia uskutočniteľnosti zjednote nia priestorov MZSR a

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Optimization of Spatial Arrangements for the Ministry of Health of the Slovak Republic, General Health Insurance Company, National Health Information Center, and Emergency Medical Service Operations Center in Bratislava
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project and the existing project both focus on the optimization, relocation, and operational costs of office spaces for Slovak health institutions, specifically mentioning the Ministry of Health, General Health Insurance Company, National Health Information Center, and Emergency Medical Service Operations Center in Bratislava. The new project is a feasibility study analyzing spatial arrangements and proposing consolidation/relocation options, while the existing project is a financial analysis of relocation and operational costs for selected Slovak health institutions. Both projects involve the same institutions, similar scope (relocation/optimization of office spaces), and the same geographic location (Bratislava). The difference in names reflects different aspects or phases (feasibility vs. financial analysis), but they are clearly part of the same overarching initiative.
    📝 Updated canonica

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Motorway D3 Žilina (Brodno) - Čadca
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: medium
      - Reasoning: The new project and the existing project both concern the D3 motorway section between Žilina (Brodno) and Čadca in the Žilina region. The existing project specifically refers to 'Stage I: Žilina (Brodno) – Kysucké Nové Mesto', which is a subsection of the broader Žilina (Brodno) – Čadca route. The new project appears to cover the entire section from Žilina (Brodno) to Čadca, potentially including multiple stages. Both are feasibility studies supporting planning and decision-making for the same motorway corridor. There is a strong overlap in geographic location, scope, and likely institutional involvement. However, since the existing project is explicitly labeled as 'Stage I', there is a possibility that the new project is broader or includes additional phases. Therefore, the confidence is medium.
    ✅ Added to existing project: Feasibility Study for the Construction of the D3 Motorway Žilina (Brodno)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Update of the Feasibility Study: Completion of the D1 Section Lietavská Lúčka – Dubná Skala (Technology Part)
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: False
      - Confidence: high
      - Reasoning: The new project concerns an update to the feasibility study for the D1 motorway section Lietavská Lúčka – Dubná Skala, focusing on the technology part and tunnel systems. None of the existing projects specifically reference this section of the D1 motorway. The closest matches in the list are projects related to other D1 sections (e.g., Bratislava-Triblavina, Bratislava-Senec, Bidovce–State Border SK/UA), but these are geographically and contextually distinct from Lietavská Lúčka – Dubná Skala. No existing project summary or name suggests it covers the same scope, location, or phase as the new project.
    🆕 Created new project: Update of the Feasibility Study: Completion of the D1 Section Lietavská Lúčka – Dubná Skala (Technology Part)
    📊 Decision: No match found - creating new project
    📈 Current Status: 23 consolidated projects
------------------------------------------------------------

📄 Proc

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Motorway D3 Žilina (Brodno) - Čadca
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project and the existing project both concern the D3 motorway section between Žilina (Brodno) and Čadca in the Žilina region. The existing project specifically mentions Stage I: Žilina (Brodno) – Kysucké Nové Mesto, which is a subsection of the full Žilina (Brodno) – Čadca route. The new project appears to be a broader feasibility study for the entire section, possibly encompassing multiple stages. The overlap in geographic location, infrastructure type, and project scope (feasibility study for motorway construction) strongly indicate these are phases or components of the same overall project.
    ✅ Added to existing project: Feasibility Study for the Construction of the D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto
    📊 Decision: LLM match (confidence: high)
    📈 Current Status: 23 consolidated projects
-----------------------------------------------------

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Motorway D3 Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project name and the existing project name are nearly identical, both referring to the D3 motorway between Žilina (Brodno) and Kysucké Nové Mesto (Stage I). The summary of the new project describes a feasibility study for this exact section, matching the scope, geographic location, and project phase of the existing project. There is no indication of a different phase, update, or unrelated initiative. Therefore, these documents clearly belong to the same project.
    ✅ Added to existing project: Feasibility Study for the Construction of the D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto
    📊 Decision: LLM match (confidence: high)
    📈 Current Status: 23 consolidated projects
------------------------------------------------------------

📄 Processing Document 60/103: Soci lno ekonomick‚ uŸinky projektu PPP D4_R7.pdf
Content length: 280269 characters


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Socio-economic Impacts of the PPP D4/R7 Project
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project and the existing project both focus on the D4/R7 PPP project, specifically the D4 motorway (Jarovce – Rača) and the R7 expressway (Bratislava Prievoz – Holice) in Slovakia. Both involve economic evaluation methodologies (such as cost-benefit analysis and life cycle cost analysis) and assess the socio-economic impacts and value for money of the same infrastructure initiative. The new project appears to be a detailed or updated socio-economic assessment, but it clearly pertains to the same underlying government project as the existing 'Value for Money Analysis of the D4/R7 PPP Project'.
    ✅ Added to existing project: Value for Money Analysis of the D4/R7 PPP Project
    📊 Decision: LLM match (confidence: high)
    📈 Current Status: 23 consolidated projects
------------------------------------------------------------

📄 Processing Document 61/103: 2.1_Ortofotomapa_km_0,000-12,500_I._usek.

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Orthophotomap km 0.000-12.500, Section I
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project and the existing project both involve the creation of orthophotomaps for specific kilometer ranges and sections of a transport corridor. The new project covers km 0.000-12.500, Section I, while the existing project covers km 11,500-16,772, Sections II, III, IV. The overlap in kilometer markers (11,500-12,500) and the sequential sectioning strongly suggest these are parts of the same larger transport infrastructure project, likely divided into phases or sections for mapping and documentation purposes. The content, scope, and naming conventions are highly consistent, indicating they are documents from the same overarching project.
    ✅ Added to existing project: Orthophotomap km 11,500-16,772, Sections II, III, IV
    📊 Decision: LLM match (confidence: high)
    📈 Current Status: 23 consolidated projects
------------------------------------------------------------

📄 Processing Document 6

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: D1 Motorway Bratislava-Triblavina Expansion
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project ('D1 Motorway Bratislava-Triblavina Expansion') and the existing project ('Extension of the D1 Motorway Bratislava - Triblavina') refer to the same motorway section (Bratislava-Triblavina) and both involve expansion/extension works. The new project summary describes widening, new interchanges, and coordination with D4, which aligns with the typical scope of an 'extension' project. The geographic location, infrastructure type, and project scope all match. The slight difference in naming ('Expansion' vs. 'Extension') is a common translation or terminology variation, but the core project is the same.
    ✅ Added to existing project: Extension of the D1 Motorway Bratislava - Triblavina
    📊 Decision: LLM match (confidence: high)
    📈 Current Status: 23 consolidated projects
------------------------------------------------------------

📄 Processing Document 63/103: C_1_5_4_UČELOVA_HG_MAPA.

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Purpose-Built Hydrogeological Map
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: medium
      - Reasoning: The new project, 'Purpose-Built Hydrogeological Map', and the existing project, 'Purpose Map of Engineering-Geological Zoning', both involve the creation of specialized maps related to the geological and hydrogeological characteristics of the Slovak Republic. While the names are not identical, both projects focus on mapping and zoning based on subsurface characteristics (engineering-geological vs. hydrogeological). The scope overlaps significantly, as both would require similar data sources, methodologies, and likely involve the same government institutions (e.g., geological survey authorities). However, there is some uncertainty because the existing project’s summary is missing, so the match is not certain. The confidence is medium due to the strong thematic and functional overlap, but not high due to lack of detailed summary for the existing project.
    ✅ Added to existing project: Purpose Map of E

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: CBA D3 Feeder - Annex
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project ('CBA D3 Feeder - Annex') is an annex to a cost-benefit analysis (CBA) for a 'D3 feeder road project.' The only existing project referencing 'D3' is the 'Feasibility Study for the Construction of the D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysucké Nové Mesto.' The content of the new project (economic assumptions, CBA methodology) is consistent with supporting documentation for a feasibility study of a major road infrastructure project. The naming ('D3 Feeder') suggests a direct connection to the D3 motorway project, likely as a supporting or related infrastructure. Therefore, it is highly likely that the new document is part of the same overall D3 motorway project, possibly as a technical annex or supporting analysis.
    ✅ Added to existing project: Feasibility Study for the Construction of the D3 Motorway Žilina (Brodno) – Čadca, Stage I: Žilina (Brodno) – Kysuc

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Standard Table of Vehicles - Long-distance Transport
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: False
      - Confidence: high
      - Reasoning: The new project, 'Standard Table of Vehicles - Long-distance Transport,' is a methodological or reference document providing macroeconomic parameters and financial assumptions for use in cost-benefit analyses of transport projects. It is not a specific infrastructure project, feasibility study, or procurement, but rather a supporting document for standardized evaluation across multiple transport projects. None of the existing projects are methodological standards or reference tables; they are all specific infrastructure projects, studies, or procurements. There is no overlap in project name, content, or scope.
    🆕 Created new project: Standard Table of Vehicles - Long-distance Transport
    📊 Decision: No match found - creating new project
    📈 Current Status: 24 consolidated projects
------------------------------------------------------------

📄 Processing Document 66/103: Pr¡loha Ÿ. 7 Aktualiz c

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: Technological Equipment of the Višňové Tunnel Including the Information System of the D1 Motorway Lietavská Lúčka – Višňové – Dubná Skala (according to FIDIC contract conditions – 'Yellow Book')
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project focuses on the technological equipment and information system for the Višňové Tunnel and the D1 motorway section Lietavská Lúčka – Višňové – Dubná Skala. The existing project 'Update of the Feasibility Study: Completion of the D1 Section Lietavská Lúčka – Dubná Skala (Technology Part)' covers the same geographic section and specifically mentions the technology part, which aligns with the new project's focus on technological equipment and systems. Both are feasibility study updates prior to contract signing, and the summaries indicate similar content and timing. The Višňové Tunnel is a key feature of this D1 section, further confirming the match.
    📝 Updated canonical name: 'Update of the Feasibility Study: Completion of the D1 Section Lietavská Lúčka – Dubná Skala (Technology Part)' → 'Technological Equipment of the Višňové Tunnel Including the Information System of the D1 Motorway Lie

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: D3 Motorway Žilina (Brodno) - Čadca
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: True
      - Confidence: high
      - Reasoning: The new project and the existing project both concern the D3 motorway section between Žilina (Brodno) and Čadca in the Žilina region. The new project describes an overview and technical solution for the entire section from Žilina (Brodno) to Čadca, referencing previous technical documentation and studies, which aligns with the existing project's focus on a feasibility study for the same corridor, specifically Stage I (Žilina (Brodno) – Kysucké Nové Mesto). The overlap in geographic location, project scope, and references to project stages strongly indicate these documents are part of the same overall motorway initiative, possibly representing different phases or updates. The naming difference is due to the new project being broader or more recent, but both are clearly linked to the D3 Žilina (Brodno) – Čadca corridor.
    ✅ Added to existing project: Feasibility Study for the Construction of the D3 Moto

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  📋 Extracted Project: CBA Standard Table - Roads
  🔍 Checking for project matches...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


    🤖 LLM Analysis:
      - Match: False
      - Confidence: high
      - Reasoning: The new project, 'CBA Standard Table - Roads', is a methodological or reference document providing standardized parameters for cost-benefit analysis of road infrastructure projects in Slovakia. It is not a specific infrastructure project, feasibility study, or value-for-money assessment of a particular road, motorway, or transport corridor. None of the existing projects are methodological standards or general reference tables; they are all specific to particular infrastructure projects, locations, or institutions. The only somewhat similar title is 'Standard Table of Vehicles - Long-distance Transport', but that appears to be a standard table for vehicles, not for roads or CBA methodology. Therefore, there is no match.
    🆕 Created new project: CBA Standard Table - Roads
    📊 Decision: No match found - creating new project
    📈 Current Status: 25 consolidated projects
-------------------------------

KeyboardInterrupt: 

In [3]:
# Save consolidated results
print("💾 Saving consolidated project results...")

try:
    # Create a flattened list for Excel export
    consolidated_metadata = []
    
    for project_name, project_data in canonical_projects.items():
        for doc in project_data['documents']:
            consolidated_metadata.append({
                'canonical_project_name': project_name,
                'canonical_project_name_slovak': project_data.get('project_name_slovak', ''),
                'canonical_sector': project_data.get('sector', ''),
                'filename': doc['filename'],
                'original_project_name': doc['project_name'],
                'original_project_name_slovak': doc.get('project_name_slovak', ''),
                'document_type': doc.get('document_type', ''),
                'sector': doc.get('sector', ''),
                'summary': doc.get('summary', ''),
                'content_length': doc.get('content_length', 0),
                'documents_in_project': len(project_data['documents'])
            })
    
    # Save consolidated results
    save_to_excel(consolidated_metadata, "consolidated_projects.xlsx", "Consolidated Projects")
    
    # Save original metadata for comparison
    save_to_excel(all_metadata, "original_metadata.xlsx", "Original Metadata")
    
    # Save to JSON as backup
    with open("consolidated_projects.json", 'w', encoding='utf-8') as f:
        json.dump(canonical_projects, f, ensure_ascii=False, indent=2)
    print("✅ Consolidated projects backup saved to consolidated_projects.json")
    
except Exception as e:
    print(f"❌ Error saving consolidated files: {str(e)}")

print("\n🎉 Consolidated processing complete!")
print("📁 Files created:")
print("- consolidated_projects.xlsx (main output with project groupings)")
print("- original_metadata.xlsx (original document metadata)")
print("- consolidated_projects.json (backup)")
print("\n📊 Key Benefits:")
print("- Documents grouped by actual project relationships")
print("- Canonical project names automatically selected")
print("- Real-time consolidation with LLM + fuzzy matching")
print("- Clear visibility into which documents belong together")


💾 Saving consolidated project results...
✅ Data saved to consolidated_projects.xlsx
✅ Data saved to original_metadata.xlsx
✅ Consolidated projects backup saved to consolidated_projects.json

🎉 Consolidated processing complete!
📁 Files created:
- consolidated_projects.xlsx (main output with project groupings)
- original_metadata.xlsx (original document metadata)
- consolidated_projects.json (backup)

📊 Key Benefits:
- Documents grouped by actual project relationships
- Canonical project names automatically selected
- Real-time consolidation with LLM + fuzzy matching
- Clear visibility into which documents belong together


In [4]:
# Flatten canonical_projects into a document-level DataFrame
consolidated_data = []

for project_name, project_data in canonical_projects.items():
    for doc in project_data['documents']:
        consolidated_data.append({
            'canonical_project_name': project_name,
            'canonical_project_name_slovak': project_data.get('project_name_slovak', ''),
            'filename': doc['filename'],
            'original_project_name': doc['project_name'],
            'original_project_name_slovak': doc.get('project_name_slovak', ''),
            'document_type': doc.get('document_type', ''),
            'sector': doc.get('sector', ''),
            'summary': doc.get('summary', ''),
            'content_length': doc.get('content_length', 0),
            'documents_in_project': len(project_data['documents'])
        })

# Create DataFrame
df_consolidated = pd.DataFrame(consolidated_data)
print(f"DataFrame shape: {df_consolidated.shape}")
df_consolidated.head()

DataFrame shape: (68, 10)


Unnamed: 0,canonical_project_name,canonical_project_name_slovak,filename,original_project_name,original_project_name_slovak,document_type,sector,summary,content_length,documents_in_project
0,Reconstruction of the Hviezda Dormitory – Stag...,Rekonštrukcia internátu Hviezda – Etapa II.,164039.pdf,Reconstruction of the Hviezda Dormitory – Stag...,Rekonštrukcia internátu Hviezda – Etapa II.,Feasibility Study Update Form (prior to contra...,Building,This document is an update form for the feasib...,11972,2
1,Reconstruction of the Hviezda Dormitory – Stag...,Rekonštrukcia internátu Hviezda – Etapa II.,hodnotenie_hviezda_aktualizacia_pred_podpisom.pdf,Reconstruction of the Hviezda Dormitory – Upda...,Aktualizácia pred podpisom zmluvy: Rekonštrukc...,Update of economic evaluation (cost-benefit an...,Building,This document is an updated economic evaluatio...,4669,2
2,Value for Money Assessment of the D1 Corridor ...,Hodnota za peniaze projektu Koridor D1 Bidovce...,hodnotenie_d1-bidovce-statna-hranica-sk_ua.pdf,Value for Money Assessment of the D1 Corridor ...,Hodnota za peniaze projektu Koridor D1 Bidovce...,Government evaluation of a feasibility study,Transport,This document is an evaluation by the Slovak M...,29963,1
3,Extension of the D1 Motorway Bratislava - Trib...,Rozšírenie diaľnice D1 Bratislava -Triblavina,Inform cia o zmluvnej cene Ba-Tri.pdf,Extension of the D1 Motorway Bratislava - Trib...,Rozšírenie diaľnice D1 Bratislava -Triblavina,Contract price information,Transport,This document provides information about the c...,685,2
4,Extension of the D1 Motorway Bratislava - Trib...,Rozšírenie diaľnice D1 Bratislava -Triblavina,Zhrnutie D1 Bratislava-Triblavina.docx,D1 Motorway Bratislava-Triblavina Expansion,Diaľnica D1 Bratislava-Triblavina,Project summary and technical overview,Transport,This document provides a summary of the planne...,21734,2
