In [None]:
import os
import sys
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from IPython.display import display, HTML

# Make the project's `src` directory importable. The notebook may be started
# either from inside `caelus_compliance_project` or from its parent directory,
# so the location of `src` depends on the current working directory.
current_dir = os.path.abspath('')
if current_dir.endswith('caelus_compliance_project'):
    src_path = os.path.join(current_dir, 'src')
else:
    src_path = os.path.join(current_dir, 'caelus_compliance_project', 'src')

# Only append once: re-running this cell must not keep growing sys.path.
if src_path not in sys.path:
    sys.path.append(src_path)

# Plot styling. The bundled seaborn style sheets carry the 'seaborn-v0_8-'
# prefix only on newer Matplotlib releases; older ones ship the same sheet as
# 'seaborn-whitegrid'. Use whichever is installed instead of crashing; if
# neither exists the default style is kept.
for style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break
plt.rcParams['figure.figsize'] = (12, 6)
plt.rcParams['font.size'] = 12


In [None]:
from data_ingestion import DataIngestion

# Locations of the sample input documents.
data_dir = os.path.join(current_dir, 'data')
regulations_path = os.path.join(data_dir, 'regulations', 'nuclear_safety_regulation.txt')
design_path = os.path.join(data_dir, 'designs', 'reactor_cooling_system_design.txt')

# Ingestion helper; later cells call it to split the texts into units.
data_ingestion = DataIngestion()

# Both documents must be on disk; otherwise fall back to built-in samples.
input_files_present = all(
    os.path.exists(path) for path in (regulations_path, design_path)
)

if not input_files_present:
    # At least one file is missing: use short hard-coded sample texts instead.
    regulations_text = """Nuclear Safety Regulation Article 5.2: All cooling system connections must have standard thermal insulation with a minimum thickness of 50 mm.
Article 7.1: Seismic resistance with a minimum intensity of 0.35g is mandatory for all cooling system components.
Article 8.4: The emergency cooling system must be able to operate for at least 72 hours without an external power source in case of power failure."""

    design_text = """Reactor Cooling System Design Specifications:
The cooling system connections are insulated with standard industrial-grade thermal insulation. Average thickness is 45mm.
All cooling system components are designed to withstand seismic events up to 0.25g intensity.
The emergency cooling system can operate without external power for 96 hours during power outage situations."""

    print("Using sample text data instead of files")
    print(f"Sample regulations: {len(regulations_text)} characters")
    print(f"Sample design: {len(design_text)} characters")
else:
    # Read both documents as UTF-8 text.
    with open(regulations_path, 'r', encoding='utf-8') as regulations_file:
        regulations_text = regulations_file.read()

    with open(design_path, 'r', encoding='utf-8') as design_file:
        design_text = design_file.read()

    print(f"Regulations text loaded: {len(regulations_text)} characters")
    print(f"Design text loaded: {len(design_text)} characters")

    # Show the first 500 characters of each document.
    excerpts = (
        ("\nExcerpt from regulations:", regulations_text),
        ("\nExcerpt from design document:", design_text),
    )
    for excerpt_title, document_text in excerpts:
        print(excerpt_title)
        print("-" * 50)
        print(document_text[:500] + "...")


In [None]:
# Split both documents into semantic units.
reg_units = data_ingestion.text_to_semantic_units(regulations_text)
design_units = data_ingestion.text_to_semantic_units(design_text)

print(f"Extracted {len(reg_units)} semantic units from regulations")
print(f"Extracted {len(design_units)} semantic units from design document")

# Preview up to three units per document, truncating each unit's 'text'
# entry to its first 100 characters.
unit_previews = (
    ("\nSample regulation units:", reg_units),
    ("\nSample design units:", design_units),
)
for preview_title, units in unit_previews:
    print(preview_title)
    for i, unit in enumerate(units[:3]):
        print(f"Unit {i+1}: {unit['text'][:100]}...")


In [None]:
from knowledge_graph import KnowledgeGraphBuilder

# Create the knowledge-graph builder.
kg_builder = KnowledgeGraphBuilder()

# The relation training data (JSON Lines: one JSON object per line) is used
# as the input for the knowledge graph.
relations_path = os.path.join(data_dir, 'fine_tuning_datasets', 'relations.jsonl')

if os.path.exists(relations_path):
    with open(relations_path, 'r', encoding='utf-8') as f:
        # Skip blank lines: json.loads raises on an empty string, and JSONL
        # files sometimes contain empty separator or trailing lines.
        relations_data = [json.loads(line) for line in f if line.strip()]

    print(f"Loaded {len(relations_data)} relation examples for knowledge graph")

    # Build the knowledge graph from the loaded relation examples.
    knowledge_graph = kg_builder.build_graph_from_relations(relations_data)

    # Report graph size; the result is indexed by 'nodes' and 'edges'.
    print(f"Knowledge graph created with {len(knowledge_graph['nodes'])} nodes and {len(knowledge_graph['edges'])} edges")

    # Show the first five edges as "source --[relation]--> target".
    print("\nSample relations:")
    for edge in knowledge_graph['edges'][:5]:
        print(f"{edge['source']} --[{edge['relation']}]--> {edge['target']}")
else:
    print("Relations data not found. Skipping knowledge graph creation.")


In [None]:
from llm_finetuning import LLMFineTuner

# Paths needed for the fine-tuning demonstration. `fine_tuned_model_dir` is
# also read by the compliance-checker cell further down.
compliance_examples_path = os.path.join(data_dir, 'fine_tuning_datasets', 'compliance_examples.jsonl')
fine_tuned_model_dir = os.path.join(current_dir, 'models', 'fine_tuned_llm')

if os.path.exists(compliance_examples_path):
    # Load the training examples (JSON Lines: one JSON object per line).
    with open(compliance_examples_path, 'r', encoding='utf-8') as f:
        # Skip blank lines: json.loads raises on an empty string, and JSONL
        # files sometimes contain empty separator or trailing lines.
        examples = [json.loads(line) for line in f if line.strip()]

    print(f"Loaded {len(examples)} training examples for fine-tuning")

    # Show the first training conversation, truncating long message bodies
    # to 150 characters.
    if examples:
        example = examples[0]
        print("\nSample training conversation:")
        for msg in example['messages']:
            print(f"Role: {msg['role']}")
            print(f"Content: {msg['content'][:150]}..." if len(msg['content']) > 150 else f"Content: {msg['content']}")
            print("---")

    # Describe the fine-tuning process. Nothing is actually trained here
    # because of resource constraints; the text below is purely informational.
    print("\nFine-tuning process explanation:")
    print("1. The LLMFineTuner class prepares the training data in the appropriate format")
    print("2. It loads a base model (e.g., Mistral-7B) with quantization for memory efficiency")
    print("3. It applies LoRA (Low-Rank Adaptation) for parameter-efficient fine-tuning")
    print("4. The model is trained on compliance assessment examples")
    print("5. The fine-tuned model is saved for later compliance checking")

    # Print the fine-tuning architecture parameters (static text, not read
    # from any configuration object in this notebook).
    print("\nFine-tuning architecture parameters:")
    print("- Base model: mistralai/Mistral-7B-v0.1")
    print("- LoRA rank: 8")
    print("- LoRA alpha: 16")
    print("- Learning rate: 2e-5")
    print("- Training epochs: 3")
else:
    print("Training examples not found. Skipping fine-tuning demonstration.")


In [None]:
from compliance_checker import ComplianceChecker

# Set up the compliance checker. The fine-tuned model directory is passed
# only when it exists on disk; otherwise None is passed for that argument.
model_path = fine_tuned_model_dir if os.path.exists(fine_tuned_model_dir) else None
checker = ComplianceChecker(
    fine_tuned_model_path=model_path,
    base_model_name='mistralai/Mistral-7B-Instruct-v0.2',
)

# Example (design statement, regulation statement) pairs to check.
example_pairs = [
    (
        "The thermal insulation for cooling system connections is specified as standard industrial grade with average thickness of 45mm.",
        "All cooling system connections must have standard thermal insulation with a minimum thickness of 50 mm.",
    ),
    (
        "The seismic resistance of all cooling system components is designed to withstand earthquakes of intensity up to 0.25g.",
        "Seismic resistance with a minimum intensity of 0.35g is mandatory for all cooling system components.",
    ),
    (
        "The emergency cooling system is designed to operate for 96 hours without an external power source in case of power failure.",
        "The emergency cooling system must be able to operate for at least 72 hours without an external power source in case of power failure.",
    ),
]
compliance_examples = [
    {"design": design_statement, "regulation": regulation_statement}
    for design_statement, regulation_statement in example_pairs
]

# Run the compliance check for every example and collect the results.
compliance_results = []

print("Performing compliance checks...")
for i, example in enumerate(compliance_examples):
    print(f"\nChecking example {i+1}:")
    print(f"Design: {example['design']}")
    print(f"Regulation: {example['regulation']}")

    result = checker.check_compliance(
        design_text=example['design'],
        regulation_text=example['regulation'],
    )

    # Attach the input texts plus a made-up similarity score (for display
    # purposes only) to the result.
    result['regulation_text'] = example['regulation']
    result['design_text'] = example['design']
    result['similarity'] = 0.8 + (i * 0.05)

    compliance_results.append(result)

    print(f"Status: {result.get('compliance_status', 'Unknown')}")

    # Print the justification, truncated to 200 characters when longer.
    justification = result.get('justification', '')
    if len(justification) > 200:
        print(f"Justification: {justification[:200]}...")
    else:
        print(f"Justification: {justification}")


In [None]:
# Generate the compliance report from the collected check results.
report = checker.generate_compliance_report(compliance_results)

# Print the report summary.
print("Compliance Report Summary")
print("-" * 30)
summary = report['summary']
print(f"Total Requirements: {summary['total_requirements']}")
print(f"Compliance Percentage: {summary['compliance_percentage']}%")
print(f"Critical Issues: {summary['critical_issues_count']}")
print("\nStatus Counts:")

# Only statuses with a positive count are listed and plotted.
status_counts = summary['status_counts']
nonzero_statuses = [(status, count) for status, count in status_counts.items() if count > 0]
for status, count in nonzero_statuses:
    print(f"  - {status}: {count}")

# Pie chart of the compliance status distribution.
labels = [status for status, count in nonzero_statuses]
values = [count for status, count in nonzero_statuses]
colors = ['#66c2a5', '#fc8d62', '#8da0cb', '#e78ac3', '#a6d854']

fig, ax = plt.subplots(figsize=(10, 6))
ax.pie(values, labels=labels, autopct='%1.1f%%', startangle=90, colors=colors)
ax.axis('equal')
ax.set_title('Compliance Status Distribution')
plt.show()

# List the critical issues; each text field is cut to 150 characters when
# it is longer than that.
critical_issues = report['critical_issues']
if critical_issues:
    print("\nCritical Issues:")
    print("-" * 30)
    issue_fields = (
        ("Regulation", 'regulation_text'),
        ("Design", 'design_text'),
        ("Recommendation", 'recommendation'),
    )
    for i, issue in enumerate(critical_issues):
        print(f"{i+1}. Risk Score: {issue.get('risk_score', 'N/A')}")
        for field_label, field_key in issue_fields:
            field_value = issue.get(field_key, 'N/A')
            if len(issue.get(field_key, '')) > 150:
                print(f"   {field_label}: {field_value[:150]}...")
            else:
                print(f"   {field_label}: {field_value}")
        print()


In [None]:
# All three charts below read from the optional 'statistics' section of the
# report; each chart is skipped when its data is absent.
statistics = report['statistics'] if 'statistics' in report else {}

# Regulatory category analysis: bar chart of categories with a positive count.
if 'regulatory_categories' in statistics:
    categories = statistics['regulatory_categories']
    cat_labels = [k for k, v in categories.items() if v > 0]
    cat_values = [v for k, v in categories.items() if v > 0]

    plt.figure(figsize=(12, 6))
    bars = plt.bar(cat_labels, cat_values, color='#66c2a5')
    plt.title('Regulations by Category')
    plt.xlabel('Category')
    plt.ylabel('Count')
    plt.xticks(rotation=45, ha='right')

    # Write each bar's value just above the bar.
    for bar in bars:
        height = bar.get_height()
        plt.text(bar.get_x() + bar.get_width()/2., height + 0.1,
                 str(int(height)), ha='center', va='bottom')

    plt.tight_layout()
    plt.show()

# Compliance rate per bucket of the report's 'compliance_by_similarity' data
# (shown on the chart as "relevance").
if 'compliance_by_similarity' in statistics:
    relevance_data = statistics['compliance_by_similarity']
    relevance_labels = list(relevance_data.keys())
    relevance_values = list(relevance_data.values())

    plt.figure(figsize=(10, 6))
    plt.bar(relevance_labels, relevance_values, color=['#8dd3c7', '#bebada', '#fb8072'])
    plt.title('Compliance Rate by Relevance')
    plt.xlabel('Relevance Category')
    plt.ylabel('Compliance Rate (%)')
    plt.ylim(0, 100)

    # Label every bar with its percentage, slightly above the bar top.
    for i, v in enumerate(relevance_values):
        plt.text(i, v + 2, f"{v}%", ha='center')

    plt.tight_layout()
    plt.show()

# Implementation effort analysis: pie chart of easy / medium / complex fixes.
if 'implementation_effort' in statistics:
    effort_data = statistics['implementation_effort']
    effort_labels = ['Easy Fixes', 'Medium Fixes', 'Complex Fixes']
    effort_values = [effort_data.get('easy_fixes', 0),
                     effort_data.get('medium_fixes', 0),
                     effort_data.get('complex_fixes', 0)]

    # Missing counts default to 0, so all three values can be zero (e.g. when
    # nothing needs fixing). A pie chart cannot be normalised over a zero
    # total, so only draw it when there is something to show.
    if any(value > 0 for value in effort_values):
        plt.figure(figsize=(10, 6))
        plt.pie(effort_values, labels=effort_labels, autopct='%1.1f%%', startangle=90,
               colors=['#a6d854', '#ffff99', '#fc8d62'])
        plt.axis('equal')
        plt.title('Implementation Effort Distribution')
        plt.tight_layout()
        plt.show()
    else:
        print("No implementation effort data to plot.")


In [None]:
# Import necessary libraries
import os
import sys
import json
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

# Add the src directory to the path for imports. Guarded so that re-running
# this cell does not append the same entry again.
src_dir = os.path.join(os.getcwd(), 'src')
if src_dir not in sys.path:
    sys.path.append(src_dir)

# Import project modules. A failure is tolerated (the notebook is meant to be
# usable as a demonstration even without the full package), but the actual
# error is printed so that a missing or misnamed module can be identified.
try:
    from src.data_ingestion import DataIngestion
    from src.knowledge_graph import KnowledgeGraph
    from src.compliance_checker import ComplianceChecker
    from src.report_generator import ReportGenerator
except ImportError as import_error:
    print("Note: Some modules might be missing. This notebook is for demonstration purposes.")
    print(f"Import error: {import_error}")

# Set up output directory
os.makedirs('output', exist_ok=True)
