# Intent-Based Network Generation Augmentation Toolkit

This notebook shows how to use the Intent-Based Network Generation Augmentation toolkit to generate 3GPP network intents, analyze and augment them, export them to CSV/JSON, and evaluate their quality.

## Setup and Imports

In [None]:
import sys
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Add src to path
sys.path.insert(0, os.path.join('..', 'src'))

from Intents_Generators.Advanced3GPPIntentGenerator import Advanced3GPPIntentGenerator
from Intents_Generators.Constants_Enums import IntentType, Priority
from Evaluation.evaluation_metric import DataEvaluator
from augmentation_utils import paraphrase, back_translate, synonym_augment

print("✓ Imports successful")

## 1. Basic Intent Generation

In [None]:
# Instantiate the generator with LLM synthesis switched off
generator = Advanced3GPPIntentGenerator(use_llm_synthesis=False)

# Produce a small batch; later cells reuse `intents` and `generator`
print("Generating 10 sample intents...")
intents = generator.generate_batch(10)
print(f"Generated {len(intents)} intents")

# Show the headline fields of the first three intents
print("\nSample intents:")
for idx, item in enumerate(intents[:3], start=1):
    summary_lines = [
        f"\n{idx}. Type: {item.intent_type}",
        # Descriptions are cut to 100 characters to keep the preview compact
        f"   Description: {item.description[:100]}...",
        f"   Priority: {item.priority}",
        f"   Location: {item.location}",
        f"   Network Slice: {item.network_slice}",
    ]
    print("\n".join(summary_lines))

## 2. Dataset Analysis

In [None]:
# Collect the per-intent attributes summarised by the charts below.
# `priorities` and `complexities` are reused by the advanced-analysis cell.
intent_types, priorities, complexities = [], [], []
for item in intents:
    intent_types.append(item.intent_type)
    priorities.append(item.priority)
    # Intents whose metadata has no 'technical_complexity' entry count as 5
    complexities.append(item.metadata.get('technical_complexity', 5))

# One row of three panels: type counts, priority counts, complexity histogram
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
type_ax, priority_ax, complexity_ax = axes

# Panel 1: how many intents of each type were generated
pd.Series(intent_types).value_counts().plot(kind='bar', ax=type_ax)
type_ax.set(title='Intent Types Distribution', xlabel='Intent Type', ylabel='Count')
type_ax.tick_params(axis='x', rotation=45)

# Panel 2: how many intents fall under each priority
pd.Series(priorities).value_counts().plot(kind='bar', ax=priority_ax)
priority_ax.set(title='Priority Distribution', xlabel='Priority', ylabel='Count')

# Panel 3: spread of the technical-complexity values
complexity_ax.hist(complexities, bins=5, alpha=0.7)
complexity_ax.set(
    title='Technical Complexity Distribution',
    xlabel='Complexity Level',
    ylabel='Count',
)

plt.tight_layout()
plt.show()

# Text summary to accompany the figure
average_complexity = sum(complexities) / len(complexities)
print("\nDataset Statistics:")
print(f"Total intents: {len(intents)}")
print(f"Unique intent types: {len(set(intent_types))}")
print(f"Average complexity: {average_complexity:.2f}")

## 3. Text Augmentation

In [None]:
# Use the first generated intent's description as the augmentation input
sample_intent = intents[0]
original_text = sample_intent.description

print("Original text:")
print(f"{original_text}")
print(f"\n{'=' * 80}\n")

# (label, callable) pairs to run. Only synonym replacement is listed here;
# the other imported techniques may require models to be loaded first.
augmentation_techniques = [("Synonym Replacement", synonym_augment)]

for label, augment in augmentation_techniques:
    # Best-effort demo: a failing technique is reported and the loop continues
    try:
        augmented = augment(original_text)
        print(f"{label}:", f"{augmented}", "", sep="\n")
    except Exception as err:
        print(f"{label}: Error - {err}", "", sep="\n")

## 4. Export Data

In [None]:
# Embed the current time in the export file names; the cleanup cell reuses
# `timestamp` to find the files written here.
timestamp = f"{datetime.now():%Y%m%d_%H%M%S}"
export_stem = f"notebook_intents_{timestamp}"

# Write the intents as CSV
csv_file = export_stem + ".csv"
generator.export_to_csv(intents, csv_file)
print(f"✓ Exported to CSV: {csv_file}")

# Write the same intents as JSON
json_file = export_stem + ".json"
generator.export_to_json(intents, json_file)
print(f"✓ Exported to JSON: {json_file}")

# Read the CSV back in and show what was written
df = pd.read_csv(csv_file)
print("\nCSV Data Preview:")
print(df.head())
print(f"\nDataFrame shape: {df.shape}")
print(f"Columns: {[*df.columns]}")

## 5. Quality Evaluation

In [None]:
# Evaluate only the descriptions of the first three intents
sample_descriptions = [item.description for item in intents[:3]]

print("Sample descriptions for evaluation:")
for idx, text in enumerate(sample_descriptions, start=1):
    print(f"{idx}. {text[:100]}...")

# Evaluation is best-effort: any failure is reported below instead of raised,
# so the notebook keeps running when the evaluator cannot be used.
try:
    evaluator = DataEvaluator()
    result = evaluator.evaluate_batch(sample_descriptions)

    print("\nEvaluation Results:")
    metrics = result['overall_metrics']
    # (display label, attribute on the metrics object), printed in this order
    score_fields = (
        ("Overall Quality", "overall_quality"),
        ("Technical Accuracy", "technical_accuracy"),
        ("3GPP Compliance", "compliance_level"),
        ("Research Value", "research_value"),
    )
    for label, field in score_fields:
        print(f"{label}: {getattr(metrics, field):.2f}/10")

    print("\nKey Insights:")
    for insight in result['batch_insights']:
        print(f"- {insight}")

except Exception as err:
    print(f"\nEvaluation failed (expected without LLM setup): {err}")
    print("To enable evaluation, install and configure Ollama with Mistral model.")

## 6. Advanced Analysis

In [None]:
# Analyze text characteristics of every generated description
descriptions = [intent.description for intent in intents]
text_lengths = [len(desc) for desc in descriptions]
word_counts = [len(desc.split()) for desc in descriptions]

# 2x2 grid: length histogram, word-count histogram,
# complexity-vs-priority scatter, and top network slices
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# Text length distribution
axes[0, 0].hist(text_lengths, bins=10, alpha=0.7, color='skyblue')
axes[0, 0].set_title('Description Length Distribution')
axes[0, 0].set_xlabel('Characters')
axes[0, 0].set_ylabel('Frequency')

# Word count distribution
axes[0, 1].hist(word_counts, bins=10, alpha=0.7, color='lightgreen')
axes[0, 1].set_title('Word Count Distribution')
axes[0, 1].set_xlabel('Words')
axes[0, 1].set_ylabel('Frequency')

# Complexity vs Priority
priority_mapping = {'LOW': 1, 'MEDIUM': 2, 'HIGH': 3, 'CRITICAL': 4, 'EMERGENCY': 5}


def _priority_level(priority, default=2):
    """Map a priority value to its numeric level in ``priority_mapping``.

    The mapping is keyed by upper-case strings. A plain ``dict.get`` on the
    raw value silently returns ``default`` for anything that is not exactly
    one of those strings, which would flatten the scatter plot onto a single
    level. NOTE(review): ``intent.priority`` presumably holds ``Priority``
    enum members (the enum is imported at the top of the notebook) -- confirm
    against ``Constants_Enums``.

    The value itself, its ``name``, its ``value`` and its ``str()`` form are
    tried in that order, case-insensitively. Every input that matched before
    still matches; unknown values still fall back to ``default``.
    """
    candidates = (
        priority,
        getattr(priority, 'name', None),
        getattr(priority, 'value', None),
        str(priority),
    )
    for candidate in candidates:
        if isinstance(candidate, str):
            level = priority_mapping.get(candidate.upper())
            if level is not None:
                return level
    return default


# `priorities` and `complexities` come from the dataset-analysis cell above
priority_numeric = [_priority_level(p) for p in priorities]

axes[1, 0].scatter(complexities, priority_numeric, alpha=0.7, color='coral')
axes[1, 0].set_title('Complexity vs Priority')
axes[1, 0].set_xlabel('Technical Complexity')
axes[1, 0].set_ylabel('Priority Level')

# Network slice distribution (intents with a falsy network_slice are skipped)
network_slices = [intent.network_slice for intent in intents if intent.network_slice]
slice_counts = pd.Series(network_slices).value_counts().head(5)
if slice_counts.empty:
    # Nothing to draw a bar chart from; show a placeholder instead, matching
    # the 'N/A' fallback used in the text summary below
    axes[1, 1].text(
        0.5, 0.5, 'No network slice data',
        ha='center', va='center', transform=axes[1, 1].transAxes,
    )
else:
    slice_counts.plot(kind='bar', ax=axes[1, 1], color='gold')
axes[1, 1].set_title('Top 5 Network Slices')
axes[1, 1].set_xlabel('Network Slice')
axes[1, 1].set_ylabel('Count')
axes[1, 1].tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

# Guard the averages so an empty batch reports 0.0 instead of dividing by zero
avg_length = sum(text_lengths) / len(text_lengths) if text_lengths else 0.0
avg_words = sum(word_counts) / len(word_counts) if word_counts else 0.0

print(f"\nText Analysis:")
print(f"Average description length: {avg_length:.1f} characters")
print(f"Average word count: {avg_words:.1f} words")
print(f"Most common network slice: {slice_counts.index[0] if len(slice_counts) > 0 else 'N/A'}")

## 7. Cleanup

In [None]:
# Report the files written by the export cell (matched via its `timestamp`)
import glob

generated_files = glob.glob(f"notebook_intents_{timestamp}.*")
print("Generated files:")
for path in generated_files:
    # Skip anything that no longer exists by the time it is inspected
    if not os.path.exists(path):
        continue
    print(f"- {path} ({os.path.getsize(path)} bytes)")

# Optional: uncomment to delete the exported files
# for path in generated_files:
#     if os.path.exists(path):
#         os.remove(path)
#         print(f"Removed {path}")

print("\n✓ Notebook execution completed successfully!")