# ESG Ontology Analysis

This notebook analyzes the ESG ontology to understand:
1. What ESG indicators are defined
2. How they are categorized (Environmental, Social, Governance)
3. Their relationships and properties
4. How they can be used to build annotation guidelines

In [None]:
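# Install required packages if not already installed (uncomment the line below to run).
# %pip installs into the environment of the running kernel, which !pip does not guarantee.
# %pip install rdflib owlready2 pandas matplotlib seaborn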

In [None]:
import sys
sys.path.append('../src')

from ontology_analyzer import ESGOntologyAnalyzer
import pandas as pd
import json
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

## 1. Load and Analyze the ESG Ontology

In [None]:
# Build the analyzer from the ontology file path.
# NOTE(review): the path is relative; presumably it is resolved against the
# notebook's working directory — confirm against ESGOntologyAnalyzer.
ontology_path = "../../esgontology.owl"
analyzer = ESGOntologyAnalyzer(ontology_path)

# Run the analyzer's full pass. Later cells in this notebook read the
# 'summary' and 'categories' entries of the returned mapping.
analysis_result = analyzer.analyze_ontology()

In [None]:
# Print every summary statistic under a banner, turning snake_case keys
# into Title Case labels for readability.
print("=== ESG Ontology Analysis Summary ===")
for stat_name, stat_value in analysis_result['summary'].items():
    stat_label = stat_name.replace('_', ' ').title()
    print(f"{stat_label}: {stat_value}")

## 2. Explore ESG Indicators

In [None]:
# Ask the analyzer for the indicator mapping table built from the analysis output,
# report how many rows it has, and preview the first ten.
indicators_df = analyzer.create_indicator_mapping(analysis_result)
print(f"Found {indicators_df.shape[0]} indicators in the ontology")
indicators_df.head(n=10)

In [None]:
# Count how many indicators carry each value of the 'type' column.
type_counts = indicators_df['type'].value_counts()
print("Indicator Types:")
print(type_counts)

# Bar chart of the counts, drawn on an explicitly created figure/axes pair.
fig, ax = plt.subplots(figsize=(8, 6))
type_counts.plot(kind='bar', ax=ax)
ax.set_title('Distribution of ESG Indicator Types')
ax.set_xlabel('Indicator Type')
ax.set_ylabel('Count')
# Tilt the category labels so longer names do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()

## 3. Analyze Categories and Domains

In [None]:
# Convert the 'categories' mapping into a DataFrame; with orient='index'
# each top-level key becomes one row.
category_records = analysis_result['categories']
categories_df = pd.DataFrame.from_dict(category_records, orient='index')
print(f"Found {categories_df.shape[0]} categories")
categories_df.head(n=5)

In [None]:
# Count how many categories fall under each value of the 'domain' column.
domain_counts = categories_df['domain'].value_counts()
print("Domain Distribution:")
print(domain_counts)

# Bar chart of the counts, drawn on an explicitly created figure/axes pair.
fig, ax = plt.subplots(figsize=(10, 6))
domain_counts.plot(kind='bar', ax=ax)
ax.set_title('Distribution of ESG Domains')
ax.set_xlabel('Domain')
ax.set_ylabel('Number of Categories')
# Tilt the category labels so longer names do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()

## 4. Key ESG Indicators for Extraction

In [None]:
# Environmental indicators: rows whose lower-cased 'indicator_name' contains
# any of the keywords below (joined into a single regex alternation).
environmental_keywords = ['energy', 'waste', 'water', 'carbon', 'emission', 'efficiency']
environmental_pattern = '|'.join(environmental_keywords)
environmental_mask = (
    indicators_df['indicator_name']
    .str.lower()
    .str.contains(environmental_pattern, na=False)  # missing names count as "no match"
)
environmental_indicators = indicators_df[environmental_mask]

print("Key Environmental Indicators:")
for _, row in environmental_indicators.iterrows():
    print(f"- {row['indicator_name']}: {row['label']}")

In [None]:
# Social indicators: rows whose lower-cased 'indicator_name' contains
# any of the keywords below (joined into a single regex alternation).
social_keywords = ['employee', 'safety', 'diversity', 'training', 'health']
social_pattern = '|'.join(social_keywords)
social_mask = (
    indicators_df['indicator_name']
    .str.lower()
    .str.contains(social_pattern, na=False)  # missing names count as "no match"
)
social_indicators = indicators_df[social_mask]

print("Key Social Indicators:")
for _, row in social_indicators.iterrows():
    print(f"- {row['indicator_name']}: {row['label']}")

In [None]:
# Governance indicators: rows whose lower-cased 'indicator_name' contains
# any of the keywords below (joined into a single regex alternation).
governance_keywords = ['board', 'governance', 'compliance', 'ethics', 'transparency']
governance_pattern = '|'.join(governance_keywords)
governance_mask = (
    indicators_df['indicator_name']
    .str.lower()
    .str.contains(governance_pattern, na=False)  # missing names count as "no match"
)
governance_indicators = indicators_df[governance_mask]

print("Key Governance Indicators:")
for _, row in governance_indicators.iterrows():
    print(f"- {row['indicator_name']}: {row['label']}")

## 5. Create Annotation Guidelines

In [None]:
# Assemble per-domain annotation guidelines: the indicator names selected in
# the keyword-filter cells, paired with hand-written example text patterns.
# Insertion order (Environmental, Social, Governance) is kept so the printed
# and saved output is stable.
domain_frames = {
    'Environmental': environmental_indicators,
    'Social': social_indicators,
    'Governance': governance_indicators,
}

domain_patterns = {
    'Environmental': [
        'Energy consumption: X kWh',
        'CO2 emissions: X tonnes',
        'Water usage: X liters',
        'Waste reduction: X%',
        'Energy efficiency: X%',
    ],
    'Social': [
        'Employee count: X',
        'Safety incidents: X',
        'Training hours: X hours',
        'Diversity ratio: X%',
    ],
    'Governance': [
        'Board independence: X%',
        'Compliance score: X',
        'Ethics training: X%',
    ],
}

annotation_guidelines = {
    domain_name: {
        'indicators': frame['indicator_name'].tolist(),
        'patterns': domain_patterns[domain_name],
    }
    for domain_name, frame in domain_frames.items()
}

# Short report: indicator count and up to three example patterns per domain.
print("Annotation Guidelines Created:")
for domain_name, guideline in annotation_guidelines.items():
    print(f"\n{domain_name}:")
    print(f"  Indicators: {len(guideline['indicators'])}")
    print(f"  Example patterns: {guideline['patterns'][:3]}")

## 6. Save Results

In [None]:
# Persist the analysis artifacts produced by this notebook:
#   - esg_ontology_analysis.json   : the full analysis result
#   - esg_indicators_mapping.csv   : the indicator mapping table
#   - annotation_guidelines.json   : the per-domain guidelines
output_dir = Path('../data/ontology')
# parents=True also creates '../data' when it is missing; without it, mkdir
# raises FileNotFoundError on a fresh checkout where only the notebook exists.
output_dir.mkdir(parents=True, exist_ok=True)

# Save complete analysis (encoding pinned so the file does not depend on the
# platform's default text encoding)
with open(output_dir / 'esg_ontology_analysis.json', 'w', encoding='utf-8') as f:
    json.dump(analysis_result, f, indent=2)

# Save indicator mapping (row index omitted from the CSV)
indicators_df.to_csv(output_dir / 'esg_indicators_mapping.csv', index=False)

# Save annotation guidelines
with open(output_dir / 'annotation_guidelines.json', 'w', encoding='utf-8') as f:
    json.dump(annotation_guidelines, f, indent=2)

print("Results saved to:", output_dir)

## Next Steps

1. **Manual Annotation**: Use the indicators identified here to manually annotate ESG metrics in your corporate reports
2. **Pattern Analysis**: Look for common patterns in how these indicators are presented in documents
3. **Training Data**: Create labeled datasets for training your ESG extraction model
4. **Model Selection**: Choose appropriate base models (e.g., FinBERT, BERT) for fine-tuning

The ontology analysis provides the foundation for understanding what ESG indicators to extract and how they relate to each other.