# ESG Ontology Analysis

This notebook analyzes the ESG ontology to understand:
1. What ESG indicators are defined in the ontology
2. How they are categorized (Environmental, Social, Governance)
3. Their relationships and properties
4. How to use them for creating comprehensive ESG extraction models

**Updated**: Now uses the enhanced ontology analyzer that extracts 51 comprehensive indicators from the ontology.

In [None]:
# Install required packages if not already installed.
# Uncomment the line below to run it; %pip (rather than !pip) installs into the running kernel's environment.
# %pip install rdflib pandas matplotlib seaborn

In [None]:
import sys
sys.path.append('../src')

from ontology_analyzer import EnhancedESGOntologyAnalyzer
import pandas as pd
import json
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

## 1. Load and Analyze the ESG Ontology

In [None]:
# Initialize the enhanced ontology analyzer.
# The path is relative to the notebook's working directory.
ontology_path = "../../esgontology.owl"
analyzer = EnhancedESGOntologyAnalyzer(ontology_path)

# Load the ontology and stop here if that fails: the following cells all
# depend on a loaded analyzer, so continuing would only produce harder-to-read
# errors further down during a Restart & Run All.
if not analyzer.load_ontology():
    raise RuntimeError(
        f"Failed to load ontology from {Path(ontology_path).resolve()}"
    )
print("Ontology loaded successfully!")

In [None]:
# Run the full ontology analysis; later cells read from analysis_result
analysis_result = analyzer.analyze_ontology()

# Print every entry of the 'summary' section with a human-readable label
# (underscores become spaces, words are title-cased)
print("=== ESG Ontology Analysis Summary ===")
for stat_name, stat_value in analysis_result['summary'].items():
    label = stat_name.replace('_', ' ').title()
    print(f"{label}: {stat_value}")

## 2. Explore ESG Indicators

In [None]:
# Build the indicator mapping table from the analysis output
indicators_df = analyzer.create_indicator_mapping(analysis_result)

indicator_count = len(indicators_df)
print(f"Found {indicator_count} indicators in the ontology")

# Preview the first ten rows as the cell's displayed output
indicators_df.head(10)

In [None]:
# Count how many indicators carry each value of the 'Type' column
type_counts = indicators_df['Type'].value_counts()
print("Indicator Types:")
print(type_counts)

# Bar chart of the counts, drawn on an explicit Figure/Axes pair
fig, ax = plt.subplots(figsize=(10, 6))
type_counts.plot(kind='bar', ax=ax)
ax.set_title('Distribution of ESG Indicator Types')
ax.set_xlabel('Indicator Type')
ax.set_ylabel('Count')
ax.tick_params(axis='x', labelrotation=45)  # tilt the type labels
fig.tight_layout()
plt.show()

## 3. Analyze Categories and Domains

In [None]:
# Count how many indicators carry each value of the 'Category' column
category_counts = indicators_df['Category'].value_counts()
print("Category Distribution:")
print(category_counts)

# Bar chart of the counts, drawn on an explicit Figure/Axes pair
fig, ax = plt.subplots(figsize=(12, 8))
category_counts.plot(kind='bar', ax=ax)
ax.set_title('Distribution of ESG Categories')
ax.set_xlabel('Category')
ax.set_ylabel('Number of Indicators')
ax.tick_params(axis='x', labelrotation=45)  # tilt the category labels
fig.tight_layout()
plt.show()

## 4. Key ESG Indicators by Domain

In [None]:
# Focus on environmental indicators.
# The keywords are joined into a single regex alternation and matched as
# substrings of the lower-cased 'Name' column; rows without a name are
# excluded (na=False).
environmental_keywords = ['energy', 'waste', 'water', 'carbon', 'emission', 'efficiency', 'environmental', 'climate', 'ghg']
environmental_indicators = indicators_df[
    indicators_df['Name'].str.lower().str.contains('|'.join(environmental_keywords), na=False)
]

# Cap the listing with head(): testing the total match count inside the loop
# would stop after the very first row whenever there are more than 10 matches.
max_shown = 10
print(f"Environmental Indicators ({len(environmental_indicators)}):")
for _, indicator in environmental_indicators.head(max_shown).iterrows():
    print(f"- {indicator['Name']}: {indicator['Label']}")

# Tell the reader when the listing was truncated
hidden_count = len(environmental_indicators) - max_shown
if hidden_count > 0:
    print(f"... and {hidden_count} more")

In [None]:
# Focus on social indicators.
# The keywords are joined into a single regex alternation and matched as
# substrings of the lower-cased 'Name' column; rows without a name are
# excluded (na=False).
social_keywords = ['employee', 'safety', 'diversity', 'training', 'health', 'human', 'labor', 'community']
social_indicators = indicators_df[
    indicators_df['Name'].str.lower().str.contains('|'.join(social_keywords), na=False)
]

# Cap the listing with head(): testing the total match count inside the loop
# would stop after the very first row whenever there are more than 10 matches.
max_shown = 10
print(f"Social Indicators ({len(social_indicators)}):")
for _, indicator in social_indicators.head(max_shown).iterrows():
    print(f"- {indicator['Name']}: {indicator['Label']}")

# Tell the reader when the listing was truncated
hidden_count = len(social_indicators) - max_shown
if hidden_count > 0:
    print(f"... and {hidden_count} more")

In [None]:
# Focus on governance indicators.
# The keywords are joined into a single regex alternation and matched as
# substrings of the lower-cased 'Name' column; rows without a name are
# excluded (na=False).
governance_keywords = ['board', 'governance', 'compliance', 'ethics', 'transparency', 'corruption', 'corporate']
governance_indicators = indicators_df[
    indicators_df['Name'].str.lower().str.contains('|'.join(governance_keywords), na=False)
]

# Cap the listing with head(): testing the total match count inside the loop
# would stop after the very first row whenever there are more than 10 matches.
max_shown = 10
print(f"Governance Indicators ({len(governance_indicators)}):")
for _, indicator in governance_indicators.head(max_shown).iterrows():
    print(f"- {indicator['Name']}: {indicator['Label']}")

# Tell the reader when the listing was truncated
hidden_count = len(governance_indicators) - max_shown
if hidden_count > 0:
    print(f"... and {hidden_count} more")

## 5. Relationships and SDG Mappings

In [None]:
# List the relationships, if the analysis result contains that section
if 'relationships' in analysis_result:
    relationships = analysis_result['relationships']
    print(f"Found {len(relationships)} relationships:")
    for relationship in relationships:
        print(f"- {relationship}")

# List the SDG mappings, if the analysis result contains that section
if 'sdg_mappings' in analysis_result:
    sdg_mappings = analysis_result['sdg_mappings']
    print(f"\nFound {len(sdg_mappings)} SDG mappings:")
    for sdg_mapping in sdg_mappings:
        print(f"- {sdg_mapping}")

## 6. Save Results

In [None]:
# Save analysis results.
# parents=True creates any missing intermediate directory (e.g. ../data) so
# the cell also works on a fresh checkout; exist_ok=True keeps re-runs safe.
output_dir = Path('../data/indicators')
output_dir.mkdir(parents=True, exist_ok=True)

# Save the complete analysis as JSON, with an explicit encoding so the file
# does not depend on the platform's default
with open(output_dir / 'esg_ontology_analysis.json', 'w', encoding='utf-8') as f:
    json.dump(analysis_result, f, indent=2)

# Save the indicator mapping table as CSV, without the row index
indicators_df.to_csv(output_dir / 'esg_indicators_mapping.csv', index=False)

print(f"Results saved to: {output_dir}")
print("- Analysis: esg_ontology_analysis.json")
print(f"- Indicators: esg_indicators_mapping.csv ({len(indicators_df)} indicators)")

## Summary

This notebook extracts **51 comprehensive ESG indicators** from the ontology, together with their relationships and SDG mappings:
- **2 explicit Performance Indicators**: EnergyEfficiency, WasteReductionIndicator
- **49 ESG Category Concepts**: Environmental, Social, and Governance categories
- **Relationships**: Semantic connections between indicators
- **SDG Mappings**: Links to UN Sustainable Development Goals

## Next Steps

1. **Model Training**: Use these 51 ontology-derived indicators for fine-tuning
2. **Framework Integration**: Combine with expanded GRI/SASB indicators
3. **Context-Aware Extraction**: Leverage semantic relationships for better extraction
4. **Evaluation**: Test model performance on corporate reports

The ontology provides the semantic foundation for understanding ESG concepts and their relationships.