In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
import warnings
warnings.filterwarnings('ignore')

In [None]:
class NetworkDissectionAnalyzer:
    """Compare the concepts assigned to neurons of two dissected models.

    One ``descriptions.csv`` is loaded per model (labelled "ImageNet" and
    "Places365" throughout). Every CSV row is treated as one neuron: the
    analysis counts rows per concept, compares the two models, prints
    summary statistics and draws a six-panel figure.

    Attributes:
        imagenet_df: DataFrame read from the ImageNet model CSV, with
            surrounding whitespace stripped from the column names.
        places365_df: Same, for the Places365 model CSV.
    """

    # Result files used when no explicit path is passed to ``__init__``.
    DEFAULT_IMAGENET_CSV = './results/resnet18_25_07_12_21_16/descriptions.csv'
    DEFAULT_PLACES365_CSV = './results/resnet18_places_25_07_12_21_25/descriptions.csv'
    # File written by ``create_visualizations``.
    FIGURE_PATH = 'network_dissection_analysis.png'

    def __init__(self, imagenet_csv_path=None, places365_csv_path=None):
        """Load both description CSVs.

        Args:
            imagenet_csv_path: Path of the ImageNet model CSV. ``None``
                (default) falls back to ``DEFAULT_IMAGENET_CSV``.
            places365_csv_path: Path of the Places365 model CSV. ``None``
                (default) falls back to ``DEFAULT_PLACES365_CSV``.

        Raises:
            Whatever ``pandas.read_csv`` raises for a missing or
            unreadable file.
        """
        if imagenet_csv_path is None:
            imagenet_csv_path = self.DEFAULT_IMAGENET_CSV
        if places365_csv_path is None:
            places365_csv_path = self.DEFAULT_PLACES365_CSV

        self.imagenet_df = pd.read_csv(imagenet_csv_path)
        self.places365_df = pd.read_csv(places365_csv_path)

        # Clean column names (remove whitespace) so later substring
        # matching on the names is not thrown off by padded headers.
        self.imagenet_df.columns = self.imagenet_df.columns.str.strip()
        self.places365_df.columns = self.places365_df.columns.str.strip()

        print("Data loaded successfully!")
        print(f"ImageNet model data shape: {self.imagenet_df.shape}")
        print(f"Places365 model data shape: {self.places365_df.shape}")

    # ------------------------------------------------------------------
    # Small pure helpers (no plotting, no printing)
    # ------------------------------------------------------------------
    @staticmethod
    def _aligned_top_counts(imagenet_concepts, places365_concepts, top_n=15):
        """Return the first ``top_n`` ImageNet concepts with matching counts.

        Both inputs are expected to be concept-count Series sorted in
        descending order with unique index labels, as returned by
        ``analyze_concept_distribution``.

        Returns:
            ``(labels, imagenet_counts, places365_counts)`` as three lists
            of equal length. A concept absent from the Places365 model
            gets a count of 0, so every bar pair describes the concept
            named by its label.
        """
        top_imagenet = imagenet_concepts.head(top_n)
        matched = places365_concepts.reindex(top_imagenet.index).fillna(0)
        return list(top_imagenet.index), top_imagenet.tolist(), matched.tolist()

    @staticmethod
    def _entropy(concept_counts):
        """Shannon entropy (natural log) of a vector of concept counts.

        Non-positive counts are ignored and an empty / all-zero input
        yields 0.0, so the result is never NaN.
        """
        counts = np.asarray(concept_counts, dtype=float)
        counts = counts[counts > 0]
        total = counts.sum()
        if total <= 0:
            return 0.0
        probs = counts / total
        # "+ 0.0" turns the IEEE negative zero of a one-concept
        # distribution into a plain 0.0 so it prints as "0.000".
        return float(-np.sum(probs * np.log(probs))) + 0.0

    @staticmethod
    def _top_share(concept_counts, top_n=10):
        """Fraction of all neurons covered by the first ``top_n`` concepts.

        Returns 0.0 instead of dividing by zero when there are no neurons.
        The Series is expected to be sorted in descending order.
        """
        total = concept_counts.sum()
        if not total:
            return 0.0
        return concept_counts.head(top_n).sum() / total

    @staticmethod
    def _show_legend(ax):
        """Draw a legend only when the axes holds at least one labelled artist."""
        handles, _ = ax.get_legend_handles_labels()
        if handles:
            ax.legend()

    # ------------------------------------------------------------------
    # Analysis steps
    # ------------------------------------------------------------------
    def explore_data_structure(self):
        """Print the column names and the first rows of both data frames."""
        print("\n=== DATA STRUCTURE EXPLORATION ===")
        print("\nImageNet CSV columns:")
        print(self.imagenet_df.columns.tolist())
        print("\nPlaces365 CSV columns:")
        print(self.places365_df.columns.tolist())

        print("\nImageNet CSV sample:")
        print(self.imagenet_df.head())
        print("\nPlaces365 CSV sample:")
        print(self.places365_df.head())

    def analyze_concept_distribution(self):
        """Count how many neurons are assigned to each concept in both models.

        The concept column is chosen from the ImageNet frame in this order:
        first column whose name contains ``concept`` or ``label``; else the
        first whose name contains ``class``, ``prediction``, ``top`` or
        ``best``; else the last object-dtype column.

        Returns:
            ``(imagenet_concepts, places365_concepts, concept_col)`` where
            the first two are ``value_counts()`` Series (descending) and
            the last is the chosen column name.

        Raises:
            ValueError: No candidate column exists in the ImageNet frame.
            KeyError: The chosen column is missing from the Places365 frame.
        """
        print("\n=== CONCEPT DISTRIBUTION ANALYSIS ===")

        columns = self.imagenet_df.columns
        concept_cols = [col for col in columns
                        if 'concept' in col.lower() or 'label' in col.lower()]

        if concept_cols:
            concept_col = concept_cols[0]
            print(f"Using concept column: {concept_col}")
        else:
            # No obvious concept column: look for a prediction/class column.
            print("Looking for concept/class columns...")
            print("Available columns:", columns.tolist())

            potential_cols = [col for col in columns
                              if any(keyword in col.lower()
                                     for keyword in ['class', 'prediction', 'top', 'best'])]

            if potential_cols:
                concept_col = potential_cols[0]
                print(f"Using column: {concept_col}")
            else:
                # Last resort: the last non-numeric column.
                text_cols = self.imagenet_df.select_dtypes(include=['object']).columns
                if len(text_cols) == 0:
                    raise ValueError(
                        "Could not determine a concept column: the ImageNet CSV "
                        "has no matching column name and no text column"
                    )
                concept_col = text_cols[-1]
                print(f"Using last text column: {concept_col}")

        # The same column is read from both frames, so fail early with a
        # readable message instead of a bare KeyError from the lookup.
        if concept_col not in self.places365_df.columns:
            raise KeyError(
                f"Concept column '{concept_col}' selected from the ImageNet CSV "
                "is missing from the Places365 CSV"
            )

        # Count concepts for both models
        imagenet_concepts = self.imagenet_df[concept_col].value_counts()
        places365_concepts = self.places365_df[concept_col].value_counts()

        print("\nTop 10 concepts in ImageNet model:")
        print(imagenet_concepts.head(10))

        print("\nTop 10 concepts in Places365 model:")
        print(places365_concepts.head(10))

        return imagenet_concepts, places365_concepts, concept_col

    def compare_models(self, imagenet_concepts, places365_concepts):
        """Split the concepts into shared and model-specific sets.

        Args:
            imagenet_concepts: Concept-count Series of the ImageNet model.
            places365_concepts: Concept-count Series of the Places365 model.

        Returns:
            ``(common_concepts, imagenet_only, places365_only)`` as sets of
            index labels.
        """
        print("\n=== MODEL COMPARISON ===")

        # Basic statistics
        print(f"Total unique concepts in ImageNet model: {len(imagenet_concepts)}")
        print(f"Total unique concepts in Places365 model: {len(places365_concepts)}")

        # Find common and unique concepts
        imagenet_set = set(imagenet_concepts.index)
        places365_set = set(places365_concepts.index)

        common_concepts = imagenet_set & places365_set
        imagenet_only = imagenet_set - places365_set
        places365_only = places365_set - imagenet_set

        print(f"Common concepts between models: {len(common_concepts)}")
        print(f"Concepts only in ImageNet model: {len(imagenet_only)}")
        print(f"Concepts only in Places365 model: {len(places365_only)}")

        # Show some examples. ``.loc`` keeps the lookup label-based, and the
        # name is a tie-breaker so the listing does not depend on set order.
        print("\nTop 10 common concepts:")
        common_counts = [(concept,
                          imagenet_concepts.loc[concept],
                          places365_concepts.loc[concept])
                         for concept in common_concepts]
        common_counts.sort(key=lambda item: (-(item[1] + item[2]), str(item[0])))
        for concept, img_count, places_count in common_counts[:10]:
            print(f"  {concept}: ImageNet={img_count}, Places365={places_count}")

        return common_concepts, imagenet_only, places365_only

    def analyze_layers(self):
        """Count neurons per layer in both models.

        The layer column is the first column whose name contains ``layer``
        and that exists in *both* frames.

        Returns:
            ``(imagenet_layers, places365_layers, layer_col)`` with the two
            index-sorted count Series, or ``(None, None, None)`` when no
            such column is available.
        """
        print("\n=== LAYER ANALYSIS ===")

        # Require the column in both frames: it is used to index both.
        layer_cols = [col for col in self.imagenet_df.columns
                      if 'layer' in col.lower() and col in self.places365_df.columns]

        if not layer_cols:
            print("No layer column found in the data")
            return None, None, None

        layer_col = layer_cols[0]
        print(f"Using layer column: {layer_col}")

        # Count neurons per layer
        imagenet_layers = self.imagenet_df[layer_col].value_counts().sort_index()
        places365_layers = self.places365_df[layer_col].value_counts().sort_index()

        print("\nNeurons per layer - ImageNet model:")
        print(imagenet_layers)

        print("\nNeurons per layer - Places365 model:")
        print(places365_layers)

        return imagenet_layers, places365_layers, layer_col

    def create_visualizations(self, imagenet_concepts, places365_concepts, concept_col,
                              imagenet_layers=None, places365_layers=None, layer_col=None):
        """Draw the six-panel comparison figure, save it and show it.

        Panels: (1) top ImageNet concepts with the matching Places365
        counts, (2) log-binned histogram of neurons per concept, (3) neurons
        per layer (only when both layer Series are given), (4) pie of
        shared vs. model-specific concepts, (5) cumulative neuron share by
        concept rank, (6) scatter of counts for shared concepts.

        The figure is written to ``FIGURE_PATH`` and
        ``print_summary_statistics`` is called afterwards.

        Args:
            imagenet_concepts: Concept-count Series (descending) of the
                ImageNet model.
            places365_concepts: Same for the Places365 model.
            concept_col: Name of the concept column. Not used for drawing;
                kept so existing callers keep working.
            imagenet_layers: Optional neurons-per-layer Series.
            places365_layers: Optional neurons-per-layer Series.
            layer_col: Name of the layer column. Not used for drawing;
                kept so existing callers keep working.
        """
        print("\n=== CREATING VISUALIZATIONS ===")

        # Set up the plotting style (note: both calls change global state).
        plt.style.use('default')
        sns.set_palette("husl")

        models = (('ImageNet', imagenet_concepts), ('Places365', places365_concepts))
        fig = plt.figure(figsize=(20, 15))
        width = 0.35
        top_n = 15

        # 1. Top concepts comparison. Places365 counts are looked up per
        # ImageNet concept so each bar matches its tick label.
        ax1 = fig.add_subplot(2, 3, 1)
        labels, imagenet_top, places365_top = self._aligned_top_counts(
            imagenet_concepts, places365_concepts, top_n)
        x = np.arange(len(labels))
        ax1.bar(x - width / 2, imagenet_top, width, label='ImageNet', alpha=0.8)
        ax1.bar(x + width / 2, places365_top, width, label='Places365', alpha=0.8)
        ax1.set_xlabel('Concepts')
        ax1.set_ylabel('Number of Neurons')
        ax1.set_title(f'Top {top_n} ImageNet Concepts: Neuron Counts in Both Models')
        ax1.set_xticks(x)
        ax1.set_xticklabels(labels, rotation=45, ha='right')
        ax1.legend()
        ax1.grid(True, alpha=0.3)

        # 2. Concept distribution histogram
        ax2 = fig.add_subplot(2, 3, 2)
        max_count = max((series.max() for _, series in models if len(series)), default=0)
        if max_count > 0:
            # geomspace hits both end points exactly, so the largest count
            # is never lost to round-off; the upper bound is at least 2 so
            # the bins keep a non-zero width when every concept has 1 neuron.
            bins = np.geomspace(1, max(max_count, 2), 20)
            for name, series in models:
                if len(series):
                    ax2.hist(series.values, bins=bins, alpha=0.7, label=name, density=True)
        ax2.set_xlabel('Number of Neurons per Concept')
        ax2.set_ylabel('Density')
        ax2.set_title('Distribution of Neurons per Concept')
        ax2.set_xscale('log')
        self._show_legend(ax2)
        ax2.grid(True, alpha=0.3)

        # 3. Layer analysis (if available)
        if imagenet_layers is not None and places365_layers is not None:
            ax3 = fig.add_subplot(2, 3, 3)
            layers = sorted(set(imagenet_layers.index) | set(places365_layers.index))
            imagenet_vals = [imagenet_layers.get(layer, 0) for layer in layers]
            places365_vals = [places365_layers.get(layer, 0) for layer in layers]

            x = np.arange(len(layers))
            ax3.bar(x - width / 2, imagenet_vals, width, label='ImageNet', alpha=0.8)
            ax3.bar(x + width / 2, places365_vals, width, label='Places365', alpha=0.8)
            ax3.set_xlabel('Layer')
            ax3.set_ylabel('Number of Neurons')
            ax3.set_title('Neurons per Layer')
            ax3.set_xticks(x)
            ax3.set_xticklabels(layers, rotation=45)
            ax3.legend()
            ax3.grid(True, alpha=0.3)

        # 4. Unique vs Common concepts
        ax4 = fig.add_subplot(2, 3, 4)
        imagenet_set = set(imagenet_concepts.index)
        places365_set = set(places365_concepts.index)
        shared = imagenet_set & places365_set

        categories = ['Common', 'ImageNet Only', 'Places365 Only']
        values = [len(shared), len(imagenet_set - places365_set), len(places365_set - imagenet_set)]
        colors = ['green', 'blue', 'red']

        if sum(values) > 0:
            ax4.pie(values, labels=categories, autopct='%1.1f%%',
                    colors=colors, startangle=90)
        else:
            # A pie of all zeros cannot be normalised; say so instead.
            ax4.text(0.5, 0.5, 'No concepts to compare', ha='center', va='center',
                     transform=ax4.transAxes)
            ax4.set_axis_off()
        ax4.set_title('Concept Distribution Comparison')

        # 5. Cumulative distribution
        ax5 = fig.add_subplot(2, 3, 5)
        for name, series in models:
            ranked = np.sort(series.values)[::-1]
            total = ranked.sum()
            if total > 0:
                ax5.plot(np.arange(len(ranked)), np.cumsum(ranked) / total,
                         label=name, linewidth=2)
        ax5.set_xlabel('Concepts (ranked by neuron count)')
        ax5.set_ylabel('Cumulative Proportion of Neurons')
        ax5.set_title('Cumulative Distribution of Neurons')
        self._show_legend(ax5)
        ax5.grid(True, alpha=0.3)

        # 6. Scatter plot comparison
        ax6 = fig.add_subplot(2, 3, 6)
        if shared:
            shared_order = sorted(shared, key=str)
            imagenet_vals = [imagenet_concepts.loc[concept] for concept in shared_order]
            places365_vals = [places365_concepts.loc[concept] for concept in shared_order]

            ax6.scatter(imagenet_vals, places365_vals, alpha=0.6, s=50)
            ax6.set_xlabel('ImageNet Neurons')
            ax6.set_ylabel('Places365 Neurons')
            ax6.set_title('Common Concepts: Neuron Count Comparison')
            # y = x reference line spanning the larger of the two ranges.
            limit = max(max(imagenet_vals), max(places365_vals))
            ax6.plot([0, limit], [0, limit], 'r--', alpha=0.5)
            ax6.grid(True, alpha=0.3)

        fig.tight_layout()
        fig.savefig(self.FIGURE_PATH, dpi=300, bbox_inches='tight')
        plt.show()

        # Create summary statistics
        self.print_summary_statistics(imagenet_concepts, places365_concepts)

    def print_summary_statistics(self, imagenet_concepts, places365_concepts):
        """Print per-model count statistics and the entropy of each distribution.

        Args:
            imagenet_concepts: Concept-count Series of the ImageNet model.
            places365_concepts: Concept-count Series of the Places365 model.
        """
        print("\n=== SUMMARY STATISTICS ===")

        sections = (("ImageNet Model:", imagenet_concepts),
                    ("\nPlaces365 Model:", places365_concepts))
        for heading, counts in sections:
            print(heading)
            print(f"  Total neurons analyzed: {counts.sum()}")
            print(f"  Unique concepts: {len(counts)}")
            print(f"  Mean neurons per concept: {counts.mean():.2f}")
            print(f"  Median neurons per concept: {counts.median():.2f}")
            print(f"  Max neurons for single concept: {counts.max()}")

        # Diversity metrics
        imagenet_entropy = self._entropy(imagenet_concepts)
        places365_entropy = self._entropy(places365_concepts)

        print("\nDiversity Metrics:")
        print(f"  ImageNet entropy: {imagenet_entropy:.3f}")
        print(f"  Places365 entropy: {places365_entropy:.3f}")
        print("  Higher entropy indicates more diverse concept learning")

    def generate_insights(self, imagenet_concepts, places365_concepts, common_concepts,
                          imagenet_only, places365_only):
        """Print headline findings derived from the concept counts.

        Args:
            imagenet_concepts: Concept-count Series (descending) of the
                ImageNet model.
            places365_concepts: Same for the Places365 model.
            common_concepts: Concepts present in both models.
            imagenet_only: Concepts present only in the ImageNet model.
            places365_only: Concepts present only in the Places365 model.
        """
        print("\n=== KEY INSIGHTS AND FINDINGS ===")

        # 1. Specialization analysis (a tie is reported as a tie).
        print("1. Model Specialization:")
        if len(places365_only) > len(imagenet_only):
            print("   - Places365 model shows more specialized concepts")
        elif len(imagenet_only) > len(places365_only):
            print("   - ImageNet model shows more specialized concepts")
        else:
            print("   - Both models show the same number of specialized concepts")

        # 2. Concept overlap (0 when neither model has any concept).
        all_concepts = set(imagenet_concepts.index) | set(places365_concepts.index)
        overlap_ratio = len(common_concepts) / len(all_concepts) if all_concepts else 0.0
        print(f"   - Concept overlap ratio: {overlap_ratio:.3f}")

        # 3. Distribution analysis
        imagenet_top10_ratio = self._top_share(imagenet_concepts, 10)
        places365_top10_ratio = self._top_share(places365_concepts, 10)

        print("2. Concentration Analysis:")
        print(f"   - ImageNet: Top 10 concepts account for {imagenet_top10_ratio:.1%} of neurons")
        print(f"   - Places365: Top 10 concepts account for {places365_top10_ratio:.1%} of neurons")

        # 4. Unique concept examples: the most frequent ones, ordered by
        # (count desc, name) so repeated runs print the same examples.
        imagenet_examples = sorted(
            imagenet_only, key=lambda c: (-imagenet_concepts.get(c, 0), str(c)))[:5]
        places365_examples = sorted(
            places365_only, key=lambda c: (-places365_concepts.get(c, 0), str(c)))[:5]
        print("3. Dataset-Specific Learning:")
        print(f"   - ImageNet-only concepts (examples): {imagenet_examples}")
        print(f"   - Places365-only concepts (examples): {places365_examples}")

        # 5. Common concepts with the largest combined neuron count
        if common_concepts:
            common_high = [(concept,
                            imagenet_concepts.loc[concept] + places365_concepts.loc[concept])
                           for concept in common_concepts]
            common_high.sort(key=lambda item: (-item[1], str(item[0])))
            print("4. Most Important Common Concepts:")
            for concept, total_neurons in common_high[:5]:
                print(f"   - {concept}: {total_neurons} total neurons")

    def run_complete_analysis(self):
        """Run every analysis step in order: explore, count, compare, plot, report."""
        print("Starting Network Dissection Analysis...")

        # Step 1: Explore data structure
        self.explore_data_structure()

        # Step 2: Analyze concept distribution
        imagenet_concepts, places365_concepts, concept_col = self.analyze_concept_distribution()

        # Step 3: Compare models
        common_concepts, imagenet_only, places365_only = self.compare_models(
            imagenet_concepts, places365_concepts)

        # Step 4: Analyze layers
        imagenet_layers, places365_layers, layer_col = self.analyze_layers()

        # Step 5: Create visualizations
        self.create_visualizations(imagenet_concepts, places365_concepts, concept_col,
                                   imagenet_layers, places365_layers, layer_col)

        # Step 6: Generate insights
        self.generate_insights(imagenet_concepts, places365_concepts, common_concepts,
                               imagenet_only, places365_only)

        print("\n=== ANALYSIS COMPLETE ===")
        print(f"Visualization saved as '{self.FIGURE_PATH}'")
        print("Use the insights above for your report!")

In [None]:
# Entry point of the notebook: build the analyzer, then run every stage.
if __name__ == "__main__":

    # Loading the two description CSVs happens inside the constructor.
    analyzer = NetworkDissectionAnalyzer()

    # Explore -> count concepts -> compare -> layers -> plots -> insights.
    analyzer.run_complete_analysis()

    # The stages can also be invoked one at a time, for example:
    #   analyzer.explore_data_structure()
    #   imagenet_concepts, places365_concepts, concept_col = analyzer.analyze_concept_distribution()
    #   common_concepts, imagenet_only, places365_only = analyzer.compare_models(imagenet_concepts, places365_concepts)