# Social Network Analysis

This notebook creates and analyzes network representations from the collected social media data.

In [None]:
import os
import sys
from pathlib import Path
import json
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

# Add project root (the parent of the current working directory) to the
# Python path. The membership check keeps the cell idempotent: re-running it
# does not append the same entry to sys.path again.
project_root = Path.cwd().parent
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

# Import network creation functions
from src.analyzers.network_creator import (
    create_x_network,
    create_youtube_network,
    create_reddit_network,
    create_tiktok_network,
    analyze_network,
    export_network
)

## 1. Load Collected Data

First, let's load the most recent data collection results.

In [None]:
def load_latest_collection():
    """Load the most recent data collection metadata and files.

    Searches ``data/raw`` (relative to the current working directory) for
    ``collection_metadata_*.json`` files, picks the one with the newest
    modification time, and reads every CSV listed under its ``files`` key.

    Returns:
        A ``(metadata, data)`` tuple. ``metadata`` is the parsed JSON
        document; ``data`` maps ``target -> platform -> DataFrame``.

    Raises:
        FileNotFoundError: If no metadata file is found in ``data/raw``.
    """
    # Find the latest metadata file
    metadata_files = list(Path('data/raw').glob('collection_metadata_*.json'))
    if not metadata_files:
        raise FileNotFoundError("No collection metadata found")

    # "Latest" is decided by on-disk modification time, not by the file name
    latest_metadata = max(metadata_files, key=lambda p: p.stat().st_mtime)

    # Read JSON explicitly as UTF-8 rather than with the platform-default
    # encoding, which differs between operating systems
    with open(latest_metadata, encoding='utf-8') as f:
        metadata = json.load(f)

    # Load one DataFrame per (target, platform) CSV path listed in the metadata
    data = {
        target: {
            platform: pd.read_csv(file_path)
            for platform, file_path in platform_files.items()
        }
        for target, platform_files in metadata['files'].items()
    }

    return metadata, data

# Fetch the newest collection, then report its timestamp and targets
metadata, collected_data = load_latest_collection()
print(
    f"Loaded data collection from: {metadata['timestamp']}",
    f"Targets: {metadata['targets']}",
    sep="\n",
)

## 2. Create Networks

Create network representations for each platform and target.

In [None]:
def create_networks(data: dict, timestamp: str):
    """Build a network for every supported platform present under each target.

    Args:
        data: Mapping of target -> platform key -> the value handed to that
            platform's network creator.
        timestamp: Passed through unchanged to every creator call.

    Returns:
        Mapping of target -> platform key -> the value returned by the
        matching ``create_*_network`` function. Platform keys other than
        ``x``, ``youtube``, ``reddit`` and ``tiktok`` are ignored.
    """
    # (platform key, display label, creator) in processing order. Each creator
    # is wrapped so its name is only resolved when that platform is present.
    platform_builders = (
        ('x', 'X', lambda *args: create_x_network(*args)),
        ('youtube', 'YouTube', lambda *args: create_youtube_network(*args)),
        ('reddit', 'Reddit', lambda *args: create_reddit_network(*args)),
        ('tiktok', 'TikTok', lambda *args: create_tiktok_network(*args)),
    )

    created_by_target = {}
    for target, platform_data in data.items():
        created = created_by_target[target] = {}
        print(f"\nCreating networks for: {target}")

        for key, label, build in platform_builders:
            if key not in platform_data:
                continue
            print(f"Creating {label} network...")
            created[key] = build(platform_data[key], target, timestamp)

    return created_by_target

# Build the networks, tagging them with the collection's own timestamp
network_files = create_networks(
    data=collected_data,
    timestamp=metadata['timestamp'],
)

## 3. Analyze Networks

Calculate and compare network metrics across platforms and targets.

In [None]:
def analyze_networks(network_files: dict):
    """Compute metrics for every network file and print a short summary.

    Args:
        network_files: Mapping of target -> platform -> path of a GraphML
            file readable by ``nx.read_graphml``.

    Returns:
        Mapping of target -> platform -> the metrics returned by
        ``analyze_network`` for that graph.
    """
    results_by_target = {}

    for target in network_files:
        per_platform = {}
        results_by_target[target] = per_platform
        print(f"\nAnalyzing networks for: {target}")

        for platform, graph_path in network_files[target].items():
            print(f"Analyzing {platform} network...")
            graph = nx.read_graphml(graph_path)
            stats = analyze_network(graph)
            per_platform[platform] = stats

            # Headline numbers only; the full metrics are kept in the result
            print(f"  Nodes: {stats['nodes']}")
            print(f"  Edges: {stats['edges']}")
            print(f"  Density: {stats['density']:.4f}")

    return results_by_target

# Run the metric calculations over every network created above
analysis_results = analyze_networks(network_files=network_files)

## 4. Export Networks

Export networks in different formats for use in other tools.

In [None]:
def export_all_networks(network_files: dict):
    """Export every network via ``export_network`` and list the written paths.

    Args:
        network_files: Mapping of target -> platform -> network file path.

    Returns:
        Mapping of target -> platform -> the mapping of format name to path
        returned by ``export_network``.
    """
    exports_by_target = {}

    for target in network_files:
        target_exports = {}
        exports_by_target[target] = target_exports
        print(f"\nExporting networks for: {target}")

        for platform, source_path in network_files[target].items():
            print(f"Exporting {platform} network...")
            written = export_network(source_path, target, platform)
            target_exports[platform] = written

            # Show where each format ended up
            for format_name in written:
                print(f"  {format_name}: {written[format_name]}")

    return exports_by_target

# Write every network out in each format export_network provides
exported_files = export_all_networks(network_files=network_files)

## 5. Save Analysis Results

Save the network analysis results for future reference.

In [None]:
# Save analysis results: bundle the collection identity, the created and
# exported file paths, and the computed metrics into one JSON document.
analysis_output = {
    'timestamp': metadata['timestamp'],
    'targets': metadata['targets'],
    'network_files': network_files,
    'exported_files': exported_files,
    'analysis_results': analysis_results
}

output_file = f'data/networks/network_analysis_{metadata["timestamp"]}.json'
# Create the destination directory first so open() cannot fail with
# FileNotFoundError when data/networks does not exist yet.
Path(output_file).parent.mkdir(parents=True, exist_ok=True)
# Write explicitly as UTF-8 instead of relying on the platform default.
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(analysis_output, f, indent=2)

print(f"\nAnalysis results saved to: {output_file}")