# Network TQ Analysis
 
 This notebook processes network log files and generates TQ (Transmission Quality) graphs for each user/host over time.


## Setup and Imports

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')
# Import your log parser
from log_parser import process_all_logs_in_folder, load_pickle_file

# Set up plotting style
# Apply Matplotlib's 'seaborn-v0_8' style sheet to every figure created below.
# NOTE(review): presumably this style name only exists in Matplotlib releases
# that ship the renamed seaborn styles — confirm the minimum supported version.
plt.style.use('seaborn-v0_8')
# Use seaborn's "husl" palette as the default colour palette.
sns.set_palette("husl")


## Process Log Files

First, let's process all the log files in the results folder.


In [None]:
# Run the log parser over everything in the "dataset" folder (per the original
# note, this also generates the pickle files) and report whether it succeeded.
print("Processing log files...")
is_completed = process_all_logs_in_folder("dataset")

if is_completed:
    print("Successfully processed all files")
else:
    print("No data processed. Please check your log files.")


## Data Overview


In [None]:
# Load the parsed records of one result file; every later cell reads `all_data`.
# NOTE: only this single pickle is loaded — nothing here adds a file-number
# column or combines several files.
all_data = load_pickle_file('./dataset/results-1/General-#0.pkl')


# ## TQ Analysis Functions


In [None]:
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
import numpy as np

def plot_tq_for_host_by_orig_neigh(df, host_name, save_plots=True, output_dir="tq_plots"):
    """
    Plot the TQ metrics of one host over time.

    An aggregated figure covering every row of the host is produced first,
    followed by one figure per distinct (orig_ip, neigh_ip) pair. When the
    host has no rows, a message is printed and nothing is plotted.

    Args:
        df: DataFrame with network data ('host', 'timestamp', 'orig_ip' and
            'neigh_ip' columns are read here)
        host_name: Name of the host to plot
        save_plots: Whether to save plots to files
        output_dir: Directory to save plots
    """
    rows_for_host = df.loc[df['host'] == host_name].copy()
    if rows_for_host.empty:
        print(f"No data found for host: {host_name}")
        return

    # Chronological order, so the per-pair line plots are drawn left to right.
    rows_for_host = rows_for_host.sort_values('timestamp')

    # Each distinct originator/neighbour pair gets a figure of its own.
    ip_pairs = rows_for_host[['orig_ip', 'neigh_ip']].drop_duplicates()
    print(f"\nFound {len(ip_pairs)} unique orig_ip/neigh_ip combinations for host {host_name}")

    # Overview figure first (means across all pairs) ...
    plot_aggregated_tq(rows_for_host, host_name, save_plots, output_dir)

    # ... then the detailed figure for every pair.
    for orig_ip, neigh_ip in zip(ip_pairs['orig_ip'], ip_pairs['neigh_ip']):
        plot_tq_for_combination(rows_for_host, host_name, orig_ip, neigh_ip,
                                save_plots, output_dir)

def plot_aggregated_tq(host_data, host_name, save_plots, output_dir):
    """
    Create aggregated plot showing mean TQ metrics across all combinations for a host.

    The figure is a 2x2 grid: mean local TQ, mean total TQ and mean asymmetry
    penalty per timestamp (each with standard-deviation error bars), plus a
    scatter of the raw local TQ values against the raw total TQ values.

    Args:
        host_data: DataFrame with the rows of one host; the 'timestamp',
            'local_tq', 'total_tq' and 'asym_penalty' columns are read
        host_name: Host name used in the figure title and the output file name
        save_plots: Whether to save the figure as a PNG file
        output_dir: Directory to save the figure in; it is created, including
            missing parent directories, when it does not exist
    """
    # Group by timestamp and calculate mean/std across all combinations.
    agg_data = host_data.groupby('timestamp').agg({
        'local_tq': ['mean', 'std'],
        'total_tq': ['mean', 'std'],
        'asym_penalty': ['mean', 'std']
    }).reset_index()

    # Flatten the two-level column index produced by the multi-function agg.
    agg_data.columns = ['timestamp', 'local_tq_mean', 'local_tq_std',
                        'total_tq_mean', 'total_tq_std',
                        'asym_penalty_mean', 'asym_penalty_std']

    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    fig.suptitle(f'Aggregated TQ Analysis for {host_name}',
                 fontsize=16, fontweight='bold')

    # The three time-series panels differ only in metric, colour and labels:
    # (axis, column prefix, line format, y-axis label, panel title)
    time_series_panels = [
        (axes[0, 0], 'local_tq', 'b-', 'Local TQ', 'Mean Local TQ over Time'),
        (axes[0, 1], 'total_tq', 'r-', 'Total TQ', 'Mean Total TQ over Time'),
        (axes[1, 1], 'asym_penalty', 'g-', 'Asymmetry Penalty',
         'Mean Asymmetry Penalty over Time'),
    ]
    for ax, metric, line_fmt, y_label, title in time_series_panels:
        ax.errorbar(agg_data['timestamp'], agg_data[f'{metric}_mean'],
                    yerr=agg_data[f'{metric}_std'], fmt=line_fmt, linewidth=1,
                    alpha=0.7, capsize=3)
        ax.set_title(title)
        ax.set_xlabel('Timestamp')
        ax.set_ylabel(y_label)
        ax.grid(True, alpha=0.3)

    # Bottom-left panel: raw (not aggregated) local vs total TQ values.
    scatter_ax = axes[1, 0]
    scatter_ax.scatter(host_data['local_tq'], host_data['total_tq'],
                       alpha=0.4, s=10, c='gray')
    scatter_ax.set_title('Local TQ vs Total TQ')
    scatter_ax.set_xlabel('Local TQ')
    scatter_ax.set_ylabel('Total TQ')
    scatter_ax.grid(True, alpha=0.3)

    plt.tight_layout()

    if save_plots:
        # parents=True: output_dir may be nested (e.g. "tq_plots/file_1") and
        # mkdir would otherwise fail when the parent directory is missing.
        Path(output_dir).mkdir(parents=True, exist_ok=True)
        safe_hostname = host_name.replace('/', '_').replace('[', '_').replace(']', '_')
        filename = f"{output_dir}/tq_analysis_aggregated_{safe_hostname}.png"
        fig.savefig(filename, dpi=300, bbox_inches='tight')
        print(f"Saved aggregated plot: {filename}")

    plt.show()
    # Release the figure; this function is called once per host and open
    # figures would otherwise pile up in memory.
    plt.close(fig)

def plot_tq_for_combination(host_data, host_name, orig_ip, neigh_ip, save_plots, output_dir):
    """
    Create plot for a specific orig_ip and neigh_ip combination.

    The figure is a 2x2 grid: local TQ, total TQ and asymmetry penalty over
    time, plus a scatter of local TQ against total TQ. Record count and the
    mean of each metric are printed afterwards. Nothing is plotted or printed
    when the combination has no rows.

    Args:
        host_data: DataFrame with the rows of one host; the 'orig_ip',
            'neigh_ip', 'timestamp', 'local_tq', 'total_tq' and
            'asym_penalty' columns are read
        host_name: Host name used in the figure title and the output file name
        orig_ip: Value of 'orig_ip' to select
        neigh_ip: Value of 'neigh_ip' to select
        save_plots: Whether to save the figure as a PNG file
        output_dir: Directory to save the figure in; it is created, including
            missing parent directories, when it does not exist
    """
    # Filter data for the requested combination.
    combo_data = host_data[(host_data['orig_ip'] == orig_ip) &
                           (host_data['neigh_ip'] == neigh_ip)].copy()

    if combo_data.empty:
        return

    # Sort here as well, so the function also works when called directly
    # with unsorted data.
    combo_data = combo_data.sort_values('timestamp')

    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    # "\n" puts the origin/neighbour pair on its own title line (the previous
    # "\O" was an invalid escape that rendered a literal backslash).
    fig.suptitle(f'TQ Analysis for {host_name}\nOrigin: {orig_ip}  neighbor: {neigh_ip}',
                 fontsize=14, fontweight='bold')

    # The three time-series panels differ only in metric, colour and labels:
    # (axis, column, line format, y-axis label, panel title)
    time_series_panels = [
        (axes[0, 0], 'local_tq', 'b-', 'Local TQ', 'Local TQ over Time'),
        (axes[0, 1], 'total_tq', 'r-', 'Total TQ', 'Total TQ over Time'),
        (axes[1, 1], 'asym_penalty', 'g-', 'Asymmetry Penalty',
         'Asymmetry Penalty over Time'),
    ]
    for ax, column, line_fmt, y_label, title in time_series_panels:
        ax.plot(combo_data['timestamp'], combo_data[column], line_fmt, linewidth=1.5)
        ax.set_title(title)
        ax.set_xlabel('Timestamp')
        ax.set_ylabel(y_label)
        ax.grid(True, alpha=0.3)

    # Bottom-left panel: local vs total TQ.
    scatter_ax = axes[1, 0]
    scatter_ax.scatter(combo_data['local_tq'], combo_data['total_tq'], alpha=0.6, s=20)
    scatter_ax.set_title('Local TQ vs Total TQ')
    scatter_ax.set_xlabel('Local TQ')
    scatter_ax.set_ylabel('Total TQ')
    scatter_ax.grid(True, alpha=0.3)

    plt.tight_layout()

    if save_plots:
        # parents=True: output_dir may be nested (e.g. "tq_plots/file_1") and
        # mkdir would otherwise fail when the parent directory is missing.
        Path(output_dir).mkdir(parents=True, exist_ok=True)
        safe_hostname = host_name.replace('/', '_').replace('[', '_').replace(']', '_')
        # str(): tolerate address values that are not plain strings.
        safe_orig = str(orig_ip).replace('/', '_').replace('[', '_').replace(']', '_').replace(':', '_')
        safe_neigh = str(neigh_ip).replace('/', '_').replace('[', '_').replace(']', '_').replace(':', '_')
        filename = f"{output_dir}/tq_{safe_hostname}_{safe_orig}_to_{safe_neigh}.png"
        fig.savefig(filename, dpi=300, bbox_inches='tight')
        print(f"Saved plot: {filename}")

    plt.show()
    # Release the figure; one is created per combination and open figures
    # would otherwise pile up in memory.
    plt.close(fig)

    # Print statistics
    print(f"\nStats for {host_name} ({orig_ip} → {neigh_ip}):")
    print(f"Records: {len(combo_data)}")
    print(f"Local TQ - Mean: {combo_data['local_tq'].mean():.2f}")
    print(f"Total TQ - Mean: {combo_data['total_tq'].mean():.2f}")
    print(f"Asym Penalty - Mean: {combo_data['asym_penalty'].mean():.2f}")

def plot_combination_summary(df, host_name):
    """
    Print a per-combination summary table for one host.

    For every (orig_ip, neigh_ip) pair of the host the table lists the mean
    local TQ, mean total TQ and mean asymmetry penalty (rounded to two
    decimals) and the number of records, ordered by record count, largest
    first. When the host has no rows, a message is printed instead.

    Args:
        df: DataFrame with network data
        host_name: Name of the host to summarise
    """
    rows_for_host = df[df['host'] == host_name].copy()
    if rows_for_host.empty:
        print(f"No data found for host: {host_name}")
        return

    # One output row per originator/neighbour pair.
    per_pair = rows_for_host.groupby(['orig_ip', 'neigh_ip'])
    summary = per_pair.agg(
        local_tq_mean=('local_tq', 'mean'),
        total_tq_mean=('total_tq', 'mean'),
        asym_penalty_mean=('asym_penalty', 'mean'),
        record_count=('timestamp', 'count'),
    )
    summary = summary.round(2).sort_values('record_count', ascending=False)

    print(f"\nCombination Summary for {host_name}:")
    print(summary)

# Example usage:
# plot_tq_for_host_by_orig_neigh(df, "your_host_name")
# plot_combination_summary(df, "your_host_name")


# ## Generate TQ Graphs for All Hosts


In [None]:
# All records come from the single pickle loaded earlier in the notebook.
df = all_data

# Banner (the per-file headline that used to sit between the two rules is disabled).
banner = '=' * 50
print(f"\n{banner}")
print(banner)

# Get unique hosts in this file
unique_hosts = df['host'].unique()
print(f"Found {len(unique_hosts)} unique hosts in this file")

# Create output directory name for this file
output_dir = "tq_plots/file_1"

# One aggregated figure plus one figure per orig_ip/neigh_ip pair, per host.
for i, host in enumerate(unique_hosts):
    print(f"\nGenerating plots for host {i+1}/{len(unique_hosts)}: {host}")
    plot_tq_for_host_by_orig_neigh(df, host, save_plots=True, output_dir=output_dir)

# NOTE: a per-file host comparison figure (plot_tq_comparison over at most five
# hosts, saved as f"{output_dir}/tq_comparison.png") was sketched here but is
# disabled; plot_tq_comparison is not defined in the visible notebook.


# ## Overall Analysis Across All Files


In [None]:
# NOTE: no earlier cell of this notebook defines `dataframes` (presumably a
# mapping of file number -> DataFrame from a multi-file version of the
# workflow — TODO confirm). Look it up defensively so that this cell skips the
# cross-file analysis instead of failing with NameError when it is absent.
loaded_frames = globals().get('dataframes') or {}

if len(loaded_frames) > 1:
    print(f"\n{'='*50}")
    print("Overall Analysis Across All Files")
    print(f"{'='*50}")

    # Overall statistics
    print("\nOverall TQ Statistics:")
    print(all_data[['local_tq', 'total_tq', 'asym_penalty', 'own_bcast', 'real_recv']].describe())

    # Host performance comparison across all files
    print("\nHost Performance Summary:")
    host_summary = all_data.groupby('host').agg({
        'local_tq': ['mean', 'std', 'count'],
        'total_tq': ['mean', 'std'],
        'asym_penalty': ['mean', 'std']
    }).round(2)

    print(host_summary.head(10))

    # Generate overall comparison plots (2x2 grid on one figure)
    plt.figure(figsize=(15, 10))

    # Subplot 1: Average TQ by host (first ten hosts in groupby order)
    plt.subplot(2, 2, 1)
    host_avg_tq = all_data.groupby('host')[['local_tq', 'total_tq']].mean()
    host_avg_tq.head(10).plot(kind='bar', ax=plt.gca())
    plt.title('Average TQ by Host (Top 10)')
    plt.xticks(rotation=45)

    # Subplot 2: TQ distribution
    plt.subplot(2, 2, 2)
    plt.hist(all_data['local_tq'], alpha=0.5, label='Local TQ', bins=30)
    plt.hist(all_data['total_tq'], alpha=0.5, label='Total TQ', bins=30)
    plt.title('TQ Distribution')
    plt.legend()

    # Subplot 3: TQ over time for the three hosts with the most records
    plt.subplot(2, 2, 3)
    sample_hosts = all_data['host'].value_counts().head(3).index
    for host in sample_hosts:
        host_data = all_data[all_data['host'] == host].sort_values('timestamp')
        plt.plot(host_data['timestamp'], host_data['local_tq'], label=host, alpha=0.7)
    plt.title('Local TQ Over Time (Top 3 Hosts)')
    plt.legend()

    # Subplot 4: Asymmetry penalty distribution for the same three hosts
    # NOTE(review): newer Matplotlib releases rename boxplot's `labels`
    # argument to `tick_labels` — confirm against the installed version.
    plt.subplot(2, 2, 4)
    plt.boxplot([all_data[all_data['host'] == host]['asym_penalty'].values
                 for host in sample_hosts], labels=sample_hosts)
    plt.title('Asymmetry Penalty Distribution')
    plt.xticks(rotation=45)

    plt.tight_layout()
    # savefig does not create missing directories, so make sure the target exists.
    Path('tq_plots').mkdir(parents=True, exist_ok=True)
    plt.savefig('tq_plots/overall_analysis.png', dpi=300, bbox_inches='tight')
    plt.show()
else:
    print("Skipping overall analysis: it needs more than one loaded file.")

# ## Summary and Export


In [None]:
# NOTE: no earlier cell of this notebook defines `dataframes` (presumably a
# mapping of file number -> DataFrame from a multi-file version of the
# workflow — TODO confirm). When it is absent, summarise the single loaded
# file under number 1 instead of failing with NameError.
loaded_frames = globals().get('dataframes')
if loaded_frames is None:
    loaded_frames = {1: all_data}

if loaded_frames:
    print(f"\n{'='*50}")
    print("Analysis Complete!")
    print(f"{'='*50}")

    # Export summary statistics: one row per (file, host).
    summary_columns = ['file_number', 'host', 'record_count',
                       'avg_local_tq', 'std_local_tq',
                       'avg_total_tq', 'std_total_tq',
                       'avg_asym_penalty', 'time_span']
    summary_stats = []
    # `file_df` rather than `df`, so the notebook-level `df` is not rebound.
    for file_num, file_df in loaded_frames.items():
        for host in file_df['host'].unique():
            host_rows = file_df[file_df['host'] == host]
            summary_stats.append({
                'file_number': file_num,
                'host': host,
                'record_count': len(host_rows),
                'avg_local_tq': host_rows['local_tq'].mean(),
                'std_local_tq': host_rows['local_tq'].std(),
                'avg_total_tq': host_rows['total_tq'].mean(),
                'std_total_tq': host_rows['total_tq'].std(),
                'avg_asym_penalty': host_rows['asym_penalty'].mean(),
                'time_span': host_rows['timestamp'].max() - host_rows['timestamp'].min()
            })

    # Explicit columns keep the frame well-formed even when no host was found.
    summary_df = pd.DataFrame(summary_stats, columns=summary_columns)
    summary_df.to_csv('tq_analysis_summary.csv', index=False)

    print(f"Generated plots for {len(summary_df)} host-file combinations")
    print("Plots saved in: tq_plots/ directory")
    print("Summary statistics saved to: tq_analysis_summary.csv")

    # Show final summary
    print("\nFinal Summary:")
    print(f"Files processed: {len(loaded_frames)}")
    print(f"Total records: {len(all_data) if 'all_data' in locals() else 'N/A'}")
    print(f"Unique hosts: {summary_df['host'].nunique()}")
    print(f"Average records per host: {summary_df['record_count'].mean():.1f}")
