In [None]:
# Import necessary libraries
import pandas as pd  
import networkx as nx  
import numpy as np  
import matplotlib.pyplot as plt  
import os  

# Define a class to handle network disruption analysis
class NetworkDisruption:
    """Simulate the step-by-step disruption of a network and record its metrics.

    The edge list and the disruption keys are read from ';'-separated CSV
    files. Each column of the key file holds, for one disruption model, the
    order in which nodes are targeted. For every step the number of connected
    components, the size of the largest component and the global efficiency
    are recorded.
    """

    # Disruption models, in the same order as the columns of the key file
    # (run_all_models passes the list position as the column index).
    MODELS = ['OS1', 'OS2', 'IsF', 'LOF', 'COV', 'SVM']
    # One matplotlib colour per entry of MODELS.
    MODEL_COLORS = ['b', 'g', 'r', 'c', 'm', 'y']

    def __init__(self, edge_file, key_file, capital_strategy):
        """Load the edge and key CSV files and build the initial graph.

        Args:
            edge_file: Path to the ';'-separated CSV file with the edges.
            key_file: Path to the ';'-separated CSV file with the keys.
            capital_strategy: Label of the strategy being evaluated; it is
                used in printed messages, plot titles and file names.
        """
        self.df_edges = pd.read_csv(edge_file, sep=';')
        self.df_key = pd.read_csv(key_file, sep=';')
        self.G = self.create_graph(self.df_edges)
        self.total_nodes = len(self.G.nodes())
        self.capital_strategy = capital_strategy

    def create_graph(self, df_edges):
        """Return an undirected graph built from the rows of ``df_edges``."""
        G = nx.Graph()
        G.add_edges_from(df_edges.values.tolist())
        return G

    def components_graph(self, graph):
        """Return ``(num_components, size_largest_component)`` for ``graph``.

        An empty graph yields ``(0, 0)`` instead of raising, so a simulation
        that runs out of edges can still record its metrics.
        """
        component_sizes = [len(c) for c in nx.connected_components(graph)]
        return len(component_sizes), max(component_sizes, default=0)

    def substitute_values(self, df, key, n):
        """Return ``df`` with every occurrence of ``key[n]`` replaced by ``key[n + 1]``."""
        return df.replace(key[n], key[n + 1])

    def delete_equal_values(self, df_edges):
        """Return ``df_edges`` without rows whose first two columns are equal (self-loops)."""
        return df_edges[df_edges.iloc[:, 0] != df_edges.iloc[:, 1]]

    def remodel_node(self, G, node_key):
        """Return a copy of ``G`` without ``node_key``; ``G`` itself is not modified."""
        new_G = G.copy()
        if node_key in new_G.nodes:
            new_G.remove_node(node_key)
        return new_G

    def normalize_vector(self, vector):
        """Min-max normalize ``vector`` to the range [0, 1].

        Args:
            vector: Sequence (list or array) of numbers.

        Returns:
            A float numpy array. An empty input yields an empty array, and an
            input whose values are all equal yields zeros rather than the NaNs
            a division by a zero range would produce.
        """
        values = np.asarray(vector, dtype=float)
        if values.size == 0:
            return values
        min_value, max_value = np.min(values), np.max(values)
        value_range = max_value - min_value
        if value_range == 0:
            return np.zeros_like(values)
        return (values - min_value) / value_range

    def find_position(self, vector):
        """Return the index of the first maximum of ``vector``."""
        return np.argmax(vector)

    def disrupt_network(self, model_index):
        """Run the disruption simulation for one model.

        Args:
            model_index: Column index of the model in the key file.

        Returns:
            A dictionary with the lists ``steps``, ``components``,
            ``size_largest_component`` and ``global_efficiency`` (one entry
            for the initial graph plus one per iteration) and the min-max
            normalized version of the three metrics.

        Note:
            The loop reads ``key[n]`` and ``key[n + 1]`` for every ``n`` below
            ``len(G) - 1``, so the key column needs at least as many entries
            as the initial graph has nodes.
        """
        key = np.array(self.df_key.iloc[:, model_index])
        steps, components, size_largest_component, global_efficiency = [], [], [], []

        # Record the metrics of the undisturbed network first.
        G, df_edges = self.G.copy(), self.df_edges.copy()
        num_components, largest_component = self.components_graph(G)
        components.append(num_components)
        size_largest_component.append(largest_component)
        global_efficiency.append(nx.global_efficiency(G))
        steps.append(self.total_nodes - len(G.nodes()))

        # The range is evaluated once, on the initial graph, even though G is
        # rebound inside the loop.
        for n in range(len(G) - 1):
            # Metrics are measured on the graph with the targeted node removed.
            G = self.remodel_node(G, key[n])
            num_components, largest_component = self.components_graph(G)
            components.append(num_components)
            size_largest_component.append(largest_component)
            global_efficiency.append(nx.global_efficiency(G))

            # The edge list is then updated by relabelling the targeted node
            # as the next key entry and dropping the resulting self-loops;
            # the graph for the next iteration is rebuilt from that list.
            df_edges = self.substitute_values(df_edges, key, n)
            df_edges = self.delete_equal_values(df_edges)
            G = self.create_graph(df_edges)
            steps.append(self.total_nodes - len(G.nodes()))

        return {
            'steps': steps,
            'components': components,
            'size_largest_component': size_largest_component,
            'global_efficiency': global_efficiency,
            'normalized_components': self.normalize_vector(components),
            'normalized_size_largest_component': self.normalize_vector(size_largest_component),
            'normalized_global_efficiency': self.normalize_vector(global_efficiency)
        }

    def run_all_models(self):
        """Run the simulation for every model and pick the fastest one.

        The "steps to disruption" of a model is the position at which its
        number of components is largest. The best model is the one with the
        smallest such position; ties keep the earlier model in ``MODELS``.

        Returns:
            ``(results, best_model, best_steps)`` where ``results`` maps each
            model name to the dictionary returned by ``disrupt_network``.
        """
        results = {}
        best_model = None
        best_steps = float('inf')

        print(f"Capital: {self.capital_strategy}\n")

        for i, model in enumerate(self.MODELS):
            result = self.disrupt_network(i)
            results[model] = result
            pos = self.find_position(result['components'])

            print(f"{model}")
            print(f"Steps to disruption: {pos}")
            print(f"Number of components: {result['components'][pos]}")
            print(f"Number of nodes in the largest component: {result['size_largest_component'][pos]}")
            print(f"Global Efficiency: {result['global_efficiency'][pos]}\n")

            # Strict comparison: the first model reaching a given position wins.
            if pos < best_steps:
                best_steps = pos
                best_model = model

        print(f"\nFor the {self.capital_strategy} capital strategy, the best disruption model is {best_model} with {best_steps} steps.")
        return results, best_model, best_steps

    def plot_results(self, results, best_model, best_steps):
        """Plot the normalized metrics of all models and mark the best model.

        The figure is saved as ``figure/NetworkDisruption_<strategy>.png`` and
        then shown.

        Args:
            results: Mapping of model name to ``disrupt_network`` output.
            best_model: Name of the best model (an entry of ``MODELS``).
            best_steps: x position of the dashed vertical marker.
        """
        models = self.MODELS
        colors = self.MODEL_COLORS
        best_model_color = colors[models.index(best_model)]

        fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(10, 18))

        for i, model in enumerate(models):
            result = results[model]

            axes[0].plot(result['steps'], result['normalized_components'], label=model, color=colors[i])
            axes[1].plot(result['steps'], result['normalized_global_efficiency'], label=model, color=colors[i])
            axes[2].plot(result['steps'], result['normalized_size_largest_component'], label=model, color=colors[i])

        # NOTE(review): the marker is drawn at x=best_steps as received, while
        # plot_best_strategy uses steps[position]; confirm which is intended.
        axes[0].set_title(f'{self.capital_strategy} capital ', fontsize=28)
        axes[0].axvline(x=best_steps, color=best_model_color, linestyle='dashed')
        axes[0].set_ylabel('number of components', fontsize=28)

        axes[1].axvline(x=best_steps, color=best_model_color, linestyle='dashed')
        axes[1].set_ylabel('global efficiency', fontsize=28)

        axes[2].set_xlabel('steps', fontsize=28)
        axes[2].axvline(x=best_steps, color=best_model_color, linestyle='dashed')
        axes[2].set_ylabel('largest component size', fontsize=28)

        for ax in axes:
            ax.legend(loc='upper right', fontsize=22)
            ax.tick_params(axis='x', labelsize=26)
            ax.tick_params(axis='y', labelsize=26)

        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs('figure', exist_ok=True)

        plt.tight_layout()
        plt.savefig(f'figure/NetworkDisruption_{self.capital_strategy}.png', dpi=300)
        plt.show()

    def plot_best_strategy(self, results, best_model, best_strategy):
        """Plot the three normalized metrics of the best model in one figure.

        The figure is saved as ``figure/BestStrategy_<best_model>.png`` and
        then shown.

        Args:
            results: Mapping of model name to ``disrupt_network`` output; only
                the entry for ``best_model`` is read.
            best_model: Name of the model to plot.
            best_strategy: Name of the capital strategy (currently unused).
        """
        best_result = results[best_model]
        steps = best_result['steps']
        normalized_components = best_result['normalized_components']
        normalized_efficiency = best_result['normalized_global_efficiency']
        normalized_size_largest_component = best_result['normalized_size_largest_component']

        plt.figure(figsize=(12, 8))

        plt.plot(steps, normalized_components, label='number of components', color='b')
        plt.plot(steps, normalized_efficiency, label='global efficiency', color='g')
        plt.plot(steps, normalized_size_largest_component, label='largest component size', color='c')

        # Mark the step at which the number of components peaks.
        best_step_position = self.find_position(best_result['components'])
        plt.axvline(x=steps[best_step_position], color='r', linestyle='dashed')

        plt.xlabel('steps', fontsize=18)
        plt.ylabel('metrics', fontsize=18)
        plt.legend(loc='upper right', fontsize=18)
        plt.tick_params(axis='x', labelsize=16)
        plt.tick_params(axis='y', labelsize=16)
        plt.tight_layout()

        # This method may be called without plot_results having run first, so
        # make sure the output folder exists before saving.
        os.makedirs('figure', exist_ok=True)
        plt.savefig(f'figure/BestStrategy_{best_model}.png', dpi=300)
        plt.show()

# Function to verify the best disruption model across all capital strategies
def verificar_melhor_modelo(edge_file, key_files):
    """Find the best disruption model across all capital strategies.

    For every ``(key_file, capital_strategy)`` pair the full analysis is run
    and plotted. The strategy whose best model needs the fewest steps wins;
    ties keep the earlier strategy. The winner is printed, plotted and written
    to ``data/parameters.txt`` (model, strategy and steps, one per line).

    Args:
        edge_file: Path to the edge CSV file shared by all strategies.
        key_files: Iterable of ``(key_file, capital_strategy)`` pairs.

    Raises:
        ValueError: If ``key_files`` is empty.
    """
    key_files = list(key_files)
    if not key_files:
        # Without any strategy there is nothing to compare, plot or save.
        raise ValueError("key_files must contain at least one (key_file, capital_strategy) pair")

    melhor_modelo_global = None  # Best model found so far
    menor_passos_global = float('inf')  # Fewest steps found so far
    melhor_estrategia_capital = None  # Strategy the best model belongs to
    melhor_analise = None  # NetworkDisruption object of the best strategy
    melhores_resultados = None  # Per-model results of the best strategy

    for key_file, capital_strategy in key_files:
        network_disruption = NetworkDisruption(edge_file, key_file, capital_strategy)
        results, best_model, best_steps = network_disruption.run_all_models()
        network_disruption.plot_results(results, best_model, best_steps)

        # Strict comparison: on a tie the earlier strategy is kept.
        if best_steps < menor_passos_global:
            menor_passos_global = best_steps
            melhor_modelo_global = best_model
            melhor_estrategia_capital = capital_strategy
            melhor_analise = network_disruption
            melhores_resultados = results

    print(f"\nThe best disruption model is {melhor_modelo_global} with {menor_passos_global} steps for {melhor_estrategia_capital} capital.")

    # Plot with the winning strategy's own object and results instead of
    # relying on whatever the loop variable happened to hold last.
    melhor_analise.plot_best_strategy(melhores_resultados, melhor_modelo_global, melhor_estrategia_capital)

    # edge_file may live elsewhere, so make sure the output folder exists.
    os.makedirs('data', exist_ok=True)
    with open('data/parameters.txt', 'w') as f:
        f.write(f"{melhor_modelo_global}\n")
        f.write(f"{melhor_estrategia_capital}\n")
        f.write(f"{menor_passos_global}\n")

# Main Method - Program Execution
def main():
    """Read the network file name and run the analysis for every strategy."""
    # The first line of data/network.txt names the edge file inside data/.
    pointer_path = os.path.join('data', 'network.txt')
    with open(pointer_path, 'r') as handle:
        network_filename = handle.readline().strip()

    # (key file, capital strategy label) pairs, one per strategy.
    strategy_inputs = [
        ('data/key_human.csv', 'human'),
        ('data/key_social.csv', 'social'),
        ('data/key_mixed.csv', 'mix'),
    ]

    verificar_melhor_modelo(os.path.join('data', network_filename), strategy_inputs)
    
# Run the analysis only when this file is executed directly, so importing it
# as a module does not trigger the file reads, plots and output files.
if __name__ == "__main__":
    # Entry point of the program.
    main()

# Documentation for the Network Disruption Analysis Script

## Overview
This script analyzes the resilience of networks under different disruption strategies. Using centrality measures and anomaly-based models, it determines the most effective method to fragment a network. The analysis includes:
- Evaluation of different disruption models (e.g., OS1, OS2, IsF, LOF, etc.).
- Simulation of node removal based on predetermined keys.
- Metrics like number of components, largest component size, and global efficiency.
- Visualization of results to identify the most effective disruption strategy.

---

## Modules and Functions

### **Imports**
- `pandas`: Handles data manipulation and reading/writing CSV files.
- `networkx`: Constructs and analyzes graphs.
- `numpy`: Performs numerical operations like normalization.
- `matplotlib`: Visualizes results with plots.
- `os`: Interacts with the file system.

---

### **Class: `NetworkDisruption`**
Handles the core functionality of the analysis, including graph construction, simulation of disruptions, and metric calculations.

#### **`__init__(self, edge_file, key_file, capital_strategy)`**
- **Parameters**:
  - `edge_file`: Path to the CSV file containing edge data.
  - `key_file`: Path to the CSV file containing disruption keys.
  - `capital_strategy`: Label of the strategy being evaluated (the script uses 'human', 'social', and 'mix').
- **Attributes**:
  - `self.df_edges`: DataFrame of edges.
  - `self.df_key`: DataFrame of disruption keys.
  - `self.G`: NetworkX graph created from edge data.
  - `self.total_nodes`: Total number of nodes in the graph.
  - `self.capital_strategy`: Disruption strategy.

---

#### **`create_graph(self, df_edges)`**
- **Purpose**: Constructs a graph from the edge DataFrame.
- **Input**: `df_edges` (DataFrame of edges).
- **Output**: Graph object.

---

#### **`components_graph(self, graph)`**
- **Purpose**: Calculates the number of connected components and the size of the largest component.
- **Input**: `graph` (NetworkX graph).
- **Output**: Tuple `(num_components, size_largest_component)`.

---

#### **`substitute_values(self, df, key, n)`**
- **Purpose**: Replaces nodes in the edge DataFrame based on the disruption key.
- **Input**:
  - `df`: Edge DataFrame.
  - `key`: Disruption key array.
  - `n`: Index for the key.
- **Output**: Updated DataFrame.

---

#### **`delete_equal_values(self, df_edges)`**
- **Purpose**: Removes self-loops from the edge DataFrame.
- **Input**: `df_edges` (DataFrame of edges).
- **Output**: Updated DataFrame.

---

#### **`remodel_node(self, G, node_key)`**
- **Purpose**: Simulates the removal of a node from the graph.
- **Input**:
  - `G`: NetworkX graph.
  - `node_key`: Node to be removed.
- **Output**: Modified graph.

---

#### **`normalize_vector(self, vector)`**
- **Purpose**: Normalizes a vector to the range `[0, 1]`.
- **Input**: `vector` (list or numpy array of numbers).
- **Output**: Normalized array.

---

#### **`find_position(self, vector)`**
- **Purpose**: Finds the index of the maximum value in a vector.
- **Input**: `vector` (list or numpy array of numbers).
- **Output**: Index of the maximum value.

---

#### **`disrupt_network(self, model_index)`**
- **Purpose**: Simulates network disruption for a given model.
- **Input**:
  - `model_index`: Index of the column in the key file corresponding to the model.
- **Output**: Dictionary with disruption metrics:
  - `steps`
  - `components`
  - `size_largest_component`
  - `global_efficiency`
  - Normalized metrics.

---

#### **`run_all_models(self)`**
- **Purpose**: Evaluates all models and determines the best one for the current strategy.
- **Output**:
  - Results dictionary for all models.
  - Best model name.
  - Best model's steps.

---

#### **`plot_results(self, results, best_model, best_steps)`**
- **Purpose**: Visualizes the results for all models and highlights the best model.
- **Input**:
  - `results`: Dictionary of results for all models.
  - `best_model`: Name of the best model.
  - `best_steps`: Steps to disrupt the network for the best model.

---

#### **`plot_best_strategy(self, results, best_model, best_strategy)`**
- **Purpose**: Visualizes the best strategy's results.
- **Input**:
  - `results`: Dictionary of results for all models of the best strategy (indexed by model name; only the entry for `best_model` is plotted).
  - `best_model`: Name of the best model.
  - `best_strategy`: Strategy name.

---

### **Function: `verificar_melhor_modelo(edge_file, key_files)`**
- **Purpose**: Determines the best disruption model across all strategies.
- **Input**:
  - `edge_file`: Path to the edge data file.
  - `key_files`: List of tuples with key file paths and strategies.
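- **Output** (the function itself returns nothing):
  - Prints the best global model, strategy, and steps.
  - Saves the best model, strategy, and steps to `data/parameters.txt`.
  - Plots the results of each strategy and of the best model.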

---

### **Function: `main()`**
- **Purpose**: Executes the program. Reads input files and runs the analysis for all strategies.

---

## Usage Instructions
1. Prepare the input files:
   - `data/network.txt`: Contains the edge file name.
   - `data/key_human.csv`, `data/key_social.csv`, `data/key_mixed.csv`: Keys for each strategy.
2. Outputs:
   - Results for each model printed in the console.
   - Plots saved in the `figure/` directory.
   - Best model details saved in `data/parameters.txt`.