# In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np

# Class responsible for processing anomaly scores
class AnomalyScoreProcessor:
    """Turn per-row scores and detector outputs into ranking tables.

    The input CSV (';'-separated) must provide an 'id' column, every column
    named in SCORE_COLUMNS and, for each entry of ANOMALY_MODELS, its
    prediction column and its score column. ``process_scores`` writes two
    ';'-separated CSV files:

    * key file: one column per label, holding the ids in ranked order;
    * value file: 'id' plus one column per label, holding each input row's
      score for that label, in input row order.
    """

    # Raw score columns exported unchanged; the column name is also the label.
    SCORE_COLUMNS = ('OS1', 'OS2')

    # Output label -> (prediction column, score column) in the input file.
    ANOMALY_MODELS = {
        'IsF': ('anomaly-IsF', 'scores-IsF'),
        'LOF': ('anomaly-Lof', 'scores-Lof'),
        'COV': ('anomaly-Cov', 'scores-Cov'),
        'SVM': ('anomaly-SVM', 'scores-SVM'),
    }

    def __init__(self, input_file, output_key_file, output_value_file):
        """Load the input table and remember the two output locations.

        Args:
            input_file: ';'-separated CSV passed to ``pandas.read_csv``.
            output_key_file: destination of the ranked-id table.
            output_value_file: destination of the per-row score table.
        """
        self.data = pd.read_csv(input_file, sep=';')
        self.output_key_file = output_key_file
        self.output_value_file = output_value_file

    def _create_score_df(self, score_col, label):
        """Build the key and value frames for one raw score column.

        Args:
            score_col: name of the score column in ``self.data``.
            label: column name used in both returned frames.

        Returns:
            A ``(df_key, df_valor)`` tuple. ``df_key`` has the single column
            ``label`` listing ids by descending score. ``df_valor`` has 'id'
            and ``label`` (the unmodified score) in input row order.
        """
        # A stable sort keeps tied scores in input order, so the ranking does
        # not depend on the sorting algorithm's internals.
        ranked = pd.DataFrame({
            'id': self.data['id'],
            'pontuacao': self.data[score_col],
        }).sort_values(by='pontuacao', ascending=False, kind='mergesort')

        df_key = pd.DataFrame({label: ranked['id'].tolist()})
        df_valor = pd.DataFrame({
            'id': self.data['id'],
            label: self.data[score_col],
        })
        return df_key, df_valor

    def _create_anomaly_df(self, pred_col, score_col, label):
        """Build the key and value frames for one anomaly detector.

        Rows are ordered by prediction and then by score, both ascending, so
        rows predicted -1 come first and, within a prediction, the lowest
        scores come first. The first row receives rank n (the row count), the
        last row rank 1, and ranks are divided by n to fall in (0, 1].

        Args:
            pred_col: name of the detector's prediction column.
            score_col: name of the detector's score column.
            label: column name used in both returned frames.

        Returns:
            A ``(df_key, df_valor)`` tuple. ``df_key`` has the single column
            ``label`` listing ids in ranked order. ``df_valor`` has 'id' and
            ``label`` (the normalised rank) in input row order.
        """
        ranked = pd.DataFrame({
            'id': self.data['id'],
            'predicao': self.data[pred_col],
            'pontuacao': self.data[score_col],
        }).sort_values(['predicao', 'pontuacao'])

        n_rows = len(ranked)
        # The largest rank equals the row count, so dividing by it normalises
        # the ranks; max(..., 1) only guards the empty-table case.
        normalized = pd.Series(
            np.arange(n_rows, 0, -1) / max(n_rows, 1),
            index=ranked.index,
        )

        df_valor = pd.DataFrame({
            'id': self.data['id'],
            # Bring the ranks back from sorted order to input row order.
            label: normalized.reindex(self.data.index),
        })
        df_key = pd.DataFrame({label: ranked['id'].tolist()})
        return df_key, df_valor

    def process_scores(self) -> None:
        """Compute every ranking and write the key and value CSV files.

        The per-label frames are all derived from the same rows of
        ``self.data``, so they are combined by row position. Joining on 'id'
        instead would multiply rows whenever an id occurs more than once.
        """
        key_frames = []
        value_columns = [self.data['id']]

        for column in self.SCORE_COLUMNS:
            df_key, df_valor = self._create_score_df(column, column)
            key_frames.append(df_key)
            value_columns.append(df_valor[column])

        for label, (pred_col, score_col) in self.ANOMALY_MODELS.items():
            df_key, df_valor = self._create_anomaly_df(pred_col, score_col, label)
            key_frames.append(df_key)
            value_columns.append(df_valor[label])

        df_key = pd.concat(key_frames, axis=1)
        df_valor = pd.concat(value_columns, axis=1)

        df_key.to_csv(self.output_key_file, sep=';', index=False)
        df_valor.to_csv(self.output_value_file, sep=';', index=False)

# Main method to run the program
def main():
    """Process the human, social and mixed network files in turn.

    Each job reads one input CSV and writes its key and value CSV files.
    """
    # (input file, key output, value output) for each dataset, in run order.
    jobs = (
        ('data/humanNet.csv', 'data/key_human.csv', 'data/value_human.csv'),
        ('data/socialNet.csv', 'data/key_social.csv', 'data/value_social.csv'),
        ('data/mixedNet.csv', 'data/key_mixed.csv', 'data/value_mixed.csv'),
    )
    for input_file, key_file, value_file in jobs:
        processor = AnomalyScoreProcessor(input_file, key_file, value_file)
        processor.process_scores()
    
# Check if the script is being run directly
if __name__ == "__main__":
    # Skipped when the module is imported, so main() only runs on direct execution
    main()