## Library Installation

Ensure that the necessary libraries are installed before running the notebook.

In [1]:
# Import necessary libraries
%pip install openpyxl
from openpyxl import load_workbook
import pandas as pd
import os
import re
import itertools

print("Skeleton setup complete!")


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.2[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
Skeleton setup complete!


## Variable Declaration

Set the variables for file paths, sheet names, and other configurations. Update these variables for each specific project.

In [2]:
# Path to the Excel file (change this for each project)
csv_file_path = '/workspaces/Finetwork-Automation/inbound/Informe de métricas históricas.csv'
excel_file_path = '/workspaces/Finetwork-Automation/inbound/Metabase.xlsx'

agents_df = pd.read_excel(excel_file_path, sheet_name='AGENTES', usecols='A')
agents_list = agents_df.iloc[:, 0].tolist()

print("Variables defined correctly!")

Variables defined correctly!


## Extract Data from "Informe de métricas históricas" CSV file
Extract data from the CSV file and convert it directly to a DataFrame.

In [3]:
import pandas as pd

llamadas_df = pd.read_csv(csv_file_path)

# Mostrar los primeros datos para ver la estructura
print("Primeras filas del DataFrame:")
print(llamadas_df.head())


Primeras filas del DataFrame:
                                Agent                  StartInterval  \
0  Irati.izaguirre@originaltelecom.es  2024-08-23T00:00:00.000+02:00   
1    Mauricio.pozo@originaltelecom.es  2024-08-23T00:00:00.000+02:00   
2      Pilar.deval@originaltelecom.es  2024-08-23T00:00:00.000+02:00   
3       albaaraujo@originaltelecom.es  2024-08-23T00:00:00.000+02:00   
4     albertocanto@originaltelecom.es  2024-08-23T00:00:00.000+02:00   

                     EndInterval  Contacts handled incoming  \
0  2024-08-24T00:00:00.000+02:00                         23   
1  2024-08-24T00:00:00.000+02:00                         30   
2  2024-08-24T00:00:00.000+02:00                         35   
3  2024-08-24T00:00:00.000+02:00                         31   
4  2024-08-24T00:00:00.000+02:00                         32   

   Contacts transferred out  
0                        10  
1                        10  
2                        17  
3                         7  
4       

## Clean Data from "Informe de métricas históricas" CSV file
Delete the two interval columns

In [4]:
# Convertir subíndices en columnas regulares (si es necesario)
df_reset = llamadas_df.reset_index()

# Eliminar las columnas de intervalo si no son necesarias
columns_to_remove = ['StartInterval', 'EndInterval']  # Nombres de las columnas a eliminar

df_cleaned = df_reset.drop(columns=columns_to_remove)
llamadas_df = df_cleaned

# Mostrar el DataFrame limpio
print("DataFrame limpio:")
print(llamadas_df.head())

# Verificar las columnas del DataFrame limpio
print("Columnas del DataFrame limpio:", llamadas_df.columns.tolist())

DataFrame limpio:
   index                               Agent  Contacts handled incoming  \
0      0  Irati.izaguirre@originaltelecom.es                         23   
1      1    Mauricio.pozo@originaltelecom.es                         30   
2      2      Pilar.deval@originaltelecom.es                         35   
3      3       albaaraujo@originaltelecom.es                         31   
4      4     albertocanto@originaltelecom.es                         32   

   Contacts transferred out  
0                        10  
1                        10  
2                        17  
3                         7  
4                        14  
Columnas del DataFrame limpio: ['index', 'Agent', 'Contacts handled incoming', 'Contacts transferred out']


## Verify and Complete Data

Verify that all agents are present in each DataFrame. If an agent is missing, add a row with zeros for that agent.

In [6]:
def ensure_all_agents(df, agents_list):
    """
    Ensure all agents are present in the DataFrame. Add missing agents with zero values and remove agents not in the list.
    
    Parameters:
    df (pd.DataFrame): The DataFrame to check and update.
    agents_list (list): The list of all agents.
    
    Returns:
    pd.DataFrame: The updated DataFrame with all agents.
    """
    
    email_column = df.columns[1]
    # Get the list of agents in the DataFrame
    existing_agents = df[email_column].tolist()
    
    # Find missing agents
    missing_agents = [agent for agent in agents_list if agent not in existing_agents]
    
    # Add rows for missing agents with zero values
    if missing_agents:
        zero_rows = pd.DataFrame({
            email_column: missing_agents,
            'Contacts handled incoming': [0] * len(missing_agents),
            'Contacts transferred out': [0] * len(missing_agents)
        })
        df = pd.concat([df, zero_rows], ignore_index=True)
    
    # Remove agents not in the agents list
    df = df[df[email_column].isin(agents_list)]
    
    print(f"Added {len(missing_agents)} missing agents and removed {df.shape[0] - len(agents_list)} agents not in the list.")
    return df

# Apply the function to each DataFrame
llamadas_df = ensure_all_agents(llamadas_df, agents_list)

# Display the updated DataFrames
print("Active DataFrame after ensuring all agents:")
display(llamadas_df.head(99))

Added 43 missing agents and removed 0 agents not in the list.
Active DataFrame after ensuring all agents:


Unnamed: 0,index,Agent,Contacts handled incoming,Contacts transferred out
43,,albaaraujo@originaltelecom.es,0,0
44,,albertocanto@originaltelecom.es,0,0
45,,albertosanchez@originaltelecom.es,0,0
46,,anasanchez@originaltelecom.es,0,0
47,,antonio.reina@originaltelecom.es,0,0
48,,azahara.garcia@originaltelecom.es,0,0
49,,beatriz.gomez@originaltelecom.es,0,0
50,,maricarmen.cornejo@originaltelecom.es,0,0
51,,carolinafuentes@originaltelecom.es,0,0
52,,cesar.arnaldo@originaltelecom.es,0,0


## Assign Values to Emails

Assign numerical values to each email and add them as a new column in the DataFrames.

In [None]:
# Dictionary mapping emails to their respective values
email_values = {
    'albaaraujo@originaltelecom.es': 1,
    'albertocanto@originaltelecom.es': 2,
    'albertosanchez@originaltelecom.es': 3,
    'anasanchez@originaltelecom.es': 4,
    'antonio.reina@originaltelecom.es': 5,
    'azahara.garcia@originaltelecom.es': 6,
    'beatriz.gomez@originaltelecom.es': 7,
    'maricarmen.cornejo@originaltelecom.es': 8,
    'carolinafuentes@originaltelecom.es': 9,
    'cesar.arnaldo@originaltelecom.es': 10,
    'david.molero@originaltelecom.es': 11,
    'elenaborrero@originaltelecom.es': 12,
    'estefania.panea@originaltelecom.es': 13,
    'francisco.perdomo@originaltelecom.es': 14,
    'gonzalofalcon@originaltelecom.es': 15,
    'guillermo.hurtado@originaltelecom.es': 16,
    'irati.izaguirre@originaltelecom.es': 17,
    'ivan.barroso@originaltelecom.es': 18,
    'laura.eguens@originaltelecom.es': 19,
    'lailasetati@originaltelecom.es': 20,
    'leonor.lopez@originaltelecom.es': 21,
    'dolores.cortes@originaltelecom.es': 22,
    'manuelvaldes@originaltelecom.es': 23,
    'manuelventura@originaltelecom.es': 24,
    'mar.aguila@originaltelecom.es': 25,
    'mariangeles.bueso@originaltelecom.es': 26,
    'mariaarroyo@originaltelecom.es': 27,
    'maria.torres@originaltelecom.es': 28,
    'marta.dorado@originaltelecom.es': 29,
    'mauricio.pozo@originaltelecom.es': 30,
    'miguel.segura@originaltelecom.es': 31,
    'miriam.rodriguez@originaltelecom.es': 32,
    'mar.marchena@originaltelecom.es': 33,
    'natividad.sanchez@originaltelecom.es': 34,
    'nereacerezo@originaltelecom.es': 35,
    'oscar.rivilla@originaltelecom.es': 36,
    'patricia.rios@originaltelecom.es': 37,
    'paulavilla@originaltelecom.es': 38,
    'pilar.deval@originaltelecom.es': 39,
    'sara.elkhelyfy@originaltelecom.es': 40,
    'sergio.vazquez@originaltelecom.es': 41,
    'yicel.patricia@originaltelecom.es': 42,
    'yzabelly.gomes@originaltelecom.es': 43
}

# Add a new column to each DataFrame with the email values
def add_email_values(df, email_values):
    df['email_value'] = df.iloc[:, 0].map(email_values)
    return df

# Apply the function to each DataFrame
llamadas_df = add_email_values(llamadas_df, email_values)

# Display the updated DataFrames with the new 'email_value' column
print("Active DataFrame with email values:")
display(llamadas_df.head())