## Library Installation

Ensure that the necessary libraries are installed before running the notebook.

In [1]:
# Import necessary libraries
%pip install openpyxl
from openpyxl import load_workbook
import pandas as pd
import os
import re
import itertools

print("Skeleton setup complete!")


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.2[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
Skeleton setup complete!


## Variable Declaration

Set the variables for file paths, sheet names, and other configurations. Update these variables for each specific project.

In [2]:
# Path to the Excel file (change this for each project)
excel_file_path = '/workspaces/Finetwork-Automation/retention/CARGA SERVICIOS.xlsx'
csv_file_path = '/workspaces/Finetwork-Automation/inbound/Informe de métricas históricas.csv'

# Sheet names for different categories
sheet_active = 'ACTIVOS'
sheet_canceled = 'CANCELADOS'
sheet_pending = 'PTE DE FIRMA'
sheet_value = 'TOP VALOR'
sheet_packs = 'TOP PACKS'
sheet_services = 'TOP SERVICIOS'

# Range to read (change this for each project)
start_row = 5
end_row = 30
usecols = 'A:B'

print("Variables defined correctly!")

Variables defined correctly!


## Extract Data from all six Sheets

Extract data from all three Sheets within the specified range and convert it directly to a DataFrame.

In [4]:
def load_sheet_as_dataframe(file_path, sheet_name, start_row, end_row, usecols):
    # Load data from the specified sheet and range into a DataFrame
    df = pd.read_excel(file_path, sheet_name=sheet_name, usecols=usecols, skiprows=start_row-1, nrows=end_row-start_row+1)
    print(f"Data from '{sheet_name}' sheet loaded successfully.")
    return df

# Extract data from 'Active' sheet
ct_bajas_df = load_sheet_as_dataframe(excel_file_path, 'CT BAJAS', start_row, end_row, usecols)
ct_porta_df = load_sheet_as_dataframe(excel_file_path, 'CT PORTA', start_row, end_row, usecols)
bd_bajas_df = load_sheet_as_dataframe(excel_file_path, 'BD BAJAS', start_row, end_row, usecols)

# Display the DataFrames
print("CT BAJAS DataFrame:")
display(ct_bajas_df.head())

print("CT PORTA DataFrame:")
display(ct_porta_df.head())

print("BD BAJAS DataFrame:")
display(bd_bajas_df.head())

Data from 'CT BAJAS' sheet loaded successfully.
Data from 'CT PORTA' sheet loaded successfully.
Data from 'BD BAJAS' sheet loaded successfully.
CT BAJAS DataFrame:


Unnamed: 0,Etiquetas de fila,Cuenta de Agente Respuesta N1
0,Gonzalo Rasero,3
1,Lorena Villalba,6
2,Manuel Cabra,2
3,Rosa Vilches,2
4,Total general,13


CT PORTA DataFrame:


Unnamed: 0,Etiquetas de fila,Cuenta de Agente Respuesta N1
0,Carmen Romero,2
1,Irene Mateos,1
2,Pedro Manzanero,2
3,Total general,5


BD BAJAS DataFrame:


Unnamed: 0,Etiquetas de fila,Cuenta de AGENTE
0,Carmen Romero,2
1,Gonzalo Rasero,1
2,Rosa Vilches,1
3,Total general,4


## Load Agents List

Load the list of all agents from the "Agents" sheet.

In [5]:
# Load the list of agents
agents_df = pd.read_excel(excel_file_path, sheet_name='AGENTES', usecols='A')
agents_list = agents_df.iloc[:, 0].tolist()
print("Agents list loaded successfully!")
print(agents_list)

Agents list loaded successfully!
['Carmen Romero', 'Gonzalo Rasero', 'Irene Mateos', 'Lorena Villalba', 'Manuel Cabra', 'Maria Jesus Bruno', 'Maria Jose Moreno', 'Patricia Sancho', 'Pedro Manzanero', 'Rocio Montero', 'Rosa Vilches', 'Tamara Conde', 'Veronica Arenas', 'Virginia Aragon']


## Verify and Complete Data

Verify that all agents are present in each DataFrame. If an agent is missing, add a row with zeros for that agent.

In [9]:
def ensure_all_agents(df, agents_list):
    """
    Ensure all agents are present in the DataFrame. Add missing agents with zero values and remove agents not in the list.
    
    Parameters:
    df (pd.DataFrame): The DataFrame to check and update.
    agents_list (list): The list of all agents.
    
    Returns:
    pd.DataFrame: The updated DataFrame with all agents.
    """
    # Get the list of agents in the DataFrame
    existing_agents = df.iloc[:, 0].tolist()
    
    # Find missing agents
    missing_agents = [agent for agent in agents_list if agent not in existing_agents]
    
    # Add rows for missing agents with zero values
    for agent in missing_agents:
        zero_row = pd.DataFrame([[agent] + [0] * (df.shape[1] - 1)], columns=df.columns)
        df = pd.concat([df, zero_row], ignore_index=True)
    
    # Remove agents not in the agents list
    df = df[df.iloc[:, 0].isin(agents_list)]
    
    print(f"Added {len(missing_agents)} missing agents and removed {len(existing_agents) - len(df)} agents not in the list.")
    return df

# Apply the function to each DataFrame
ct_bajas_df = ensure_all_agents(ct_bajas_df, agents_list)
ct_porta_df = ensure_all_agents(ct_porta_df, agents_list)
bd_bajas_df = ensure_all_agents(bd_bajas_df, agents_list)

# Display the updated DataFrames
print("CT BAJAS DataFrame after ensuring all agents:")
display(ct_bajas_df.head(15))

print("CT PORTA DataFrame after ensuring all agents:")
display(ct_porta_df.head(15))

print("BD BAJAS DataFrame after ensuring all agents:")
display(bd_bajas_df.head(15))

Added 0 missing agents and removed 0 agents not in the list.
Added 0 missing agents and removed 0 agents not in the list.
Added 0 missing agents and removed 0 agents not in the list.
CT BAJAS DataFrame after ensuring all agents:


Unnamed: 0,Etiquetas de fila,Cuenta de Agente Respuesta N1
0,Gonzalo Rasero,3
1,Lorena Villalba,6
2,Manuel Cabra,2
3,Rosa Vilches,2
5,Carmen Romero,0
6,Irene Mateos,0
7,Maria Jesus Bruno,0
8,Maria Jose Moreno,0
9,Patricia Sancho,0
10,Pedro Manzanero,0


CT PORTA DataFrame after ensuring all agents:


Unnamed: 0,Etiquetas de fila,Cuenta de Agente Respuesta N1
0,Carmen Romero,2
1,Irene Mateos,1
2,Pedro Manzanero,2
4,Gonzalo Rasero,0
5,Lorena Villalba,0
6,Manuel Cabra,0
7,Maria Jesus Bruno,0
8,Maria Jose Moreno,0
9,Patricia Sancho,0
10,Rocio Montero,0


BD BAJAS DataFrame after ensuring all agents:


Unnamed: 0,Etiquetas de fila,Cuenta de AGENTE
0,Carmen Romero,2
1,Gonzalo Rasero,1
2,Rosa Vilches,1
4,Irene Mateos,0
5,Lorena Villalba,0
6,Manuel Cabra,0
7,Maria Jesus Bruno,0
8,Maria Jose Moreno,0
9,Patricia Sancho,0
10,Pedro Manzanero,0


## Assign Values to Emails

Assign numerical values to each email and add them as a new column in the DataFrames.

In [None]:
# Dictionary mapping emails to their respective values
email_values = {
    'albaaraujo@originaltelecom.es': 1,
    'albertocanto@originaltelecom.es': 2,
    'albertosanchez@originaltelecom.es': 3,
    'anasanchez@originaltelecom.es': 4,
    'antonio.reina@originaltelecom.es': 5,
    'azahara.garcia@originaltelecom.es': 6,
    'beatriz.gomez@originaltelecom.es': 7,
    'maricarmen.cornejo@originaltelecom.es': 8,
    'carolinafuentes@originaltelecom.es': 9,
    'cesar.arnaldo@originaltelecom.es': 10,
    'david.molero@originaltelecom.es': 11,
    'elenaborrero@originaltelecom.es': 12,
    'estefania.panea@originaltelecom.es': 13,
    'francisco.perdomo@originaltelecom.es': 14,
    'gonzalofalcon@originaltelecom.es': 15,
    'guillermo.hurtado@originaltelecom.es': 16,
    'irati.izaguirre@originaltelecom.es': 17,
    'ivan.barroso@originaltelecom.es': 18,
    'laura.eguens@originaltelecom.es': 19,
    'lailasetati@originaltelecom.es': 20,
    'leonor.lopez@originaltelecom.es': 21,
    'dolores.cortes@originaltelecom.es': 22,
    'manuelvaldes@originaltelecom.es': 23,
    'manuelventura@originaltelecom.es': 24,
    'mar.aguila@originaltelecom.es': 25,
    'mariangeles.bueso@originaltelecom.es': 26,
    'mariaarroyo@originaltelecom.es': 27,
    'maria.torres@originaltelecom.es': 28,
    'marta.dorado@originaltelecom.es': 29,
    'mauricio.pozo@originaltelecom.es': 30,
    'miguel.segura@originaltelecom.es': 31,
    'miriam.rodriguez@originaltelecom.es': 32,
    'mar.marchena@originaltelecom.es': 33,
    'natividad.sanchez@originaltelecom.es': 34,
    'nereacerezo@originaltelecom.es': 35,
    'oscar.rivilla@originaltelecom.es': 36,
    'patricia.rios@originaltelecom.es': 37,
    'paulavilla@originaltelecom.es': 38,
    'pilar.deval@originaltelecom.es': 39,
    'sara.elkhelyfy@originaltelecom.es': 40,
    'sergio.vazquez@originaltelecom.es': 41,
    'yicel.patricia@originaltelecom.es': 42,
    'yzabelly.gomes@originaltelecom.es': 43
}

# Add a new column to each DataFrame with the email values
def add_email_values(df, email_values):
    df['email_value'] = df.iloc[:, 0].map(email_values)
    return df

# Apply the function to each DataFrame
active_df = add_email_values(active_df, email_values)
canceled_df = add_email_values(canceled_df, email_values)
pending_signature_df = add_email_values(pending_signature_df, email_values)
value_df = add_email_values(value_df, email_values)
packs_df = add_email_values(packs_df, email_values)
services_df = add_email_values(services_df, email_values)

# Display the updated DataFrames with the new 'email_value' column
print("Active DataFrame with email values:")
display(active_df.head())

print("Canceled DataFrame with email values:")
display(canceled_df.head())

print("Pending Signature DataFrame with email values:")
display(pending_signature_df.head())

print("Value DataFrame with email values:")
display(value_df.head())

print("Packs DataFrame with email values:")
display(packs_df.head())

print("Services DataFrame with email values:")
display(services_df.head())