## Library Installation

Ensure that the necessary libraries are installed before running the notebook.

In [1]:
# Import necessary libraries
%pip install openpyxl
from openpyxl import load_workbook
import pandas as pd
import os
import re
import itertools

print("Skeleton setup complete!")


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.2[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
Skeleton setup complete!


## Variable Declaration

Set the variables for file paths, sheet names, and other configurations. Update these variables for each specific project.

In [2]:
# Path to the Excel file (change this for each project)
excel_file_path = '/workspaces/Finetwork-Automation/inbound/Metabase.xlsx'
csv_file_path = '/workspaces/Finetwork-Automation/inbound/Informe de métricas históricas.csv'

# Sheet names for different categories
sheet_active = 'ACTIVOS'
sheet_canceled = 'CANCELADOS'
sheet_pending = 'PTE DE FIRMA'
sheet_value = 'TOP VALOR'
sheet_packs = 'TOP PACKS'
sheet_services = 'TOP SERVICIOS'

# Range to read (change this for each project)
start_row = 8
end_row = 150
usecols = 'A:F'

print("Variables defined correctly!")

Variables defined correctly!


## Extract Data from pivot canceled sheet

Extract data from pivot canceled sheet and convert it directly to a DataFrame.

In [3]:
import pandas as pd

def load_sheet_as_dataframe(file_path, sheet_name, start_row, usecols):
    """
    Load data from the specified sheet and range into a DataFrame.

    Parameters:
    file_path (str): Path to the Excel file.
    sheet_name (str): Name of the sheet to load data from.
    start_row (int): The row number to start loading data from.
    usecols (str): String indicating the columns to load (e.g., 'A,R').

    Returns:
    pd.DataFrame: The loaded DataFrame.
    """
    # Load data from the specified sheet and range into a DataFrame
    df = pd.read_excel(file_path, sheet_name=sheet_name, usecols=usecols, skiprows=start_row-1)
    print(f"Data from '{sheet_name}' sheet loaded successfully.")
    return df

# Parameters for loading the sheet
excel_file_path = '/workspaces/Finetwork-Automation/inbound/Metabase.xlsx'
sheet_name = 'CANCELADOS PIVOT'
start_row = 1  # Start from row 2
usecols = 'A,R'  # Columns A and R

# Extract data from 'CANCELADOS PIVOT' sheet
canceled_pivot_df = load_sheet_as_dataframe(excel_file_path, sheet_name, start_row, usecols)

# Display the DataFrame
print("Canceled Pivot DataFrame:")
display(canceled_pivot_df.head(88))

Data from 'CANCELADOS PIVOT' sheet loaded successfully.
Canceled Pivot DataFrame:


Unnamed: 0,email_employed,service__status_name
0,david.molero@originaltelecom.es,
1,laura.eguens@originaltelecom.es,
2,laura.eguens@originaltelecom.es,
3,laura.eguens@originaltelecom.es,
4,laura.eguens@originaltelecom.es,
...,...,...
83,formacion7@originaltelecom.es,CANCELADO
84,formacion7@originaltelecom.es,CANCELADO
85,formacion7@originaltelecom.es,CANCELADO
86,david.molero@originaltelecom.es,


## Group Canceled values

Blablabla

In [4]:
import pandas as pd

# Agrupar por 'email_employed' y contar el número de veces que aparece 'CANCELADO' en la columna 'service__status_name'
result_df = canceled_pivot_df.groupby('email_employed')['service__status_name'].apply(lambda x: x.eq('CANCELADO').sum()).reset_index()

# Renombrar la columna para que sea más descriptiva
canceled_pivot_df = result_df.rename(columns={'service__status_name': 'Num_Cancelados'})

# Mostrar el DataFrame resultante
print(canceled_pivot_df)

                             email_employed  Num_Cancelados
0               alberto.valle@finetwork.com               0
1           albertocanto@originaltelecom.es               1
2              anamaria.llorca@finetwork.es               1
3         azahara.garcia@originaltelecom.es               0
4          beatriz.gilperez@grupokonecta.es               1
5        carolinafuentes@originaltelecom.es               1
6           david.molero@originaltelecom.es               4
7        diego.temblador@originaltelecom.es               2
8         dolores.cortes@originaltelecom.es               4
9           elenaborrero@originaltelecom.es               3
10       estefania.panea@originaltelecom.es               7
11           formacion10@originaltelecom.es               1
12            formacion1@originaltelecom.es               5
13            formacion3@originaltelecom.es               1
14            formacion6@originaltelecom.es               0
15            formacion7@originaltelecom

## Load Agents List

Load the list of all agents from the "Agents" sheet.

In [5]:
# Load the list of agents
agents_df = pd.read_excel(excel_file_path, sheet_name='AGENTES', usecols='A')
agents_list = agents_df.iloc[:, 0].tolist()
print("Agents list loaded successfully!")
print(agents_list)

Agents list loaded successfully!
['albaaraujo@originaltelecom.es', 'albertocanto@originaltelecom.es', 'albertosanchez@originaltelecom.es', 'antonio.reina@originaltelecom.es', 'azahara.garcia@originaltelecom.es', 'beatriz.gomez@originaltelecom.es', 'maricarmen.cornejo@originaltelecom.es', 'carolinafuentes@originaltelecom.es', 'cesar.arnaldo@originaltelecom.es', 'david.molero@originaltelecom.es', 'elenaborrero@originaltelecom.es', 'estefania.panea@originaltelecom.es', 'francisco.perdomo@originaltelecom.es', 'gonzalofalcon@originaltelecom.es', 'guillermo.hurtado@originaltelecom.es', 'irati.izaguirre@originaltelecom.es', 'ivan.barroso@originaltelecom.es', 'laura.eguens@originaltelecom.es', 'lailasetati@originaltelecom.es', 'leonor.lopez@originaltelecom.es', 'dolores.cortes@originaltelecom.es', 'manuelcabra@originaltelecom.es', 'manuelvaldes@originaltelecom.es', 'manuelventura@originaltelecom.es', 'mariangeles.bueso@originaltelecom.es', 'mariaarroyo@originaltelecom.es', 'maria.torres@origin

## Verify and Complete Data

Verify that all agents are present in each DataFrame. If an agent is missing, add a row with zeros for that agent.

In [6]:
def ensure_all_agents(df, agents_list):
    """
    Ensure all agents are present in the DataFrame. Add missing agents with zero values and remove agents not in the list.
    
    Parameters:
    df (pd.DataFrame): The DataFrame to check and update.
    agents_list (list): The list of all agents.
    
    Returns:
    pd.DataFrame: The updated DataFrame with all agents.
    """
    # Get the list of agents in the DataFrame
    existing_agents = df.iloc[:, 0].tolist()
    
    # Find missing agents
    missing_agents = [agent for agent in agents_list if agent not in existing_agents]
    
    # Add rows for missing agents with zero values
    for agent in missing_agents:
        zero_row = pd.DataFrame([[agent] + [0] * (df.shape[1] - 1)], columns=df.columns)
        df = pd.concat([df, zero_row], ignore_index=True)
    
    # Remove agents not in the agents list
    df = df[df.iloc[:, 0].isin(agents_list)]
    
    print(f"Added {len(missing_agents)} missing agents and removed {len(existing_agents) - len(df)} agents not in the list.")
    return df

# Apply the function to each DataFrame
canceled_pivot_df = ensure_all_agents(canceled_pivot_df, agents_list)

# Display the updated DataFrames
print("Canceled Pivot DataFrame after ensuring all agents:")
display(canceled_pivot_df.head(66))

Added 14 missing agents and removed -1 agents not in the list.
Canceled Pivot DataFrame after ensuring all agents:


Unnamed: 0,email_employed,Num_Cancelados
1,albertocanto@originaltelecom.es,1
3,azahara.garcia@originaltelecom.es,0
5,carolinafuentes@originaltelecom.es,1
6,david.molero@originaltelecom.es,4
8,dolores.cortes@originaltelecom.es,4
9,elenaborrero@originaltelecom.es,3
10,estefania.panea@originaltelecom.es,7
16,francisco.perdomo@originaltelecom.es,2
18,gonzalofalcon@originaltelecom.es,2
19,guillermo.hurtado@originaltelecom.es,1


## Assign Values to Emails

Assign numerical values to each email and add them as a new column in the DataFrames.

In [7]:
# Dictionary mapping emails to their respective values
email_values = {
    'albaaraujo@originaltelecom.es': 1,
    'albertocanto@originaltelecom.es': 2,
    'albertosanchez@originaltelecom.es': 3,
    'antonio.reina@originaltelecom.es': 4,
    'azahara.garcia@originaltelecom.es': 5,
    'beatriz.gomez@originaltelecom.es': 6,
    'maricarmen.cornejo@originaltelecom.es': 7,
    'carolinafuentes@originaltelecom.es': 8,
    'cesar.arnaldo@originaltelecom.es': 9,
    'david.molero@originaltelecom.es': 10,
    'elenaborrero@originaltelecom.es': 11,
    'estefania.panea@originaltelecom.es': 12,
    'francisco.perdomo@originaltelecom.es': 13,
    'gonzalofalcon@originaltelecom.es': 14,
    'guillermo.hurtado@originaltelecom.es': 15,
    'irati.izaguirre@originaltelecom.es': 16,
    'ivan.barroso@originaltelecom.es': 17,
    'laura.eguens@originaltelecom.es': 18,
    'lailasetati@originaltelecom.es': 19,
    'leonor.lopez@originaltelecom.es': 20,
    'dolores.cortes@originaltelecom.es': 21,
    'manuelcabra@originaltelecom.es': 22,
    'manuelvaldes@originaltelecom.es': 23,
    'manuelventura@originaltelecom.es': 24,
    'mariangeles.bueso@originaltelecom.es': 25,
    'mariaarroyo@originaltelecom.es': 26,
    'maria.torres@originaltelecom.es': 27,
    'mario.infante@originaltelecom.es': 28,
    'marta.dorado@originaltelecom.es': 29,
    'mauricio.pozo@originaltelecom.es': 30,
    'miguel.segura@originaltelecom.es': 31,
    'miriam.rodriguez@originaltelecom.es': 32,
    'mar.marchena@originaltelecom.es': 33,
    'natividad.sanchez@originaltelecom.es': 34,
    'oscar.rivilla@originaltelecom.es': 35,
    'patricia.rios@originaltelecom.es': 36,
    'paulavilla@originaltelecom.es': 37,
    'pedro.manzanero@originaltelecom.es': 38,
    'sara.elkhelyfy@originaltelecom.es': 39,
    'sergio.vazquez@originaltelecom.es': 40,
    'tania.arenas@originaltelecom.es': 41,
    'yicel.patricia@originaltelecom.es': 42,
    'yzabelly.gomes@originaltelecom.es': 43
}

# Add a new column to each DataFrame with the email values
def add_email_values(df, email_values):
    df['email_value'] = df.iloc[:, 0].map(email_values)
    return df

# Apply the function to each DataFrame
canceled_pivot_df = add_email_values(canceled_pivot_df, email_values)

# Display the updated DataFrames with the new 'email_value' column
print("Active DataFrame with email values:")
display(canceled_pivot_df.head())

Active DataFrame with email values:


Unnamed: 0,email_employed,Num_Cancelados,email_value
1,albertocanto@originaltelecom.es,1,2
3,azahara.garcia@originaltelecom.es,0,5
5,carolinafuentes@originaltelecom.es,1,8
6,david.molero@originaltelecom.es,4,10
8,dolores.cortes@originaltelecom.es,4,21


## Sort DataFrames by Email Values

Sort the DataFrames based on the numerical values assigned to the emails.

In [8]:
# Sort each DataFrame by the 'email_value' column
def sort_by_email_value(df):
    df = df.sort_values(by='email_value')
    return df

# Apply the sorting function to each DataFrame
canceled_pivot_df = sort_by_email_value(canceled_pivot_df)

# Display the sorted DataFrames
print("Sorted Active DataFrame:")
display(canceled_pivot_df.head(88)) 

Sorted Active DataFrame:


Unnamed: 0,email_employed,Num_Cancelados,email_value
42,albaaraujo@originaltelecom.es,0,1
1,albertocanto@originaltelecom.es,1,2
43,albertosanchez@originaltelecom.es,0,3
44,antonio.reina@originaltelecom.es,0,4
3,azahara.garcia@originaltelecom.es,0,5
45,beatriz.gomez@originaltelecom.es,0,6
31,maricarmen.cornejo@originaltelecom.es,0,7
5,carolinafuentes@originaltelecom.es,1,8
46,cesar.arnaldo@originaltelecom.es,0,9
6,david.molero@originaltelecom.es,4,10


## Remove 'email_value' Column

After sorting the DataFrames based on the email values, the 'email_value' column should be removed to prevent interference with further calculations.

In [9]:
# Function to remove the 'email_value' column
def remove_email_value_column(df):
    if 'email_value' in df.columns:
        df = df.drop(columns=['email_value'])
    return df

# Apply the function to each DataFrame
canceled_pivot_df = remove_email_value_column(canceled_pivot_df)

# Display the updated DataFrames without the 'email_value' column
print("Active DataFrame after removing 'email_value' column:")
display(canceled_pivot_df.head(88))

Active DataFrame after removing 'email_value' column:


Unnamed: 0,email_employed,Num_Cancelados
42,albaaraujo@originaltelecom.es,0
1,albertocanto@originaltelecom.es,1
43,albertosanchez@originaltelecom.es,0
44,antonio.reina@originaltelecom.es,0
3,azahara.garcia@originaltelecom.es,0
45,beatriz.gomez@originaltelecom.es,0
31,maricarmen.cornejo@originaltelecom.es,0
5,carolinafuentes@originaltelecom.es,1
46,cesar.arnaldo@originaltelecom.es,0
6,david.molero@originaltelecom.es,4


## Obtain Data from Canceled Pivot DataFrames

Obtain the values to update the Excel File

checks

In [10]:
# Verificar los nombres de las columnas
print("Nombres de las columnas:", canceled_pivot_df.columns.tolist())
print("Primeras filas del DataFrame:")
print(result_df.head(55))
print("Tipos de datos de las columnas:")
print(result_df.dtypes)
# Quitar espacios de los nombres de las columnas
result_df.columns = result_df.columns.str.strip()

# Verificar nombres de columnas después de limpiar espacios
print("Nombres de las columnas después de limpiar espacios:", result_df.columns.tolist())
# Verificar duplicados en los nombres de las columnas
print("¿Hay nombres de columnas duplicados?:", result_df.columns.duplicated().any())
print("Filas con valores NaN en 'Num_Cancelados':")
# Asegurarse de que estamos accediendo a la columna correcta
print("Valores en la columna 'Num_Cancelados':")
print(result_df['service__status_name'].tolist())

# Alternativamente, si sospechamos que el nombre tiene algún error
print("Valores en la columna que parece ser 'Num_Cancelados':")
for col in result_df.columns:
    print(f"Probando columna: {col}")
    print(result_df[col].tolist()[:5])








Nombres de las columnas: ['email_employed', 'Num_Cancelados']
Primeras filas del DataFrame:
                             email_employed  service__status_name
0               alberto.valle@finetwork.com                     0
1           albertocanto@originaltelecom.es                     1
2              anamaria.llorca@finetwork.es                     1
3         azahara.garcia@originaltelecom.es                     0
4          beatriz.gilperez@grupokonecta.es                     1
5        carolinafuentes@originaltelecom.es                     1
6           david.molero@originaltelecom.es                     4
7        diego.temblador@originaltelecom.es                     2
8         dolores.cortes@originaltelecom.es                     4
9           elenaborrero@originaltelecom.es                     3
10       estefania.panea@originaltelecom.es                     7
11           formacion10@originaltelecom.es                     1
12            formacion1@originaltelecom.es       

In [11]:
# Obtener los valores de la columna 'Num_Cancelados'
canceled_pivot_values = canceled_pivot_df['Num_Cancelados'].values

# Mostrar los valores obtenidos
print("Valores calculados para 'Canceled Pivot' DataFrame:")
print(canceled_pivot_values[:66])  # Mostrar solo los primeros 5 para verificar

Valores calculados para 'Canceled Pivot' DataFrame:
[0 1 0 0 0 0 0 1 0 4 3 7 2 2 1 3 1 1 1 0 4 0 1 0 0 0 0 0 1 1 0 0 0 0 2 0 0
 0 3 1 0 3 0]


### Update finalFile Excel Sheet with Canceled Pivot Data

Update the "finalFile" Excel sheet with the calculated values from the Canceled Pivot DataFrame in column K.

In [12]:
final_file_path ='/workspaces/Finetwork-Automation/inbound/OBJETIVO_CALL_INB_SEPTIEMBRE_24.xlsx'
sheet_name = 'GLOBAL SEPTIEMBRE'# Change this to the correct sheet name

def update_final_file_canceled(file_path, sheet_name, values):
    """
    Update the specified Excel sheet with the provided values starting from row 3 in column K.
    
    Parameters:
    file_path (str): Path to the Excel file.
    sheet_name (str): Name of the sheet to update.
    values (list or array-like): Values to insert into the sheet.
    """
    # Load the workbook and select the sheet
    workbook = load_workbook(filename=file_path)
    sheet = workbook[sheet_name]
    
    # Start updating from row 3 in column K
    start_row = 3
    column = 9  # Column K
    
    for idx, value in enumerate(values, start=start_row):
        sheet.cell(row=idx, column=column, value=value)
    
    # Save the workbook
    workbook.save(file_path)
    print(f"Updated {len(values)} rows in '{sheet_name}' sheet of '{file_path}' with Canceled data.")

# Usar el código para actualizar el archivo Excel
update_final_file_canceled(final_file_path, sheet_name, canceled_pivot_values)


Updated 43 rows in 'GLOBAL SEPTIEMBRE' sheet of '/workspaces/Finetwork-Automation/inbound/OBJETIVO_CALL_INB_SEPTIEMBRE_24.xlsx' with Canceled data.
