
1. **Importación de Archivos:**
   - Importar los archivos limpios de plata y acero que se encuentran en la carpeta `data_migration/data/clean_files` en este notebook de Jupyter.
   - Verificar que los tipos de columna sean los apropiados y realizar las correcciones necesarias.



In [1]:
from typing import List

import pandas as pd

# Show every column when a DataFrame is displayed
pd.set_option('display.max_columns', None)
# Raise the column limit used by DataFrame.info(). The full option key is
# spelled out because abbreviated keys may become ambiguous (and break) in
# future pandas versions.
pd.set_option('display.max_info_columns', 200)

# Show every row when a DataFrame is displayed (optional)
pd.set_option('display.max_rows', None)

# Path of the silver sales workbook, relative to the working directory
path_plata = "data/ventas_plata.xlsx"

# NOTE: without sheet_name, read_excel loads only the first sheet of the workbook
ventas_plata = pd.read_excel(path_plata)




In [2]:

# FUNCTIONS
def get_sheet_names(file_path: str) -> List[str]:
    """
    Get the names of all sheets in an Excel file.

    Args:
    file_path (str): The path to the Excel file from which the sheet names are to be retrieved.

    Returns:
    List[str]: A list of strings, where each string is the name of a sheet in the Excel file,
    in the order reported by pandas.

    Functionality:
    - Opens the Excel file with pandas.ExcelFile inside a context manager so the
      underlying file handle is closed as soon as the names have been read.
    - Retrieves the names of all sheets in the Excel file.
    """
    # The context manager closes the workbook even if reading the names fails;
    # the original left the handle open for the lifetime of the object.
    with pd.ExcelFile(file_path) as workbook:
        # Copy into a fresh list so the result does not reference the closed workbook
        return list(workbook.sheet_names)




# Aquí guardo en un objeto llamado sheet_names los nombres de cada una de las hojas de cálculo (spreadsheets).


In [3]:

# Collect the sheet names of the silver workbook and show them under a header line
sheet_names = get_sheet_names(path_plata)
print("Sheet names:", sheet_names, sep="\n")

Sheet names:
['anillos_plata', 'varios_plata', 'aretes_plata', 'sets_plata', 'pulseras_plata', 'dijes_plata', 'cadenas_plata', 'collares_plata', 'collares_varios_plata', 'juegos_piedras_plata', 'pc_piedras_plata']


In [9]:
# 3. Read every sheet listed in sheet_names and collect the DataFrames in a list.
# Each DataFrame gets a "source" column holding the name of the sheet it came from.
list_of_df = []
# Open the workbook once and parse every sheet from the same handle instead of
# re-opening the file for each sheet; the handle is closed when the block ends.
with pd.ExcelFile(path_plata) as workbook:
    for s in sheet_names:
        print(f"Sheet Name: {s}")
        # Read the sheet and tag its rows with the sheet name
        df = workbook.parse(sheet_name=s)
        df["source"] = s

        # Keep the DataFrame and report its dimensions
        list_of_df.append(df)
        print(f"Shape: {df.shape}")

        # Print the column labels one per line
        for n in df.columns:
            print(n)

Sheet Name: anillos_plata
Shape: (362, 188)
COD
NUM
DET
2023-03-20 00:00:00
2023-03-23 00:00:00
2023-03-28 00:00:00
2023-03-31 00:00:00
2023-04-01 00:00:00
2023-04-04 00:00:00
2023-04-12 00:00:00
2023-04-13 00:00:00
2023-04-15 00:00:00
2023-04-18 00:00:00
2023-04-21 00:00:00
2023-04-25 00:00:00
2023-04-29 00:00:00
2023-05-02 00:00:00
2023-05-05 00:00:00
2023-05-07 00:00:00
2023-05-09 00:00:00
2023-05-10 00:00:00
2024-05-19 00:00:00
2023-05-25 00:00:00
2023-05-26 00:00:00
2023-06-01 00:00:00
2023-06-02 00:00:00
2023-06-05 00:00:00
2023-06-07 00:00:00
2023-06-09 00:00:00
2023-06-19 00:00:00
2023-06-20 00:00:00
2023-06-21 00:00:00
2024-06-24 00:00:00
2023-06-26 00:00:00
2023-07-04 00:00:00
2023-07-05 00:00:00
2023-07-07 00:00:00
2023-07-10 00:00:00
2023-07-11 00:00:00
2023-07-19 00:00:00
2023-07-26 00:00:00
2023-07-27 00:00:00
2023-08-02 00:00:00
2023-08-07 00:00:00
2023-08-11 00:00:00
2023-08-29 00:00:00
2023-08-30 00:00:00
2024-09-15 00:00:00
2024-09-18 00:00:00
2024-09-21 00:00:00
2024

In [5]:
# Build the consolidated DataFrame `all` by stacking every per-sheet DataFrame,
# report its shape, then turn each column label into a string truncated to its
# first 10 characters.
# NOTE(review): the name `all` shadows the Python builtin; it is kept because
# other cells reference it.
all = pd.concat(list_of_df)
print("Sheet Name: all", f"Shape: {all.shape}", sep="\n")
all.columns = [*map(lambda label: str(label)[:10], all.columns)]

Sheet Name: all
Shape: (2662, 469)


1. Renombrar las columnas para que no incluyan la hora.
2. Limpiar las filas cuya suma de ventas sea igual a 0 antes de agregar la información a `list_of_df`.

In [8]:
# Display the dtype of every column of the consolidated DataFrame `all`
all.dtypes

COD            object
NUM            object
DET            object
2023-03-20    float64
2023-03-23    float64
2023-03-28    float64
2023-03-31    float64
2023-04-01    float64
2023-04-04    float64
2023-04-12    float64
2023-04-13    float64
2023-04-15    float64
2023-04-18    float64
2023-04-21    float64
2023-04-25    float64
2023-04-29    float64
2023-05-02    float64
2023-05-05    float64
2023-05-07    float64
2023-05-09    float64
2023-05-10    float64
2024-05-19    float64
2023-05-25    float64
2023-05-26    float64
2023-06-01    float64
2023-06-02    float64
2023-06-05    float64
2023-06-07    float64
2023-06-09    float64
2023-06-19    float64
2023-06-20    float64
2023-06-21    float64
2024-06-24    float64
2023-06-26    float64
2023-07-04    float64
2023-07-05    float64
2023-07-07    float64
2023-07-10    float64
2023-07-11    float64
2023-07-19    float64
2023-07-26    float64
2023-07-27    float64
2023-08-02    float64
2023-08-07    float64
2023-08-11    float64
2023-08-29