# Nombre: Vicente Soto
# Curso: Data Analytics - Generación 106 (Desafío Latam)
## Desafío - Manipulación y transformación de datos (Parte II)

## PARTE 1 

In [35]:
# 1) Load every worksheet of the Excel workbook into its own DataFrame.

import pandas as pd

archivo_excel = "US_Regional_Sales_Data.xlsx"

# Open the workbook a single time and read all sheets from that handle.
# The context manager closes the underlying file when the block exits,
# and reusing the handle avoids re-opening the workbook for every sheet.
with pd.ExcelFile(archivo_excel) as excel_file:
    print("Hojas disponibles:", excel_file.sheet_names)

    # One DataFrame per sheet, addressed by its exact sheet name.
    df_orders = pd.read_excel(excel_file, sheet_name="Sales Orders Sheet")
    df_customers = pd.read_excel(excel_file, sheet_name="Customers Sheet")
    df_stores = pd.read_excel(excel_file, sheet_name="Store Locations Sheet")
    df_products = pd.read_excel(excel_file, sheet_name="Products Sheet")
    df_sales_team = pd.read_excel(excel_file, sheet_name="Sales Team Sheet")


# 2) Build df_base by left-joining each lookup table onto the orders table.
#    A left join keeps every order row even when a lookup key has no match.

df_base = pd.merge(df_orders, df_customers, on="_CustomerID", how="left")
df_base = pd.merge(df_base, df_stores, on="_StoreID", how="left")
df_base = pd.merge(df_base, df_products, on="_ProductID", how="left")
df_base = pd.merge(df_base, df_sales_team, on="_SalesTeamID", how="left")

# 3) Add elapsed-day columns to df_base.
#    Each one is the day component of the difference between two date columns
#    (assumes the date columns were parsed as datetimes - TODO confirm).

# Procurement -> order.
df_base["ProcurementDays"] = df_base["OrderDate"].sub(df_base["ProcuredDate"]).dt.days
# Order -> shipment.
df_base["ShippingDays"] = df_base["ShipDate"].sub(df_base["OrderDate"]).dt.days
# Shipment -> delivery.
df_base["DeliveryDays"] = df_base["DeliveryDate"].sub(df_base["ShipDate"]).dt.days
# Order -> delivery: shipping days plus delivery days.
df_base["CustomerDays"] = df_base["ShippingDays"].add(df_base["DeliveryDays"])


# 4) Add CustomerDaysInterval: CustomerDays bucketed into 15-day intervals.

# Bin edges 0, 15, ..., 90 and one "<start> to <end> days" label per
# consecutive pair of edges.
intervalos = list(range(0, 91, 15))
etiquetas = [
    f"{inicio} to {fin} days"
    for inicio, fin in zip(intervalos[:-1], intervalos[1:])
]

# right=False makes every bin closed on the left and open on the right,
# e.g. [0, 15). Values outside the edges get no interval (NaN).
df_base["CustomerDaysInterval"] = pd.cut(
    x=df_base["CustomerDays"],
    bins=intervalos,
    right=False,
    labels=etiquetas,
    include_lowest=True,
)

# 5) Report 1: order count per sales team (rows) and customer-days
#    interval (columns), with rows sorted by sales team name.
#    observed=False keeps intervals that have no orders as zero columns.

pivot_reporte_1 = df_base.pivot_table(
    values="OrderNumber",
    index="Sales Team",
    columns="CustomerDaysInterval",
    aggfunc="count",
    fill_value=0,
    observed=False,
).sort_index()

# "All" column: per-team total across every interval.
pivot_reporte_1["All"] = pivot_reporte_1.sum(axis="columns")

# FINAL RESULT
pivot_reporte_1


Hojas disponibles: ['Sales Orders Sheet', 'Customers Sheet', 'Store Locations Sheet', 'Products Sheet', 'Sales Team Sheet']


CustomerDaysInterval,0 to 15 days,15 to 30 days,30 to 45 days,45 to 60 days,60 to 75 days,75 to 90 days,All
Sales Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Adam Hernandez,71,176,55,0,0,0,302
Anthony Berry,69,176,53,0,0,0,298
Anthony Torres,70,160,58,0,0,0,288
Carl Nguyen,85,184,45,0,0,0,314
Carlos Miller,56,145,46,0,0,0,247
Chris Armstrong,86,152,54,0,0,0,292
Donald Reynolds,87,167,42,0,0,0,296
Douglas Tucker,76,148,53,0,0,0,277
Frank Brown,71,156,48,0,0,0,275
George Lewis,92,169,54,0,0,0,315


## PARTE 2 

In [None]:
# 6) Add GrossMargin:
#    quantity * (unit price after discount - unit cost).

df_base["GrossMargin"] = (
    df_base["Unit Price"]
    .mul(1 - df_base["Discount Applied"])
    .sub(df_base["Unit Cost"])
    .mul(df_base["Order Quantity"])
)

df_base[["OrderNumber", "GrossMargin"]].head()

# 7) Map each order's GrossMargin to a commission percentage.

# Lower edges of the gross-margin brackets and the percentage paid in each.
margenes = [0, 200000, 400000, 600000, 800000, float("inf")]
comisiones = [10.0, 12.5, 14.0, 16.25, 18.0]

# Brackets are closed on the left (right=False). A negative GrossMargin
# falls outside every bracket, so pd.cut yields NaN for it; that NaN would
# propagate into CommissionsAmount and NetMargin. Such orders are treated
# as earning 0% commission so the derived columns stay defined.
# NOTE(review): confirm that 0% is the intended rule for loss-making orders.
df_base["CommissionsPercentage"] = (
    pd.cut(
        df_base["GrossMargin"],
        bins=margenes,
        labels=comisiones,
        right=False,
        include_lowest=True,
    )
    .astype(float)
    .fillna(0.0)
)

# 8) Commission amount: the bracket percentage applied to the gross margin.

df_base["CommissionsAmount"] = df_base["GrossMargin"] * (df_base["CommissionsPercentage"] / 100)


# 9) Net margin: gross margin minus the commission amount.

df_base["NetMargin"] = df_base["GrossMargin"] - df_base["CommissionsAmount"]

df_base[["OrderNumber", "GrossMargin", "CommissionsPercentage", "CommissionsAmount", "NetMargin"]].head()

# 10) Report 2: gross margin, commission and net margin totals per sales team.
#     This step recomputes CommissionsPercentage, CommissionsAmount and
#     NetMargin on df_base, replacing any values already stored there.

margenes_oficiales = [0, 100, 1000, 10000, 100000, float("inf")]
comisiones_oficiales = [5.0, 10.0, 15.0, 20.0, 25.0]

# Brackets are closed on the left (right=False). A negative GrossMargin
# matches no bracket and pd.cut yields NaN; left as NaN, those orders would
# be counted in the GrossMargin total but silently skipped in the
# CommissionsAmount and NetMargin totals, so the report columns would not
# reconcile. They are treated as earning 0% commission instead.
# NOTE(review): confirm that 0% is the intended rule for loss-making orders.
df_base["CommissionsPercentage"] = (
    pd.cut(
        df_base["GrossMargin"],
        bins=margenes_oficiales,
        labels=comisiones_oficiales,
        right=False,
        include_lowest=True,
    )
    .astype(float)
    .fillna(0.0)
)

# The top bracket is paid at 20%, not 25%: the 25.0 label only serves as a
# distinct placeholder for pd.cut and is replaced here.
df_base.loc[df_base["CommissionsPercentage"] == 25.0, "CommissionsPercentage"] = 20.0

df_base["CommissionsAmount"] = df_base["GrossMargin"] * (df_base["CommissionsPercentage"] / 100)
df_base["NetMargin"] = df_base["GrossMargin"] - df_base["CommissionsAmount"]

# Sum the three monetary columns per sales team.
pivot_reporte_2 = pd.pivot_table(
    df_base,
    index="Sales Team",
    values=["GrossMargin", "CommissionsAmount", "NetMargin"],
    aggfunc="sum",
)

pivot_reporte_2 = pivot_reporte_2.sort_index()

# FINAL RESULT
pivot_reporte_2
