# Análisis y Predicción de Ventas en una Tienda de Retail

In [47]:
import numpy as np

In [48]:
def cargar_datos(ruta_archivo):
    try:
        datos = np.genfromtxt(ruta_archivo, delimiter=',', skip_header=1, dtype='str')
        return datos
    except FileNotFoundError:
        print(f"El archivo no se encontró: {ruta_archivo}")
        return None

if __name__ == "__main__":
    ruta_archivo = "../data/retail_sales_dataset.csv"
    datos = cargar_datos(ruta_archivo)
    if datos is not None:
        print(datos)

[['1' '2023-11-24' 'CUST001' ... '3' '50' '150']
 ['2' '2023-02-27' 'CUST002' ... '2' '500' '1000']
 ['3' '2023-01-13' 'CUST003' ... '1' '30' '30']
 ...
 ['998' '2023-10-29' 'CUST998' ... '4' '25' '100']
 ['999' '2023-12-05' 'CUST999' ... '3' '50' '150']
 ['1000' '2023-04-12' 'CUST1000' ... '4' '30' '120']]


In [49]:
data_filtered = datos[:,3:]
print(data_filtered)

[['Male' '34' 'Beauty' '3' '50' '150']
 ['Female' '26' 'Clothing' '2' '500' '1000']
 ['Male' '50' 'Electronics' '1' '30' '30']
 ...
 ['Female' '23' 'Beauty' '4' '25' '100']
 ['Female' '36' 'Electronics' '3' '50' '150']
 ['Male' '47' 'Electronics' '4' '30' '120']]


In [50]:
titles = ['genero','edad','categoria','cantidad','precio','ventas']
print(titles)
print(data_filtered)

['genero', 'edad', 'categoria', 'cantidad', 'precio', 'ventas']
[['Male' '34' 'Beauty' '3' '50' '150']
 ['Female' '26' 'Clothing' '2' '500' '1000']
 ['Male' '50' 'Electronics' '1' '30' '30']
 ...
 ['Female' '23' 'Beauty' '4' '25' '100']
 ['Female' '36' 'Electronics' '3' '50' '150']
 ['Male' '47' 'Electronics' '4' '30' '120']]


In [51]:
categories = data_filtered[:,2]
print(type(categories))
print(categories)

<class 'numpy.ndarray'>
['Beauty' 'Clothing' 'Electronics' 'Clothing' 'Beauty' 'Beauty' 'Clothing'
 'Electronics' 'Electronics' 'Clothing' 'Clothing' 'Beauty' 'Electronics'
 'Clothing' 'Electronics' 'Clothing' 'Clothing' 'Electronics' 'Clothing'
 'Clothing' 'Beauty' 'Clothing' 'Clothing' 'Clothing' 'Beauty'
 'Electronics' 'Beauty' 'Beauty' 'Electronics' 'Beauty' 'Electronics'
 'Beauty' 'Electronics' 'Clothing' 'Beauty' 'Beauty' 'Beauty' 'Beauty'
 'Clothing' 'Beauty' 'Clothing' 'Clothing' 'Clothing' 'Clothing'
 'Electronics' 'Electronics' 'Beauty' 'Electronics' 'Electronics' 'Beauty'
 'Beauty' 'Beauty' 'Electronics' 'Electronics' 'Beauty' 'Clothing'
 'Beauty' 'Clothing' 'Clothing' 'Beauty' 'Beauty' 'Beauty' 'Electronics'
 'Clothing' 'Electronics' 'Electronics' 'Beauty' 'Electronics' 'Beauty'
 'Clothing' 'Beauty' 'Electronics' 'Electronics' 'Beauty' 'Beauty'
 'Electronics' 'Clothing' 'Clothing' 'Beauty' 'Clothing' 'Electronics'
 'Beauty' 'Electronics' 'Electronics' 'Clothing' 'Beauty' 'B

In [52]:
unique_categories = np.unique(categories)
print(unique_categories)

['Beauty' 'Clothing' 'Electronics']


In [53]:
sales = data_filtered[:,5]
print(type(sales))
sales = sales.astype(float)
print(sales)

<class 'numpy.ndarray'>
[ 150. 1000.   30.  500.  100.   30.   50.  100.  600.  200.  100.   75.
 1500.  120. 2000. 1500.  100.   50.   50.  900.  500.  100.  120.  300.
   50. 1000.   50.  500.   30.  900. 1200.   90.  100.  150.  900.  900.
   75.  200.  120.   50.   50.  900.  300.   25.   30. 1200. 1500.  900.
 1000.   75.   75.  300.  100. 1500.  120.  900.   30. 1200.   50.  150.
  200.  100.   50.  100. 2000.   30. 1200.  300.   75.  300.  100. 2000.
   90. 2000.  200.  100.  100. 1500.  300.   60.   50.  200.  100.   90.
  150.   90.  100.  500. 2000.   30.  500.  120. 2000. 1000.   60.  600.
 1000.  100. 1200.   30.  600.   50.   25. 1000.  500.   50. 1200.   75.
 2000.  900. 1500. 1500.   50.  100. 1500.   30. 1000. 2000.  150.   50.
  200.  120.   60. 2000.  100.   90.   50.  500.  600.  500.  600.  200.
  900.   50.   50.  600. 1000.  200. 2000.   30.   50. 1200.   50. 1500.
   75.  200.  300.   60.   75.  120.   50. 2000. 1000.  900. 2000.  100.
 2000.  600.  200.  100. 10

In [54]:
# Función que calcula el total de ventas por categoría
def sales_by_category(sales, categories):
    sales_totals = {category: np.sum(sales[categories == category]) for category in ['Beauty', 'Clothing', 'Electronics']}
    return sales_totals

category_totals = sales_by_category(sales, categories)

# Imprimir los resultados
for category, total in category_totals.items():
    print(f'Total de ventas en la sección "{category}": {total}')

Total de ventas en la sección "Beauty": 143515.0
Total de ventas en la sección "Clothing": 155580.0
Total de ventas en la sección "Electronics": 156905.0


In [55]:
# Función que calcula el promedio de ventas por categoría
def mean_by_category(sales, categories):
    means = {category: np.mean(sales[categories == category]) for category in ['Beauty', 'Clothing', 'Electronics']}
    return means

category_mean = mean_by_category(sales, categories)

# Imprime los resultados
for category, mean in category_mean.items():
    print(f'Promedio total de ventas en la sección "{category}": {mean}')

Promedio total de ventas en la sección "Beauty": 467.4755700325733
Promedio total de ventas en la sección "Clothing": 443.2478632478632
Promedio total de ventas en la sección "Electronics": 458.7865497076023


In [57]:
# Función que identifica la categoría con mayor y menor venta
def more_less_sales(category_totals, unique_categories):
    more_sales_index = np.argmax(category_totals)
    less_sales_index = np.argmin(category_totals)
    return unique_categories[more_sales_index], unique_categories[less_sales_index]

# Obtener las categorías con más y menos ventas
most_sales, least_sales = more_less_sales(category_totals, unique_categories)

# Imprimir los resultados
print("Categoría con mayor ventas:", most_sales)
print("Categoría con menor ventas:", least_sales)

Categoría con mayor ventas: Beauty
Categoría con menor ventas: Beauty


In [58]:
beauty = sales[categories == 'Beauty']
print(beauty)

[ 150.  100.   30.   75.  500.   50.   50.  500.  900.   90.  900.  900.
   75.  200.   50. 1500.   75.   75.  300.  120.   30.  150.  200.  100.
 1200.   75.  100. 2000.  200.  300.  200.   90.  100. 2000. 1000. 1000.
  100.   50. 1000.   75.  100.   50.  500.  600.  600. 1000. 2000. 1500.
 1000. 1500. 1500.   50.  300.  100.  100.  120.  900.   50.   90.   25.
  100. 1500.  900. 1500.  150.   25.   50. 1500. 1500.   60.   90.  500.
  600.  100.   90.   25.   25.  600. 1000.  500.  300.  100.  200. 2000.
   60.  120.   60.   90.  120.   50.  100. 2000.  600. 1200.  900. 1200.
  600.   30. 1200.   25.   25.  100. 1500.  900.  100.  200.  120.  150.
  100.   30.   50.   75.  200.  300.   25.  500.   25. 1500.  600.   75.
   30.  300.   90.   30.   50. 1000.   30.   25.   60.  100.  500.   75.
  100. 1200.  200.  900. 1200. 2000.   60.   50.   25.  900.   50. 1000.
 1500.   75.  900.   50. 2000.  100.   50.  200.   60.   60.  100. 2000.
  150.   50. 1500.  600.  200.  100.   25.  100. 15

In [59]:
clothing = sales[categories == 'Clothing']
print(clothing)

[1000.  500.   50.  200.  100.  120. 1500.  100.   50.  900.  100.  120.
  300.  150.  120.   50.  900.  300.   25.  900. 1200.   50.  100.  300.
  100. 1500.   60.  150.  500.   60.  600.  600.   25.   50. 1200.  900.
 1500. 1500.   30.  150. 2000.  100.   90.   50.  500.   50.  200.   50.
   75.  200.   60.   75.   50.  100.  200.  100.   60.  150. 1200. 2000.
  150.  300.   50.  900.   60.  900.   25.  200.  100.   75.  200.   30.
  900.  200. 1200. 1000.   25.   25. 1500. 1500.  120.   25.   50.   50.
  150.   25.   25.   90.  900.   50. 2000.   30. 1000.   50.  200.   50.
  900.  900. 2000. 1000. 1000.  100.  100.  500. 1500.  200.  100.  120.
  600.   90. 1200.  120.  120.  120.   60.   50.   25.  500.   75.  500.
 1200.  200. 2000. 1000.   90.   50.  100.   25.  300.  100. 1200.   50.
  200.   25.  100. 1000.  500.   50.  600.  500.  600.  200.  300.  600.
  600. 1200.  100.   60.   90. 2000. 1500.   90.   25.  120.   30.   75.
  600.  100.  600.   90. 1500. 1000. 1200. 1000.  1

In [60]:
electronics = sales[categories == 'Electronics']
print(electronics)

[  30.  100.  600. 1500. 2000.   50. 1000.   30. 1200.  100.   30. 1200.
  900. 1000.  100. 1500.   50. 2000.   30.  300. 2000.   90.  100.   50.
  100.   90. 2000.   30.  500.  120. 1200.   30.  500. 2000. 1500.   50.
 1000. 2000.  200.  120.   60.  200.  900.   50.  600.   30.   50. 1200.
  300.  120. 2000. 1000.  900. 2000. 2000.  600.  120.  100.  300. 1200.
  200.   25.  200.  200.  200.  100.  200.   90.  100.   60.   50. 1000.
 1500.   75.  900.   50.   60.   50.  300.  500.   60.   30.  300.  100.
  200.  500.   25.   50.   60.   90. 1000. 1000.  200.   90.   60.   50.
   50.   90. 1200.   50.  500.  150.   50.  150.  100.   90. 1200. 1200.
  900.   50.   50.   30.   25.  600. 1000.  500.  500. 1500.   75. 1200.
   50. 1500.   60.  600. 1500.  600.   25.  100. 1000. 1000.  900.  900.
  200. 2000. 2000.  900. 1000.  120.  150.   25.  200.   50.  900. 1200.
 1000.   50. 1200.  300.   50.  200.   30.  100.   60.  100. 1200.  600.
  150.  100.  150.   25. 1200. 1200.   30.   25.  9

In [61]:
genres = data_filtered[:,0]
print(type(genres))
print(genres)

<class 'numpy.ndarray'>
['Male' 'Female' 'Male' 'Male' 'Male' 'Female' 'Male' 'Male' 'Male'
 'Female' 'Male' 'Male' 'Male' 'Male' 'Female' 'Male' 'Female' 'Female'
 'Female' 'Male' 'Female' 'Male' 'Female' 'Female' 'Female' 'Female'
 'Female' 'Female' 'Female' 'Female' 'Male' 'Male' 'Female' 'Female'
 'Female' 'Male' 'Female' 'Male' 'Male' 'Male' 'Male' 'Male' 'Female'
 'Female' 'Female' 'Female' 'Female' 'Male' 'Female' 'Female' 'Male'
 'Female' 'Male' 'Female' 'Male' 'Female' 'Female' 'Male' 'Male' 'Male'
 'Male' 'Male' 'Male' 'Male' 'Male' 'Female' 'Female' 'Male' 'Female'
 'Female' 'Female' 'Female' 'Male' 'Female' 'Male' 'Female' 'Female'
 'Female' 'Male' 'Female' 'Male' 'Female' 'Male' 'Female' 'Male' 'Male'
 'Female' 'Male' 'Female' 'Female' 'Female' 'Female' 'Female' 'Female'
 'Female' 'Female' 'Female' 'Female' 'Female' 'Male' 'Male' 'Female'
 'Female' 'Female' 'Female' 'Female' 'Female' 'Female' 'Female' 'Male'
 'Female' 'Male' 'Female' 'Female' 'Male' 'Female' 'Male' 'Female

In [62]:
male = sales[genres == 'Male']
print(male)

[ 150.   30.  500.  100.   50.  100.  600.  100.   75. 1500.  120. 1500.
  900.  100. 1200.   90.  900.  200.  120.   50.   50.  900.  900.   75.
  100.  120. 1200.   50.  150.  200.  100.   50.  100. 2000.  300.   90.
  200.  300.   50.  100.  150.   90.  500.   30.  600.  900. 1500. 1500.
 1000.   50.  120. 2000.  100.  500.  200.  900.   50.   50.  600. 1000.
  200. 2000.   30. 1200.  200.  300.   60.   75.   50. 2000. 1000.  900.
 2000. 2000.  200. 1000.   60. 2000.  300. 1500.   50.  120.  100.   60.
  300.  900. 1200.  120.  200.   25.  200.   75.   50.   25.  100. 1500.
  200.   30. 1500.  150.   25. 1000.   25.   25.  200. 1500. 1500. 1500.
   60. 1500.  100.   90.  500.  600.  120.  100.   90.   25. 1500.   25.
  100.   90.   60.  900.   50.   50.  300.  500.   30. 1000. 2000.   60.
   50.   60.  900.  900. 2000.  300. 1000.  100.  500.  200.   50.  100.
  120.   60.  600. 1200.   90. 1200. 1000.  120.  600.   90.   50.  120.
  120.   60.   90.   25.  500.  500.  150.  100.   

In [63]:
female = sales[genres == 'Female']
print(female)

[1000.   30.  200. 2000.  100.   50.   50.  500.  120.  300.   50. 1000.
   50.  500.   30.  900.  100.  150.  900.   75.  300.   25.   30. 1200.
 1500. 1000.   75.  300. 1500.  900.   30.   30. 1200.   75.  300.  100.
 2000. 2000.  100.  100. 1500.   60.  200.   90.  100. 2000.   30.  500.
  120. 2000. 1000.   60.  600. 1000.  100. 1200.   50.   25. 1000.  500.
   50. 1200.   75. 2000. 1500.   50.  100.   30. 2000.  150.  200.   60.
   90.   50.  600.  500.  600.   50.   50. 1500.   75.  120.  100.  600.
  100.  150. 1500. 1200.  150.   50.  900.  300.  100.  100.  900.  100.
   90.  900.  200.  900. 1200.   25.   50.  200.  200.  200.   90.   25.
   50.  100.   50.   60.  150.   25.  600.   50. 1000.   25. 1000.  500.
  300.   75.  900.   50.  200. 2000.   50.  200.  120.   60.   90.   30.
  120.  100.   50. 1000.  100.  100. 1500. 2000.  200.  500.   25.  600.
   90.  900. 1200. 1000.  200.   60.   30.   50. 1200.   25.   25.  100.
 1500.   50. 1200.   50.  900.  150.   50.   75.  1

In [64]:
# Total de ventas por género
def sales_by_genre(sales, genres):
    return np.sum(sales[genres == 'Female']), np.sum(sales[genres == 'Male'])

sales_female, sales_male = sales_by_genre(sales, genres)

print(f'Total de ventas por género femenino: {sales_female}')
print(f'Total de ventas por género masculino: {sales_male}')

Total de ventas por género femenino: 232840.0
Total de ventas por género masculino: 223160.0
