In [1]:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np

In [4]:
# Load the customer shopping transactions from the project-relative CSV file.
df = pd.read_csv(filepath_or_buffer="data/customer_shopping_data.csv")
# Show the first 50 rows as this cell's output.
df.head(n=50)

Unnamed: 0,invoice_no,customer_id,gender,age,category,quantity,price,payment_method,invoice_date,shopping_mall
0,I138884,C241288,Female,28,Clothing,5,1500.4,Credit Card,5/8/2022,Kanyon
1,I317333,C111565,Male,21,Shoes,3,1800.51,Debit Card,12/12/2021,Forum Istanbul
2,I127801,C266599,Male,20,Clothing,1,300.08,Cash,9/11/2021,Metrocity
3,I173702,C988172,Female,66,Shoes,5,3000.85,Credit Card,16/05/2021,Metropol AVM
4,I337046,C189076,Female,53,Books,4,60.6,Cash,24/10/2021,Kanyon
5,I227836,C657758,Female,28,Clothing,5,1500.4,Credit Card,24/05/2022,Forum Istanbul
6,I121056,C151197,Female,49,Cosmetics,1,40.66,Cash,13/03/2022,Istinye Park
7,I293112,C176086,Female,32,Clothing,2,600.16,Credit Card,13/01/2021,Mall of Istanbul
8,I293455,C159642,Male,69,Clothing,3,900.24,Credit Card,4/11/2021,Metrocity
9,I326945,C283361,Female,60,Clothing,2,600.16,Credit Card,22/08/2021,Kanyon


In [5]:
# Print each column's non-null count and dtype, plus the frame's memory usage.
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99457 entries, 0 to 99456
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   invoice_no      99457 non-null  object 
 1   customer_id     99457 non-null  object 
 2   gender          99457 non-null  object 
 3   age             99457 non-null  int64  
 4   category        99457 non-null  object 
 5   quantity        99457 non-null  int64  
 6   price           99457 non-null  float64
 7   payment_method  99457 non-null  object 
 8   invoice_date    99457 non-null  object 
 9   shopping_mall   99457 non-null  object 
dtypes: float64(1), int64(2), object(7)
memory usage: 7.6+ MB


In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,age,quantity,price
count,99457.0,99457.0,99457.0
mean,43.427089,3.003429,689.256321
std,14.990054,1.413025,941.184567
min,18.0,1.0,5.23
25%,30.0,2.0,45.45
50%,43.0,3.0,203.3
75%,56.0,4.0,1200.32
max,69.0,5.0,5250.0


In [7]:
# Convert the invoice_date column from text to datetime.
# The raw values are written day/month/year, with or without zero padding
# (e.g. "16/05/2021", "5/8/2022"). An explicit format is used instead of
# dayfirst=True because dayfirst is only a preference: a value that cannot be
# day-first would be silently parsed month-first rather than raising an error.

df['invoice_date'] = pd.to_datetime(df['invoice_date'], format='%d/%m/%Y')

In [8]:
# Re-inspect the dtypes to confirm that invoice_date is no longer an object column.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99457 entries, 0 to 99456
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   invoice_no      99457 non-null  object        
 1   customer_id     99457 non-null  object        
 2   gender          99457 non-null  object        
 3   age             99457 non-null  int64         
 4   category        99457 non-null  object        
 5   quantity        99457 non-null  int64         
 6   price           99457 non-null  float64       
 7   payment_method  99457 non-null  object        
 8   invoice_date    99457 non-null  datetime64[ns]
 9   shopping_mall   99457 non-null  object        
dtypes: datetime64[ns](1), float64(1), int64(2), object(6)
memory usage: 7.6+ MB


In [9]:
# Derive calendar year and month columns from the parsed invoice date.
df['year'], df['month'] = df['invoice_date'].dt.year, df['invoice_date'].dt.month

# Preview the first rows to check the two new columns.
df.head(n=5)


Unnamed: 0,invoice_no,customer_id,gender,age,category,quantity,price,payment_method,invoice_date,shopping_mall,year,month
0,I138884,C241288,Female,28,Clothing,5,1500.4,Credit Card,2022-08-05,Kanyon,2022,8
1,I317333,C111565,Male,21,Shoes,3,1800.51,Debit Card,2021-12-12,Forum Istanbul,2021,12
2,I127801,C266599,Male,20,Clothing,1,300.08,Cash,2021-11-09,Metrocity,2021,11
3,I173702,C988172,Female,66,Shoes,5,3000.85,Credit Card,2021-05-16,Metropol AVM,2021,5
4,I337046,C189076,Female,53,Books,4,60.6,Cash,2021-10-24,Kanyon,2021,10


Kanyon: 41.0811° N, 29.0116° E

Forum Istanbul: 41.0430° N, 28.9314° E

Metrocity: 41.0790° N, 29.0127° E

Metropol AVM: 40.9871° N, 29.1187° E

In [10]:
# Map each shopping mall name to its latitude/longitude (decimal degrees).
# NOTE(review): the data preview also contains malls with no entry here
# (e.g. 'Istinye Park', 'Mall of Istanbul') - confirm whether they must be added.
mall_coordinates = {
    mall: {'latitude': lat, 'longitude': lon}
    for mall, lat, lon in (
        ('Kanyon', 41.0811, 29.0116),
        ('Forum Istanbul', 41.0430, 28.9314),
        ('Metrocity', 41.0790, 29.0127),
        ('Metropol AVM', 40.9871, 29.1187),
    )
}


In [11]:
# Collect the distinct values of every column, keyed by column name.
unique_values = {name: series.unique() for name, series in df.items()}

# Display the mapping as this cell's output.
unique_values

{'invoice_no': array(['I138884', 'I317333', 'I127801', ..., 'I824010', 'I702964',
        'I232867'], dtype=object),
 'customer_id': array(['C241288', 'C111565', 'C266599', ..., 'C103292', 'C800631',
        'C273973'], dtype=object),
 'gender': array(['Female', 'Male'], dtype=object),
 'age': array([28, 21, 20, 66, 53, 49, 32, 69, 60, 36, 29, 67, 25, 24, 65, 42, 46,
        23, 27, 52, 44, 51, 50, 68, 43, 59, 54, 48, 40, 41, 19, 18, 22, 61,
        45, 64, 33, 63, 34, 47, 38, 57, 30, 26, 62, 39, 55, 56, 35, 31, 37,
        58]),
 'category': array(['Clothing', 'Shoes', 'Books', 'Cosmetics', 'Food & Beverage',
        'Toys', 'Technology', 'Souvenir'], dtype=object),
 'quantity': array([5, 3, 1, 4, 2]),
 'price': array([1.50040e+03, 1.80051e+03, 3.00080e+02, 3.00085e+03, 6.06000e+01,
        4.06600e+01, 6.00160e+02, 9.00240e+02, 1.04600e+01, 1.51500e+01,
        1.43360e+02, 3.03000e+01, 1.56900e+01, 5.23000e+00, 5.25000e+03,
        7.57500e+01, 7.16800e+01, 2.03300e+02, 2.40068e+03,

1. Distribución de la Edad de los Clientes:

Utiliza un histograma para mostrar la distribución de la edad de los clientes.

2. Cantidad de Compras por Categoría:

Utiliza un gráfico de barras para mostrar la cantidad de compras por cada categoría (Clothing, Shoes, Books, Cosmetics, Food & Beverage, Toys, Technology y Souvenir).


3. Método de Pago Utilizado:

Utiliza un gráfico de barras para mostrar la cantidad de compras realizadas por cada método de pago (Credit Card, Debit Card, Cash).

4. Género de los Clientes:

Utiliza un gráfico de torta (pie chart) para mostrar la proporción de clientes por género.

5. Número de Compras por Mes y Año:

Utiliza un gráfico de barras para mostrar el número de compras realizadas cada mes y año.

6. Precio Total por Categoría:

Utiliza un gráfico de barras para mostrar el precio total gastado en cada categoría.

7. Ubicación de las Compras en el Mapa:

Utiliza un scatter plot para mostrar las ubicaciones de los centros comerciales en un mapa.