In [1]:
import pandas as pd

# Read the shopping-trends CSV (path is relative to the notebook's working directory).
file_path = 'shopping_trends.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

# Show the first five rows as the cell's output.
data.head(n=5)


Unnamed: 0,Customer ID,Age,Gender,Item Purchased,Category,Purchase Amount (USD),Location,Size,Color,Season,Review Rating,Subscription Status,Payment Method,Shipping Type,Discount Applied,Promo Code Used,Previous Purchases,Preferred Payment Method,Frequency of Purchases
0,1,55,Male,Blouse,Clothing,53,Kentucky,L,Gray,Winter,3.1,Yes,Credit Card,Express,Yes,Yes,14,Venmo,Fortnightly
1,2,19,Male,Sweater,Clothing,64,Maine,L,Maroon,Winter,3.1,Yes,Bank Transfer,Express,Yes,Yes,2,Cash,Fortnightly
2,3,50,Male,Jeans,Clothing,73,Massachusetts,S,Maroon,Spring,3.1,Yes,Cash,Free Shipping,Yes,Yes,23,Credit Card,Weekly
3,4,21,Male,Sandals,Footwear,90,Rhode Island,M,Maroon,Spring,3.5,Yes,PayPal,Next Day Air,Yes,Yes,49,PayPal,Weekly
4,5,45,Male,Blouse,Clothing,49,Oregon,M,Turquoise,Spring,2.7,Yes,Cash,Free Shipping,Yes,Yes,31,PayPal,Annually


In [2]:
# Coerce the numeric inputs of the derived metrics; pd.to_numeric raises on any
# value it cannot parse (default errors='raise').
data['Previous Purchases'] = pd.to_numeric(data['Previous Purchases'])
data['Review Rating'] = pd.to_numeric(data['Review Rating'])

# Numeric weight for each purchase-frequency label. Defined once so that the
# column and its maximum are always derived from the same mapping.
# NOTE(review): the values look like "purchases per month" (0.0833 ~ 1/12) — confirm.
frequency_values = {
    'Weekly': 4, 'Fortnightly': 2, 'Monthly': 1, 'Annually': 0.0833
}
data['Frequency Value'] = data['Frequency of Purchases'].map(frequency_values)
max_frequency = data['Frequency Value'].max()
max_previous_purchases = data['Previous Purchases'].max()

# Series.map yields NaN for labels missing from the mapping, and that NaN then
# propagates into 'Índice de Lealtad'. Report such labels instead of hiding them.
unmapped_frequencies = (
    data.loc[data['Frequency Value'].isna(), 'Frequency of Purchases'].dropna().unique()
)
if len(unmapped_frequencies) > 0:
    print(f"Warning: frequency labels without a numeric value (left as NaN): {sorted(unmapped_frequencies)}")

# Loyalty index: product of the frequency and the previous-purchase count, each
# scaled by its maximum in the dataset.
data['Índice de Lealtad'] = (data['Frequency Value'] / max_frequency) * (data['Previous Purchases'] / max_previous_purchases)

# Adjusted satisfaction: the review rating scaled up by 10% per previous purchase.
data['Puntuación de Satisfacción Ajustada'] = data['Review Rating'] * (1 + (data['Previous Purchases'] / 10))

# 25th / 75th percentiles of the purchase amount, used as segment boundaries.
purchase_amount_percentiles = data['Purchase Amount (USD)'].quantile([0.25, 0.75])
low_threshold = purchase_amount_percentiles[0.25]
high_threshold = purchase_amount_percentiles[0.75]

def categorize_purchase_amount(amount, low=None, high=None):
    """Classify a purchase amount as 'Bajo', 'Medio' or 'Alto'.

    Args:
        amount: The purchase amount to classify.
        low: Lower boundary; amounts strictly below it are 'Bajo'. Defaults to
            the notebook-level ``low_threshold``.
        high: Upper boundary; amounts strictly above it are 'Alto'. Defaults to
            the notebook-level ``high_threshold``.

    Returns:
        'Bajo', 'Alto', or 'Medio' for everything else. Amounts equal to a
        boundary, and NaN (for which both comparisons are False), are 'Medio'.
    """
    # Fall back to the globals so existing one-argument callers (Series.apply) keep working.
    if low is None:
        low = low_threshold
    if high is None:
        high = high_threshold

    if amount < low:
        return 'Bajo'
    if amount > high:
        return 'Alto'
    return 'Medio'

# Label every row by where its purchase amount falls relative to the quartile thresholds.
data['Segmentación de Clientes'] = data['Purchase Amount (USD)'].apply(categorize_purchase_amount)

# 1 when either the discount flag or the promo-code flag equals 'Yes', else 0.
# Vectorized comparison instead of a Python lambda evaluated once per row.
data['Promoción Aplicada'] = (
    data[['Discount Applied', 'Promo Code Used']].eq('Yes').any(axis=1).astype(int)
)
# Percentage of rows on which some promotion was applied.
factor_enganche = (data['Promoción Aplicada'].sum() / len(data)) * 100

# Number of distinct categories bought by each customer.
diversidad_productos = data.groupby('Customer ID')['Category'].nunique().reset_index()
diversidad_productos.columns = ['Customer ID', 'Diversidad de Productos']
# Attach the count by key lookup rather than merging `data` with itself: a merge
# re-run on an already-augmented frame would produce '_x'/'_y' suffixed duplicates,
# whereas this assignment can be re-executed safely.
# Unlike the inner merge, rows with a missing 'Customer ID' are kept (with NaN).
data['Diversidad de Productos'] = data['Customer ID'].map(
    diversidad_productos.set_index('Customer ID')['Diversidad de Productos']
)

data.head()




Unnamed: 0,Customer ID,Age,Gender,Item Purchased,Category,Purchase Amount (USD),Location,Size,Color,Season,...,Promo Code Used,Previous Purchases,Preferred Payment Method,Frequency of Purchases,Frequency Value,Índice de Lealtad,Puntuación de Satisfacción Ajustada,Segmentación de Clientes,Promoción Aplicada,Diversidad de Productos
0,1,55,Male,Blouse,Clothing,53,Kentucky,L,Gray,Winter,...,Yes,14,Venmo,Fortnightly,2.0,0.14,7.44,Medio,1,1
1,2,19,Male,Sweater,Clothing,64,Maine,L,Maroon,Winter,...,Yes,2,Cash,Fortnightly,2.0,0.02,3.72,Medio,1,1
2,3,50,Male,Jeans,Clothing,73,Massachusetts,S,Maroon,Spring,...,Yes,23,Credit Card,Weekly,4.0,0.46,10.23,Medio,1,1
3,4,21,Male,Sandals,Footwear,90,Rhode Island,M,Maroon,Spring,...,Yes,49,PayPal,Weekly,4.0,0.98,20.65,Alto,1,1
4,5,45,Male,Blouse,Clothing,49,Oregon,M,Turquoise,Spring,...,Yes,31,PayPal,Annually,0.0833,0.012911,11.07,Medio,1,1


In [3]:
import plotly.express as px

# 1. Age distribution, coloured by gender
fig_age_gender = px.histogram(
    data,
    x='Age',
    color='Gender',
    nbins=20,
    title='Distribución de la Edad por Género',
)
fig_age_gender.update_layout(bargap=0.2)
fig_age_gender.show()

# 2. Purchase preferences by gender: row count per (Gender, Category) pair
purchase_preferences = (
    data.groupby(['Gender', 'Category'])
    .size()
    .reset_index(name='Count')
)
fig_purchase_preferences = px.bar(
    purchase_preferences,
    x='Category',
    y='Count',
    color='Gender',
    barmode='group',
    title='Preferencias de Compra por Género',
)
fig_purchase_preferences.show()

# 3. Preferred payment method by age
# The age-group column is stored on `data`; the box plot below uses the raw 'Age'.
age_edges = [0, 18, 25, 35, 45, 55, 65, 100]
age_labels = ['0-18', '19-25', '26-35', '36-45', '46-55', '56-65', '66+']
age_bins = pd.cut(data['Age'], bins=age_edges, labels=age_labels)
data['Age Group'] = age_bins
fig_payment_methods = px.box(
    data,
    x='Preferred Payment Method',
    y='Age',
    color='Preferred Payment Method',
    title='Métodos de Pago Preferidos por Edad',
)
fig_payment_methods.show()



In [4]:
import plotly.express as px

# Per-customer sum of the promotion flag, joined with each customer's loyalty index.
promociones_por_cliente = (
    data.groupby('Customer ID')['Promoción Aplicada']
    .sum()
    .reset_index(name='Promociones Usadas')
)
lealtad_por_cliente = data[['Customer ID', 'Índice de Lealtad']].drop_duplicates()

analisis_promociones = promociones_por_cliente.merge(lealtad_por_cliente, on='Customer ID')

# 1. Scatter plot
fig_scatter = px.scatter(
    analisis_promociones,
    x='Promociones Usadas',
    y='Índice de Lealtad',
    title='Relación entre Promociones Usadas e Índice de Lealtad',
)
fig_scatter.show()

# 2. Box plot of the loyalty index per promotion-count bucket
# Intervals are right-closed, so the first bucket (-1, 0] holds exactly zero promotions.
promo_edges = [-1, 0, 5, 10, 20, 50]
promo_labels = ['0', '1-5', '6-10', '11-20', '21+']
analisis_promociones['Grupo de Promociones'] = pd.cut(
    analisis_promociones['Promociones Usadas'],
    bins=promo_edges,
    labels=promo_labels,
)
fig_box = px.box(
    analisis_promociones,
    x='Grupo de Promociones',
    y='Índice de Lealtad',
    title='Índice de Lealtad por Uso de Promociones',
)
fig_box.show()


In [5]:
import plotly.express as px

# 1. Spending patterns per season
fig_box_season = px.box(
    data,
    x='Season',
    y='Purchase Amount (USD)',
    title='Distribución de los Montos de Compra por Estación',
)
fig_box_season.update_layout(
    xaxis_title='Estación',
    yaxis_title='Monto de Compra (USD)',
)
fig_box_season.show()

# 2. Product categories bought per season: row count per (Season, Category) pair
productos_por_estacion = (
    data.groupby(['Season', 'Category'])
    .size()
    .reset_index(name='Count')
)
fig_bar_season = px.bar(
    productos_por_estacion,
    x='Season',
    y='Count',
    color='Category',
    barmode='group',
    title='Categorías de Productos Comprados por Estación',
)
fig_bar_season.update_layout(
    xaxis_title='Estación',
    yaxis_title='Cantidad de Productos',
)
fig_bar_season.show()


In [6]:
import plotly.express as px
import plotly.graph_objects as go
import statsmodels.api as sm

# 'Future Purchases' is computed as the per-customer sum of 'Previous Purchases'.
data['Future Purchases'] = (
    data.groupby('Customer ID')['Previous Purchases'].transform('sum')
)

# 1. Scatter plot
fig_scatter_satisfaction = px.scatter(
    data,
    x='Review Rating',
    y='Future Purchases',
    title='Relación entre Calificaciones de los Productos y Compras Futuras',
)
fig_scatter_satisfaction.update_layout(
    xaxis_title='Calificación de los Productos',
    yaxis_title='Compras Futuras',
)
fig_scatter_satisfaction.show()

# 2. Regression plot: ordinary least squares of 'Future Purchases' on
#    'Review Rating' plus an intercept column.
y = data['Future Purchases']
X = sm.add_constant(data['Review Rating'])
model = sm.OLS(y, X).fit()
predictions = model.predict(X)

# Observed points and the fitted line, supplied as the figure's initial traces.
fig_regression = go.Figure(data=[
    go.Scatter(
        x=data['Review Rating'],
        y=data['Future Purchases'],
        mode='markers',
        name='Datos',
    ),
    go.Scatter(
        x=data['Review Rating'],
        y=predictions,
        mode='lines',
        name='Regresión Lineal',
        line=dict(color='red'),
    ),
])
fig_regression.update_layout(
    title='Regresión Lineal: Calificación de los Productos vs Compras Futuras',
    xaxis_title='Calificación de los Productos',
    yaxis_title='Compras Futuras',
)
fig_regression.show()


In [7]:
import plotly.express as px
import plotly.graph_objects as go
import statsmodels.api as sm
import numpy as np

# 'Future Purchases' is computed as the per-customer sum of 'Previous Purchases'.
data['Future Purchases'] = data.groupby('Customer ID')['Previous Purchases'].transform('sum')

# 1. Interactive correlation heatmap
correlation_matrix = data[['Review Rating', 'Future Purchases']].corr()
fig_corr = px.imshow(correlation_matrix, text_auto=True, title='Matriz de Correlación: Satisfacción vs Compras Futuras')
fig_corr.show()

# 2. Improved linear regression: OLS fit plus the interval returned by
#    get_prediction(...).conf_int().
X = data['Review Rating']
y = data['Future Purchases']
X = sm.add_constant(X)
model = sm.OLS(y, X).fit()
predictions = model.predict(X)
confidence_interval = model.get_prediction(X).conf_int()

# A 'lines' trace joins points in the order they are supplied. The interval
# bounds are curved, so feeding them in row order draws chords back and forth
# across the plot and makes the 'tonexty' fill meaningless. Sort every line
# trace by rating so each is drawn left to right.
rating_order = np.argsort(data['Review Rating'].to_numpy(), kind='stable')
rating_sorted = data['Review Rating'].to_numpy()[rating_order]
predictions_sorted = np.asarray(predictions)[rating_order]
lower_sorted = confidence_interval[rating_order, 0]
upper_sorted = confidence_interval[rating_order, 1]

fig_regression = go.Figure()

# Observed points (order is irrelevant for markers).
fig_regression.add_trace(go.Scatter(
    x=data['Review Rating'], y=data['Future Purchases'],
    mode='markers', name='Datos',
    marker=dict(color='blue', opacity=0.6)
))

# Fitted regression line.
fig_regression.add_trace(go.Scatter(
    x=rating_sorted, y=predictions_sorted,
    mode='lines', name='Regresión Lineal',
    line=dict(color='red')
))

# Lower bound first, then the upper bound filled down to it ('tonexty' fills
# towards the previously added trace).
fig_regression.add_trace(go.Scatter(
    x=rating_sorted, y=lower_sorted,
    mode='lines', name='Confianza Inferior',
    line=dict(color='lightgrey'), fill=None
))
fig_regression.add_trace(go.Scatter(
    x=rating_sorted, y=upper_sorted,
    mode='lines', name='Confianza Superior',
    line=dict(color='lightgrey'), fill='tonexty'
))

fig_regression.update_layout(
    title='Regresión Lineal Mejorada: Calificación de los Productos vs Compras Futuras',
    xaxis_title='Calificación de los Productos',
    yaxis_title='Compras Futuras'
)

fig_regression.show()


In [8]:
!pip install streamlit
!pip install pycountry

Collecting streamlit
  Downloading streamlit-1.35.0-py2.py3-none-any.whl (8.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.6/8.6 MB[0m [31m18.7 MB/s[0m eta [36m0:00:00[0m
Collecting gitpython!=3.1.19,<4,>=3.0.7 (from streamlit)
  Downloading GitPython-3.1.43-py3-none-any.whl (207 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m29.5 MB/s[0m eta [36m0:00:00[0m
Collecting watchdog>=2.1.5 (from streamlit)
  Downloading watchdog-4.0.1-py3-none-manylinux2014_x86_64.whl (83 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m83.0/83.0 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.19,<4,>=3.0.7->streamlit)
  Downloading gitdb-4.0.

In [9]:
%%writefile app2.py

import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import pycountry
import statsmodels.api as sm
import numpy as np


file_path = 'shopping_trends.csv'
data = pd.read_csv(file_path)

# The current page index is kept in session state; start on the first page
# when the key has not been set yet.
if 'page' not in st.session_state:
    st.session_state.page = 0

# Coerce the numeric inputs of the derived metrics; pd.to_numeric raises on any
# value it cannot parse (default errors='raise').
data['Previous Purchases'] = pd.to_numeric(data['Previous Purchases'])
data['Review Rating'] = pd.to_numeric(data['Review Rating'])

# Numeric weight for each purchase-frequency label. Defined once so that the
# column and its maximum are always derived from the same mapping.
# Labels missing from the mapping become NaN and propagate into 'Índice de Lealtad'.
# NOTE(review): the values look like "purchases per month" (0.0833 ~ 1/12) — confirm.
frequency_values = {
    'Weekly': 4, 'Fortnightly': 2, 'Monthly': 1, 'Annually': 0.0833
}
data['Frequency Value'] = data['Frequency of Purchases'].map(frequency_values)
max_frequency = data['Frequency Value'].max()
max_previous_purchases = data['Previous Purchases'].max()

# Loyalty index: product of the frequency and the previous-purchase count, each
# scaled by its maximum in the dataset.
data['Índice de Lealtad'] = (data['Frequency Value'] / max_frequency) * (data['Previous Purchases'] / max_previous_purchases)

# Adjusted satisfaction: the review rating scaled up by 10% per previous purchase.
data['Puntuación de Satisfacción Ajustada'] = data['Review Rating'] * (1 + (data['Previous Purchases'] / 10))

# 25th / 75th percentiles of the purchase amount, used as segment boundaries.
purchase_amount_percentiles = data['Purchase Amount (USD)'].quantile([0.25, 0.75])
low_threshold = purchase_amount_percentiles[0.25]
high_threshold = purchase_amount_percentiles[0.75]

def categorize_purchase_amount(amount, low=None, high=None):
    """Classify a purchase amount as 'Bajo', 'Medio' or 'Alto'.

    Args:
        amount: The purchase amount to classify.
        low: Lower boundary; amounts strictly below it are 'Bajo'. Defaults to
            the module-level ``low_threshold``.
        high: Upper boundary; amounts strictly above it are 'Alto'. Defaults to
            the module-level ``high_threshold``.

    Returns:
        'Bajo', 'Alto', or 'Medio' for everything else. Amounts equal to a
        boundary, and NaN (for which both comparisons are False), are 'Medio'.
    """
    # Fall back to the globals so existing one-argument callers (Series.apply) keep working.
    if low is None:
        low = low_threshold
    if high is None:
        high = high_threshold

    if amount < low:
        return 'Bajo'
    if amount > high:
        return 'Alto'
    return 'Medio'

# Label every row by where its purchase amount falls relative to the quartile thresholds.
data['Segmentación de Clientes'] = data['Purchase Amount (USD)'].apply(categorize_purchase_amount)

# 1 when either the discount flag or the promo-code flag equals 'Yes', else 0.
# Vectorized comparison instead of a Python lambda evaluated once per row; this
# module-level code is executed whenever the script runs.
data['Promoción Aplicada'] = (
    data[['Discount Applied', 'Promo Code Used']].eq('Yes').any(axis=1).astype(int)
)
# Percentage of rows on which some promotion was applied.
factor_enganche = (data['Promoción Aplicada'].sum() / len(data)) * 100

# Number of distinct categories bought by each customer, joined back onto every row.
diversidad_productos = data.groupby('Customer ID')['Category'].nunique().reset_index()
diversidad_productos.columns = ['Customer ID', 'Diversidad de Productos']
data = pd.merge(data, diversidad_productos, on='Customer ID')

# Use the full browser width; issued before any page element is drawn below.
st.set_page_config(layout="wide")


def ventana1():
    """Render page 1: three demographic charts laid out in three equal columns.

    Draws an age histogram coloured by gender, a grouped bar chart of purchase
    counts per category and gender, and a box plot of age per preferred payment
    method. Also stores an 'Age Group' column on the module-level ``data``.
    """
    st.header("Distribución de la Edad por Género")

    left, middle, right = st.columns([1, 1, 1])

    # 1. Age distribution by gender
    fig_age_gender = px.histogram(
        data,
        x='Age',
        color='Gender',
        nbins=20,
        title='Distribución de la Edad por Género',
    )
    fig_age_gender.update_layout(bargap=0.2)
    left.plotly_chart(fig_age_gender)

    # 2. Purchase preferences by gender: row count per (Gender, Category) pair
    purchase_preferences = (
        data.groupby(['Gender', 'Category'])
        .size()
        .reset_index(name='Count')
    )
    fig_purchase_preferences = px.bar(
        purchase_preferences,
        x='Category',
        y='Count',
        color='Gender',
        barmode='group',
        title='Preferencias de Compra por Género',
    )
    middle.plotly_chart(fig_purchase_preferences)

    # 3. Preferred payment method by age
    # The age-group column is stored on `data`; the box plot uses the raw 'Age'.
    age_edges = [0, 18, 25, 35, 45, 55, 65, 100]
    age_labels = ['0-18', '19-25', '26-35', '36-45', '46-55', '56-65', '66+']
    data['Age Group'] = pd.cut(data['Age'], bins=age_edges, labels=age_labels)
    fig_payment_methods = px.box(
        data,
        x='Preferred Payment Method',
        y='Age',
        color='Preferred Payment Method',
        title='Métodos de Pago Preferidos por Edad',
    )
    right.plotly_chart(fig_payment_methods)

def ventana2():
    """Render page 2: promotions used versus loyalty index, in two columns.

    Builds a per-customer table (sum of the promotion flag joined with the
    loyalty index), then draws a scatter plot and a box plot of the loyalty
    index per promotion-count bucket.
    """
    st.header("Correlación entre Promociones y Lealtad del Cliente")

    promociones_por_cliente = (
        data.groupby('Customer ID')['Promoción Aplicada']
        .sum()
        .reset_index(name='Promociones Usadas')
    )
    lealtad_por_cliente = data[['Customer ID', 'Índice de Lealtad']].drop_duplicates()
    analisis_promociones = promociones_por_cliente.merge(lealtad_por_cliente, on='Customer ID')

    left, right = st.columns([1, 1])

    # 1. Scatter plot
    fig_scatter = px.scatter(
        analisis_promociones,
        x='Promociones Usadas',
        y='Índice de Lealtad',
        title='Relación entre Promociones Usadas e Índice de Lealtad',
    )
    left.plotly_chart(fig_scatter)

    # 2. Box plot per promotion-count bucket
    # Intervals are right-closed, so the first bucket (-1, 0] holds exactly zero promotions.
    promo_edges = [-1, 0, 5, 10, 20, 50]
    promo_labels = ['0', '1-5', '6-10', '11-20', '21+']
    analisis_promociones['Grupo de Promociones'] = pd.cut(
        analisis_promociones['Promociones Usadas'],
        bins=promo_edges,
        labels=promo_labels,
    )
    fig_box = px.box(
        analisis_promociones,
        x='Grupo de Promociones',
        y='Índice de Lealtad',
        title='Índice de Lealtad por Uso de Promociones',
    )
    right.plotly_chart(fig_box)


def ventana3():
    """Render page 3: seasonal purchase behaviour, in two columns.

    Draws a box plot of purchase amounts per season and a grouped bar chart of
    row counts per season and product category.
    """
    st.header("Influencias de las Estaciones en el Comportamiento de Compra")

    left, right = st.columns([1, 1])

    # 1. Spending patterns per season
    fig_box_season = px.box(
        data,
        x='Season',
        y='Purchase Amount (USD)',
        title='Distribución de los Montos de Compra por Estación',
    )
    left.plotly_chart(fig_box_season)

    # 2. Product categories bought per season: row count per (Season, Category) pair
    productos_por_estacion = (
        data.groupby(['Season', 'Category'])
        .size()
        .reset_index(name='Count')
    )
    fig_bar_season = px.bar(
        productos_por_estacion,
        x='Season',
        y='Count',
        color='Category',
        barmode='group',
        title='Categorías de Productos Comprados por Estación',
    )
    right.plotly_chart(fig_bar_season)

def ventana4():
    """Render page 4: review rating versus 'Future Purchases', in two columns.

    Stores a 'Future Purchases' column on the module-level ``data`` (the
    per-customer sum of 'Previous Purchases'), then draws a correlation heatmap
    and an OLS regression plot with the interval returned by
    ``get_prediction(...).conf_int()``.
    """
    st.header("Relación entre la Satisfacción del Cliente y las Compras Futuras")

    data['Future Purchases'] = data.groupby('Customer ID')['Previous Purchases'].transform('sum')

    col1, col2 = st.columns([1, 1])

    # 1. Interactive correlation heatmap
    correlation_matrix = data[['Review Rating', 'Future Purchases']].corr()
    fig_corr = go.Figure(data=go.Heatmap(
                       z=correlation_matrix.values,
                       x=correlation_matrix.columns,
                       y=correlation_matrix.columns,
                       colorscale='Blues'))
    fig_corr.update_layout(title='Matriz de Correlación: Satisfacción vs Compras Futuras')
    with col1:
        st.plotly_chart(fig_corr)

    # 2. Improved linear regression
    X = data['Review Rating']
    y = data['Future Purchases']
    X = sm.add_constant(X)
    model = sm.OLS(y, X).fit()
    predictions = model.predict(X)
    confidence_interval = model.get_prediction(X).conf_int()

    # A 'lines' trace joins points in the order they are supplied. The interval
    # bounds are curved, so feeding them in row order draws chords back and
    # forth across the plot and makes the 'tonexty' fill meaningless. Sort every
    # line trace by rating so each is drawn left to right.
    rating_order = np.argsort(data['Review Rating'].to_numpy(), kind='stable')
    rating_sorted = data['Review Rating'].to_numpy()[rating_order]
    predictions_sorted = np.asarray(predictions)[rating_order]
    lower_sorted = confidence_interval[rating_order, 0]
    upper_sorted = confidence_interval[rating_order, 1]

    fig_regression = go.Figure()

    # Observed points (order is irrelevant for markers).
    fig_regression.add_trace(go.Scatter(
        x=data['Review Rating'], y=data['Future Purchases'],
        mode='markers', name='Datos',
        marker=dict(color='blue', opacity=0.6)
    ))

    # Fitted regression line.
    fig_regression.add_trace(go.Scatter(
        x=rating_sorted, y=predictions_sorted,
        mode='lines', name='Regresión Lineal',
        line=dict(color='red')
    ))

    # Lower bound first, then the upper bound filled down to it ('tonexty'
    # fills towards the previously added trace).
    fig_regression.add_trace(go.Scatter(
        x=rating_sorted, y=lower_sorted,
        mode='lines', name='Confianza Inferior',
        line=dict(color='lightgrey'), fill=None
    ))
    fig_regression.add_trace(go.Scatter(
        x=rating_sorted, y=upper_sorted,
        mode='lines', name='Confianza Superior',
        line=dict(color='lightgrey'), fill='tonexty'
    ))

    fig_regression.update_layout(
        title='Regresión Lineal Mejorada: Calificación de los Productos vs Compras Futuras',
        xaxis_title='Calificación de los Productos',
        yaxis_title='Compras Futuras'
    )
    with col2:
        st.plotly_chart(fig_regression)


# Page renderers in display order; st.session_state.page indexes into this list.
visualizations = [
    ventana1,
    ventana2,
    ventana3,
    ventana4
]


def _go_previous():
    """Step back one page, staying on the first page when already there."""
    if st.session_state.page > 0:
        st.session_state.page -= 1


def _go_next():
    """Step forward one page, staying on the last page when already there."""
    if st.session_state.page < len(visualizations) - 1:
        st.session_state.page += 1


# Render the current page. The index is changed in on_click callbacks, which
# Streamlit runs before re-executing the script, so the page drawn here already
# reflects the click. Changing the index after this call (as an
# `if button(...)` check below would) leaves the chart one click behind the
# caption.
visualizations[st.session_state.page]()

# Navigation buttons
col1, col2, col3 = st.columns([1, 1, 1])

col1.button('Previous', on_click=_go_previous)
col3.button('Next', on_click=_go_next)

st.write(f"Visualization {st.session_state.page + 1} of {len(visualizations)}")

Writing app2.py


In [10]:
# Start three commands from one shell line:
#   1. `streamlit run app2.py` in the background, with its output redirected to /content/logs.txt;
#   2. `npx localtunnel --port 8501` in the background, which prints a public URL for local port 8501;
#   3. `curl ipv4.icanhazip.com`, which prints this machine's public IPv4 address.
# NOTE(review): assumes Streamlit is serving on port 8501 (no port is passed to it here) — confirm.
# NOTE(review): the printed IP and tunnel URL end up in the saved output; clear them before sharing.
!streamlit run app2.py &>/content/logs.txt & npx localtunnel --port 8501 & curl ipv4.icanhazip.com

34.170.211.52
[K[?25hnpx: installed 22 in 7.112s
your url is: https://five-readers-flash.loca.lt
