In [1]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
import streamlit as st

pd.set_option('display.max_columns', 500)
pd.set_option('display.float_format', lambda x: '%.2f' % x)

In [3]:
# Sales df
path = "data\main_clean.csv"

# Cargar los datos
@st.cache
def load_data():
    # Supongamos que los datos están en un archivo CSV
    df = pd.read_csv(path)
    df['date'] = pd.to_datetime(df['date'])
    df['day'] = pd.to_datetime(df['day'], format='%Y-%m-%d')
    df['year'] = df['date'].dt.year
    df['month'] = df['date'].dt.month
    df['week'] = df['date'].dt.isocalendar().week
    return df

df = load_data()

# Función para filtrar datos
def filter_data(df, company=None, region=None, city=None):
    if company:
        df = df
    if region:
        df = df[df['region'] == region]
    if city:
        df = df[df['city'] == city]
    return df

# Sidebar para filtros
st.sidebar.header('Filtros')
# selected_company = st.sidebar.selectbox('Compañía', df['company'].unique())
# selected_region = st.sidebar.selectbox('Región', df['region'].unique())
# selected_city = st.sidebar.selectbox('Ciudad', df['city'].unique())

filtered_df = filter_data(df, selected_company, selected_region, selected_city)

# Evolución histórica del total de ventas
st.header('Evolución Histórica del Total de Ventas')
option = st.selectbox('Ver histórico total o por años', ['Total', 'Por años'])
if option == 'Total':
    total_sales = filtered_df.groupby('date')['daily_sales'].sum()
    st.line_chart(total_sales)
else:
    selected_year = st.selectbox('Seleccionar Año', sorted(df['year'].unique()))
    year_data = filtered_df[filtered_df['year'] == selected_year]
    monthly_sales = year_data.groupby('month')['daily_sales'].sum()
    st.line_chart(monthly_sales)

# Producto más vendido y menos vendido en toda la historia
st.header('Producto más vendido y menos vendido en toda la historia')
total_sales_by_product = filtered_df.groupby('product')['daily_sales'].sum()
most_sold_product = total_sales_by_product.idxmax()
least_sold_product = total_sales_by_product.idxmin()
st.write(f'Producto más vendido: {most_sold_product}')
st.write(f'Producto menos vendido: {least_sold_product}')

# Detalles anuales y mensuales
if option == 'Por años':
    st.header(f'Análisis detallado para el año {selected_year}')
    
    # Producto más vendido y menos vendido del año
    annual_sales_by_product = year_data.groupby('product')['daily_sales'].sum()
    most_sold_annual_product = annual_sales_by_product.idxmax()
    least_sold_annual_product = annual_sales_by_product.idxmin()
    st.write(f'Producto más vendido del año: {most_sold_annual_product}')
    st.write(f'Producto menos vendido del año: {least_sold_annual_product}')

    # Ventas mensuales y productos más/menos vendidos cada mes
    for month in sorted(year_data['month'].unique()):
        st.subheader(f'Mes: {month}')
        monthly_data = year_data[year_data['month'] == month]
        monthly_sales_by_product = monthly_data.groupby('product')['daily_sales'].sum()
        most_sold_monthly_product = monthly_sales_by_product.idxmax()
        least_sold_monthly_product = monthly_sales_by_product.idxmin()
        
        st.write(f'Producto más vendido del mes: {most_sold_monthly_product}')
        st.write(f'Producto menos vendido del mes: {least_sold_monthly_product}')
        
        monthly_sales = monthly_data.groupby('date')['daily_sales'].sum()
        st.line_chart(monthly_sales)

# Agregar más filtros y funcionalidades según sea necesario

InternalHashError: module '__main__' has no attribute '__file__'

While caching the body of `load_data()`, Streamlit encountered an
object of type `builtins.function`, which it does not know how to hash.

**In this specific case, it's very likely you found a Streamlit bug so please
[file a bug report here.]
(https://github.com/streamlit/streamlit/issues/new/choose)**

In the meantime, you can try bypassing this error by registering a custom
hash function via the `hash_funcs` keyword in @st.cache(). For example:

```
@st.cache(hash_funcs={builtins.function: my_hash_func})
def my_func(...):
    ...
```

If you don't know where the object of type `builtins.function` is coming
from, try looking at the hash chain below for an object that you do recognize,
then pass that to `hash_funcs` instead:

```
Object of type builtins.function: <function load_data at 0x000001B570E7BCE0>
```

Please see the `hash_funcs` [documentation](https://docs.streamlit.io/library/advanced-features/caching#the-hash_funcs-parameter)
for more details.
            

#### **Prod + vendidos por tiendas**

In [8]:
# Agrupar por año y producto, sumar las ventas diarias y encontrar el producto más vendido en cada año
popular_products = df.groupby([df['date'].dt.year, 'id'])['daily_sales'].sum().reset_index()
popular_products = popular_products.sort_values(by=['date', 'daily_sales'], ascending=[True, False]).groupby('date').head(10)


In [9]:
# Productos populares por año segun el total de daily sales
popular_products.head(10)

Unnamed: 0,date,id,daily_sales
28101,2011,SUPERMARKET_3_586_BOS_2,36574
28102,2011,SUPERMARKET_3_586_BOS_3,34053
23145,2011,SUPERMARKET_3_090_NYC_3,28466
28115,2011,SUPERMARKET_3_587_NYC_3,25464
28100,2011,SUPERMARKET_3_586_BOS_1,23506
28105,2011,SUPERMARKET_3_586_NYC_3,22785
28109,2011,SUPERMARKET_3_586_PHI_3,21474
27791,2011,SUPERMARKET_3_555_BOS_2,20968
27792,2011,SUPERMARKET_3_555_BOS_3,20777
24761,2011,SUPERMARKET_3_252_BOS_2,20133


##### **2015**

In [10]:
# Productos populares 2015 segun el total de daily sales
popular_products[popular_products['date']==2015].head(10)

Unnamed: 0,date,id,daily_sales
150061,2015,SUPERMARKET_3_586_BOS_2,33811
145105,2015,SUPERMARKET_3_090_NYC_3,33279
145405,2015,SUPERMARKET_3_120_NYC_3,31598
150062,2015,SUPERMARKET_3_586_BOS_3,27030
150065,2015,SUPERMARKET_3_586_NYC_3,26792
146721,2015,SUPERMARKET_3_252_BOS_2,21479
145109,2015,SUPERMARKET_3_090_PHI_3,19883
145102,2015,SUPERMARKET_3_090_BOS_3,19415
145103,2015,SUPERMARKET_3_090_NYC_1,18482
150060,2015,SUPERMARKET_3_586_BOS_1,18387


In [11]:
# Agrupar por producto y sumar las ventas diarias
popular_products_total = df.groupby('id')['daily_sales'].sum().reset_index()

# Ordenar los productos por el número total de ventas en orden descendente
popular_products_total = popular_products_total.sort_values(by='daily_sales', ascending=False)

In [12]:
# Productos populares segun el total de daily sales -> Falta quitar las tiendas para ver el total de verdad
popular_products_total.head(10)

Unnamed: 0,id,daily_sales
23145,SUPERMARKET_3_090_NYC_3,250502
28101,SUPERMARKET_3_586_BOS_2,192835
28102,SUPERMARKET_3_586_BOS_3,150122
28105,SUPERMARKET_3_586_NYC_3,134386
23143,SUPERMARKET_3_090_NYC_1,127203
23149,SUPERMARKET_3_090_PHI_3,121434
23141,SUPERMARKET_3_090_BOS_2,119496
23142,SUPERMARKET_3_090_BOS_3,114854
24761,SUPERMARKET_3_252_BOS_2,114153
28100,SUPERMARKET_3_586_BOS_1,112454


In [13]:
# Productos menos populares historicamente segun el total de daily sales -> por tienda!!!
popular_products_total.tail(10)

Unnamed: 0,id,daily_sales
14026,HOME_&_GARDEN_2_307_NYC_4,21
10676,HOME_&_GARDEN_1_512_NYC_4,20
6355,HOME_&_GARDEN_1_073_NYC_3,19
13116,HOME_&_GARDEN_2_216_NYC_4,19
8945,HOME_&_GARDEN_1_336_NYC_3,18
18848,SUPERMARKET_2_057_PHI_2,16
18982,SUPERMARKET_2_071_BOS_3,16
1659,ACCESORIES_1_170_PHI_3,13
29994,SUPERMARKET_3_778_NYC_2,12
5845,HOME_&_GARDEN_1_020_NYC_3,10


#### **Prod + vendidos de toda la compañia**

In [14]:
popular_products_total['item'] = popular_products_total['id'].str.rsplit('_', n=2).str[0]

In [15]:
popular_products_total.head(3)

Unnamed: 0,id,daily_sales,item
23145,SUPERMARKET_3_090_NYC_3,250502,SUPERMARKET_3_090
28101,SUPERMARKET_3_586_BOS_2,192835,SUPERMARKET_3_586
28102,SUPERMARKET_3_586_BOS_3,150122,SUPERMARKET_3_586


In [16]:
# Popular products global
popular_items_total = popular_products_total.groupby('item')['daily_sales'].sum().reset_index()

In [17]:
popular_items_total = popular_items_total.sort_values('daily_sales',ascending=False)

##### **Historicamente**

In [18]:
# Mas populasres Global
popular_items_total.head(10)

Unnamed: 0,item,daily_sales
2314,SUPERMARKET_3_090,1002529
2810,SUPERMARKET_3_586,920242
2476,SUPERMARKET_3_252,565299
2779,SUPERMARKET_3_555,491287
2937,SUPERMARKET_3_714,396172
2811,SUPERMARKET_3_587,396119
2918,SUPERMARKET_3_694,390001
2450,SUPERMARKET_3_226,363082
2426,SUPERMARKET_3_202,295689
2946,SUPERMARKET_3_723,284333


In [19]:
# Mas populasres Global
popular_items_total.tail(10)

Unnamed: 0,item,daily_sales
438,ACCESORIES_2_023,800
1402,HOME_&_GARDEN_2_307,796
1226,HOME_&_GARDEN_2_130,789
1340,HOME_&_GARDEN_2_245,780
526,ACCESORIES_2_111,770
1101,HOME_&_GARDEN_2_005,757
499,ACCESORIES_2_084,746
1271,HOME_&_GARDEN_2_175,718
534,ACCESORIES_2_119,660
1197,HOME_&_GARDEN_2_101,568
