**Natalia Castilla Reyes**

**Miguel Angel Quintero**

In [None]:
# Mount Google Drive at /content/drive (requires the Colab runtime).
# The project folder referenced by `ruta` later in the notebook lives under this mount.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


-----------------------------------------------------------------------

# 📑 Pronóstico de las ventas de diversos productos y evaluación de la satisfacción de los clientes de Walmart (US) mediante redes neuronales.

----------------------------------------------------------

In [None]:
import os
# Point the JVM / Spark environment variables at the locations that the
# install commands below populate.
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
os.environ["SPARK_HOME"] = "/content/spark-2.4.8-bin-hadoop2.7"
# ---- Spark installation (shell commands, Colab only) ----
# Install a headless Java 8 JDK, then download and unpack Spark 2.4.8.
# NOTE(review): downloads.apache.org may only keep current releases; if the
# wget silently fails (-q hides errors) the tar step will fail — confirm the URL.
!apt-get install openjdk-8-jdk-headless -qq > /dev/null
!wget -q https://downloads.apache.org/spark/spark-2.4.8/spark-2.4.8-bin-hadoop2.7.tgz
!tar xf spark-2.4.8-bin-hadoop2.7.tgz

In [None]:
#@title Librerias necesarias
#Librerias generales

import time
import math
import datetime

import numpy as np
import pandas as pd
from tqdm.notebook import tqdm as tqdm
import pickle
import seaborn as sns
from matplotlib import colors
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
import plotly
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
import dask
import joblib
from dask.dataframe import DataFrame as dd
import warnings

# Configuration
warnings.filterwarnings("ignore")
# Use the fully qualified option name: the bare pattern 'max_columns' matches
# more than one option on recent pandas and raises OptionError. The original
# cell set 50 and then immediately overrode it with 400, so only 400 is kept.
pd.set_option('display.max_columns', 400)
plt.style.use('bmh')
color_pal = plt.rcParams['axes.prop_cycle'].by_key()['color']

#########Modelos

import xgboost as xgb
import lightgbm as lgb
from lightgbm import LGBMRegressor




from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))

#Tensor Flow
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf

# importa objetos de keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, LSTM
print("Versión de Tensorflow: ", tf.__version__)
# optimizador
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers
from tensorflow.keras import regularizers
from tensorflow.keras import callbacks
from tensorflow.keras.callbacks import EarlyStopping


##Para definir la funcion de perdida que necesitaos en keeras

from keras import backend as K

# Global settings

DAYS_PRED = 28  # presumably the forecast horizon in days — TODO confirm against the modelling cells
ruta = "/content/drive/MyDrive/Proyecto_topicos/"  # project folder on the mounted Drive; note the trailing slash

Versión de Tensorflow:  2.5.0


In [None]:
# Read the full dataset with Spark and report its row count.
# NOTE(review): `spark` is not created in any cell visible above (no SparkSession
# is built) — confirm where it is defined, otherwise this fails on a fresh kernel.
# NOTE(review): `df` is rebound to a pandas DataFrame by a later read_parquet cell.
df = spark.read.option("header", "true").option("encoding", "latin1").parquet(ruta+"df_total.parquet")
print("Cantidad de datos en df:",df.count())

# 3. 🔍 **Entendimiento de los datos (Pronóstico)**


* En esta sección realizaremos el análisis detallado de las fuentes de datos que se tienen. 
* Para realizar este análisis emplearemos: Pandas y Pyspark (para algunos procesos debido a la volumetría de los datos).
------------------------------------------------------

**Exploración**
---------------------

In [None]:
#@title Funciones globales

#Downcast para liberar memoria

def _narrowest_dtype(lo, hi, candidates, limits, fallback):
    """Return the first dtype in ``candidates`` whose range strictly contains ``lo`` and ``hi``.

    ``limits`` is ``np.iinfo`` or ``np.finfo``. When no candidate fits (including
    the case where ``lo``/``hi`` are NaN, so every comparison is False),
    ``fallback`` is returned.
    """
    for candidate in candidates:
        bounds = limits(candidate)
        if lo > bounds.min and hi < bounds.max:
            return candidate
    return fallback


def downcast(df):
    """Convert each column of ``df`` to a smaller dtype to reduce memory usage.

    * Columns whose dtype name contains ``'int'`` become the narrowest of
      int8 / int16 / int32 whose range strictly contains the column's min and
      max, otherwise int64.
    * Columns whose dtype name contains ``'float'`` become float16 or float32
      under the same rule, otherwise float64.
    * Object columns become ``category``, except a column named ``'date'``,
      which is parsed with the format ``%Y-%m-%d``.
    * Any other dtype is left untouched.

    Args:
        df: pandas DataFrame; it is modified in place.

    Returns:
        The same DataFrame object, with the converted columns.
    """
    narrow_ints = (np.int8, np.int16, np.int32)
    narrow_floats = (np.float16, np.float32)

    for col, dtype in zip(df.dtypes.index.tolist(), df.dtypes.values.tolist()):
        dtype_name = str(dtype)
        if 'int' in dtype_name:
            # min/max are computed once instead of once per candidate type.
            lo, hi = df[col].min(), df[col].max()
            df[col] = df[col].astype(_narrowest_dtype(lo, hi, narrow_ints, np.iinfo, np.int64))
        elif 'float' in dtype_name:
            lo, hi = df[col].min(), df[col].max()
            # NOTE: float16 keeps only about 3 significant decimal digits; the
            # choice is preserved from the original implementation.
            df[col] = df[col].astype(_narrowest_dtype(lo, hi, narrow_floats, np.finfo, np.float64))
        elif dtype == object:
            # Builtin `object` replaces the `np.object` alias, which no longer
            # exists in current NumPy releases.
            if col == 'date':
                df[col] = pd.to_datetime(df[col], format='%Y-%m-%d')
            else:
                df[col] = df[col].astype('category')
    return df


def introduce_nulls(df, color_map=False):
    """Reindex ``df`` onto a continuous daily calendar, leaving gaps as NaN rows.

    The calendar runs from the earliest to the latest day found in the ``date``
    column. Days that are absent from ``df`` appear in the result with NaN in
    every other column.

    Args:
        df: DataFrame with a datetime ``date`` column holding unique days.
        color_map: Ignored. It used to tell the function which auto-generated
            column name (``'level_0'`` or ``'index'``) to rename back to
            ``'date'``; the index is now named explicitly, so the result no
            longer depends on it. Kept so existing calls keep working.

    Returns:
        A new DataFrame whose first column is ``date`` (one row per calendar
        day), followed by the remaining original columns. An empty input is
        returned as an unchanged copy.
    """
    if df.empty:
        # min()/max() of an empty column are NaN and date_range would fail.
        return df.copy()
    idx = pd.date_range(df.date.dt.date.min(), df.date.dt.date.max())
    # Naming the index makes reset_index() always emit a 'date' column, even
    # when the frame already carries an 'index' column.
    return df.set_index('date').reindex(idx).rename_axis('date').reset_index()

def plot_metric(df,state,store,metric, color_map):
    """Build a Plotly time-series figure of one store's daily ``metric``.

    Rows are restricted to the given state and store and to dates up to
    2016-05-22. They are then summed per day and per SNAP flag (column
    ``'snap_' + state``), once over all categories and once for the ``FOODS``
    category only. Each of the four resulting series is expanded to a
    continuous calendar with ``introduce_nulls`` and drawn as its own trace.

    Args:
        df: DataFrame with columns ``state_id``, ``store_id``, ``date``,
            ``cat_id``, ``sold``, ``revenue`` and ``'snap_' + state``.
        state: State identifier used for filtering and to pick the SNAP column.
        store: Store identifier used for filtering and in the figure title.
        metric: Column to plot; ``'sold'`` selects the units y-axis title, any
            other value selects the revenue title.
        color_map: Unused; kept so existing calls keep working.

    Returns:
        The assembled ``plotly.graph_objects.Figure``.
    """
    snap_col = 'snap_' + state
    store_rows = df[(df['state_id']==state)&(df['store_id']==store)&(df['date']<='2016-05-22')]
    food_rows = store_rows[store_rows['cat_id']=='FOODS']

    def _daily_by_snap(rows):
        """Return (non-SNAP, SNAP) daily totals, each on a gap-free calendar."""
        # Columns are selected with a list: selecting several columns with a
        # bare tuple is not supported by current pandas.
        totals = rows.groupby(['date', snap_col], as_index=False)[['sold', 'revenue']].sum()
        return (introduce_nulls(totals[totals[snap_col]==0]),
                introduce_nulls(totals[totals[snap_col]==1]))

    non_snap_sales, snap_sales = _daily_by_snap(store_rows)
    non_snap_foods, snap_foods = _daily_by_snap(food_rows)

    fig = go.Figure()
    series_by_label = (
        ('Total '+metric+'(Non-SNAP)', non_snap_sales),
        ('Total '+metric+'(SNAP)', snap_sales),
        ('Food '+metric+'(Non-SNAP)', non_snap_foods),
        ('Food '+metric+'(SNAP)', snap_foods),
    )
    for label, series in series_by_label:
        fig.add_trace(go.Scatter(x=series['date'], y=series[metric], name=label))

    fig.update_yaxes(title_text='Total de productos vendidos' if metric=='sold' else 'Total de ingresos($)')
    # Quick-zoom buttons shown above the date axis.
    zoom_buttons = [
        dict(count=1, label="1m", step="month", stepmode="backward"),
        dict(count=6, label="6m", step="month", stepmode="backward"),
        dict(count=1, label="YTD", step="year", stepmode="todate"),
        dict(count=1, label="1y", step="year", stepmode="backward"),
        dict(count=2, label="2y", step="year", stepmode="backward"),
        dict(step="all"),
    ]
    # A single layout update: the original set title=store and then replaced
    # it with the 'Tienda: ...' title in a second call.
    fig.update_layout(
        template='seaborn',
        title='Tienda:'+' '+ store,
        xaxis=dict(
            range=['2014-01-01','2016-05-22'],  # initial visible window
            rangeselector=dict(buttons=zoom_buttons),
            rangeslider=dict(autorange=True),
            type="date",
        ),
    )
    return fig


def calmap(cal_data, state, store, scale, color_map):
    """Draw one calendar heatmap per year of units sold for a single store.

    ``cal_data`` is filtered to ``state`` / ``store``; every distinct ``year``
    gets its own subplot row whose heatmap uses ``week`` on x, ``day_name`` on
    y and ``sold`` as the colour value. Each yearly slice is passed through
    ``introduce_nulls`` (together with ``color_map``) before plotting.

    Args:
        cal_data: DataFrame with ``state_id``, ``store_id``, ``year``, ``date``,
            ``week``, ``day_name`` and ``sold`` columns.
        state: State identifier to keep.
        store: Store identifier to keep; also shown in the title.
        scale: Plotly colorscale applied to the shared colour axis.
        color_map: Forwarded unchanged to ``introduce_nulls``.

    Returns:
        The assembled Plotly figure.
    """
    in_store = (cal_data['state_id']==state)&(cal_data['store_id']==store)
    cal_data = cal_data[in_store]
    years = cal_data.year.unique().tolist()
    fig = make_subplots(rows=len(years),cols=1,shared_xaxes=True,vertical_spacing=0.005)
    for row, year in enumerate(years, start=1):
        yearly = introduce_nulls(cal_data[cal_data['year']==year], color_map)
        heatmap = go.Heatmap(
            z=yearly.sold,
            x=yearly.week,
            y=yearly.day_name,
            hovertext=yearly.date.dt.date,
            coloraxis="coloraxis",
            name=year,
        )
        fig.add_trace(heatmap, row, 1)
        fig.update_yaxes(title_text=year, tickfont=dict(size=5), row=row, col=1)
    fig.update_xaxes(range=[1,53], tickfont=dict(size=10), nticks=53)
    # All rows share one colour axis, so the colorscale is set once on the layout.
    fig.update_layout(
        coloraxis={'colorscale':scale},
        template='seaborn',
        title='Tienda:'+' '+ store,
    )
    return fig

**Cargando la información procesada:**

In [None]:
# Load the pre-processed dataset from the project folder into pandas.
df = pd.read_parquet(ruta+"df_total_aux_1.parquet")

In [None]:
# Inspect the rows of store CA_3 from 2015 onwards (rendered truncated by pandas).
df[(df['store_id']=='CA_3')&(df['year']>=2015)]

Unnamed: 0,store_id,item_id,wm_yr_wk,d,id,dept_id,cat_id,state_id,sold,date,weekday,wday,month,year,event_name_1,event_type_1,event_name_2,event_type_2,snap_CA,snap_TX,snap_WI,sell_price
1179611,CA_3,FOODS_1_001,11502,d_1471,FOODS_1_001_CA_3_evaluation,FOODS_1,FOODS,CA,0.0,2015-02-07,Saturday,1,2,2015.0,,,,,1,1,0,2.24
1179612,CA_3,FOODS_1_001,11502,d_1472,FOODS_1_001_CA_3_evaluation,FOODS_1,FOODS,CA,1.0,2015-02-08,Sunday,2,2,2015.0,,,,,1,0,1,2.24
1179613,CA_3,FOODS_1_001,11502,d_1473,FOODS_1_001_CA_3_evaluation,FOODS_1,FOODS,CA,0.0,2015-02-09,Monday,3,2,2015.0,,,,,1,1,1,2.24
1179614,CA_3,FOODS_1_001,11502,d_1474,FOODS_1_001_CA_3_evaluation,FOODS_1,FOODS,CA,1.0,2015-02-10,Tuesday,4,2,2015.0,,,,,1,0,0,2.24
1179615,CA_3,FOODS_1_001,11502,d_1475,FOODS_1_001_CA_3_evaluation,FOODS_1,FOODS,CA,2.0,2015-02-11,Wednesday,5,2,2015.0,,,,,0,1,1,2.24
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55036191,CA_3,HOUSEHOLD_2_516,11613,d_1914,HOUSEHOLD_2_516_CA_3_evaluation,HOUSEHOLD_2,HOUSEHOLD,CA,0.0,2016-04-25,Monday,3,4,2016.0,,,,,0,0,0,5.94
55036192,CA_3,HOUSEHOLD_2_516,11613,d_1915,HOUSEHOLD_2_516_CA_3_evaluation,HOUSEHOLD_2,HOUSEHOLD,CA,2.0,2016-04-26,Tuesday,4,4,2016.0,,,,,0,0,0,5.94
55036193,CA_3,HOUSEHOLD_2_516,11613,d_1916,HOUSEHOLD_2_516_CA_3_evaluation,HOUSEHOLD_2,HOUSEHOLD,CA,0.0,2016-04-27,Wednesday,5,4,2016.0,,,,,0,0,0,5.94
55036194,CA_3,HOUSEHOLD_2_516,11613,d_1917,HOUSEHOLD_2_516_CA_3_evaluation,HOUSEHOLD_2,HOUSEHOLD,CA,0.0,2016-04-28,Thursday,6,4,2016.0,,,,,0,0,0,5.94


In [None]:
# Reload the same parquet file (restores `df` to its on-disk state) and print its shape.
# NOTE(review): this repeats the read done in the earlier load cell — confirm both are needed.
df = pd.read_parquet(ruta+"df_total_aux_1.parquet")
print("dimensiòn del dataframe:", df.shape)

dimensiòn del dataframe: (26617770, 22)


In [None]:
# Adjust column types.
# The footprint in MB is recorded first so it can be compared after downcasting.
df_bd = np.round(df.memory_usage().sum()/(1024*1024),1)

# Target Python type for each column, applied in this order.
column_types = {
    'wday': int,
    'year': float,
    'month': int,
    'snap_CA': int,
    'snap_TX': int,
    'snap_WI': int,
    'sell_price': float,
    'sold': float,
}
for column, target in column_types.items():
    df[column] = df[column].astype(target)

df['date'] = pd.to_datetime(df['date'])
df.head()

Unnamed: 0,store_id,item_id,wm_yr_wk,d,id,dept_id,cat_id,state_id,sold,date,weekday,wday,month,year,event_name_1,event_type_1,event_name_2,event_type_2,snap_CA,snap_TX,snap_WI,sell_price
133,CA_1,FOODS_1_001,11403,d_1114,FOODS_1_001_CA_1_evaluation,FOODS_1,FOODS,CA,0.0,2014-02-15,Saturday,1,2,2014.0,,,,,0,1,1,2.24
134,CA_1,FOODS_1_001,11403,d_1115,FOODS_1_001_CA_1_evaluation,FOODS_1,FOODS,CA,2.0,2014-02-16,Sunday,2,2,2014.0,,,,,0,0,0,2.24
135,CA_1,FOODS_1_001,11403,d_1116,FOODS_1_001_CA_1_evaluation,FOODS_1,FOODS,CA,1.0,2014-02-17,Monday,3,2,2014.0,PresidentsDay,National,,,0,0,0,2.24
136,CA_1,FOODS_1_001,11403,d_1117,FOODS_1_001_CA_1_evaluation,FOODS_1,FOODS,CA,0.0,2014-02-18,Tuesday,4,2,2014.0,,,,,0,0,0,2.24
137,CA_1,FOODS_1_001,11403,d_1118,FOODS_1_001_CA_1_evaluation,FOODS_1,FOODS,CA,0.0,2014-02-19,Wednesday,5,2,2014.0,,,,,0,0,0,2.24


In [None]:
# Summarise column dtypes and memory usage after the casts above.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 26617770 entries, 133 to 59181089
Data columns (total 22 columns):
 #   Column        Dtype         
---  ------        -----         
 0   store_id      object        
 1   item_id       object        
 2   wm_yr_wk      object        
 3   d             object        
 4   id            object        
 5   dept_id       object        
 6   cat_id        object        
 7   state_id      object        
 8   sold          float64       
 9   date          datetime64[ns]
 10  weekday       object        
 11  wday          int64         
 12  month         int64         
 13  year          float64       
 14  event_name_1  object        
 15  event_type_1  object        
 16  event_name_2  object        
 17  event_type_2  object        
 18  snap_CA       int64         
 19  snap_TX       int64         
 20  snap_WI       int64         
 21  sell_price    float64       
dtypes: datetime64[ns](1), float64(3), int64(5), object(13)
memory 

In [None]:
#Proceso para liberar memoria
%%time
df = downcast(df)
df_ad = np.round(df.memory_usage().sum()/(1024*1024),1)

dic = {'DataFrame':['DataFrame total'],
       'Antes DWNC':[df_bd],
       'Después DWNC':[df_ad]}

memory = pd.DataFrame(dic)
memory = pd.melt(memory, id_vars='DataFrame', var_name='Status', value_name='Memory (MB)')
memory.sort_values('Memory (MB)',inplace=True)
fig = px.bar(memory, x='DataFrame', y='Memory (MB)', color='Status', barmode='group', text='Memory (MB)')
fig.update_traces(texttemplate='%{text} MB', textposition='outside')
fig.update_layout(template='seaborn', title='Efecto del proceso de DWNC')
plotly.offline.plot(fig, filename = ruta+'/EDA_HTMLS/reduccion_memoria.html', auto_open=False)
fig.show()

CPU times: user 29 s, sys: 2.3 s, total: 31.3 s
Wall time: 31.3 s


**Clasificación de la información**

In [None]:
# Count rows per state / store / category / department and show them as a
# treemap rooted at a constant 'USA' level.
group = (
    df.groupby(['state_id','store_id','cat_id','dept_id'], as_index=False)['item_id']
    .count()
    .dropna()
    .assign(USA='USA')
    .rename(columns={
        'state_id': 'Estado',
        'store_id': 'Tienda',
        'cat_id': 'Categoría',
        'dept_id': 'Departamento',
        'item_id': 'Cantidad',
    })
)
fig = px.treemap(
    group,
    path=['USA', 'Estado', 'Tienda', 'Categoría', 'Departamento'],
    values='Cantidad',
    color='Cantidad',
    color_continuous_scale=px.colors.sequential.Sunset,
    title='Walmart: Distribución de Productos',
)
fig.update_layout(template='seaborn')
# Export a standalone HTML copy, then render inline with the Colab renderer.
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/explicacion_distribucion_datos.html',
    auto_open=False,
)
fig.show(renderer="colab")

**Estudio de la distribución de los precios**

In [None]:
# Mean sell price of every item within each store, shown as one violin per store.
group_price_store = (
    df.groupby(['state_id','store_id','item_id'])['sell_price']
    .mean()
    .dropna()
    .to_frame()
    .reset_index()
)
fig = px.violin(
    group_price_store,
    x='store_id',
    color='state_id',
    y='sell_price',
    box=True,
    hover_name='item_id',
    labels={"state_id": "Estado"},
)
fig.update_xaxes(title_text='Tienda')
fig.update_yaxes(title_text='Precios de Venta($)')
fig.update_layout(template='seaborn',title='Distribución de los precios por tienda')
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/precios_por_tienda.html',
    auto_open=False,
)
fig.show()

Notas sobre la distribución de precios por tienda:
* Se observa que la distribución de los precios de venta respecto a las 10 tiendas es uniforme. El valor de la mediana fluctúa entre 3.34 y 3.42.


**Análisis de la distribución de precios por categoría**

In [None]:
# Mean sell price of every item per store and category, one violin per
# store / category pair. reset_index() adds an 'index' column, as before.
group_price_cat = (
    df.groupby(['store_id','cat_id','item_id'], as_index=False)['sell_price']
    .mean()
    .dropna()
    .reset_index()
)
fig = px.violin(
    group_price_cat,
    x='store_id',
    color='cat_id',
    y='sell_price',
    box=True,
    hover_name='item_id',
    labels={"cat_id": "Categoría"},
)
fig.update_xaxes(title_text='Tienda')
fig.update_yaxes(title_text='Precio de Venta($)')
fig.update_layout(template='seaborn',title='Distribución de los precios por categoría',)
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/precios_x_categoria.html',
    auto_open=False,
)
fig.show()

Notas sobre el gráfico:

* Los items con categoría "FOODS" suelen ser más económicos que los items de las dos categorías restantes.

**Análisis de la cantidad de productos vendidos por tienda**

In [None]:
# Total units sold per day and store. `group` is reused by the cells below.
# reset_index() is kept on purpose: it adds the 'index' column that the
# calendar-heatmap cells (color_map=True) rely on.
group = (
    df.groupby(['year','date','state_id','store_id'], as_index=False)['sold']
    .sum()
    .dropna()
    .reset_index()
)
fig = px.violin(
    group,
    x='store_id',
    color='state_id',
    y='sold',
    box=True,
    labels={"state_id": "Estado"},
)
fig.update_xaxes(title_text='Tienda')
fig.update_yaxes(title_text='Total de productos vendidos')
fig.update_layout(template='seaborn',title='Distribución de productos vendidos por tienda')
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/cantidad_productos_tienda.html',
    auto_open=False,
)
fig.show()

**Análisis de los productos vendidos respecto al tiempo transcurrido**

In [None]:
#@title code
title = 'Productos vendidos en cada tienda respecto al tiempo de estudio'
# Not used by this cell; kept in case later cells read them.
years = group.year.unique().tolist()
buttons = []

# Explicit state -> y-axis assignment (top panel CA, middle TX, bottom WI), so
# the hard-coded axis titles below always match the traces drawn on each axis
# instead of depending on the order in which states appear in `group`.
state_axes = {'CA': 'y3', 'TX': 'y2', 'WI': 'y'}


def _state_yaxis(label, domain):
    """Return the layout settings of one stacked y-axis panel.

    ``label`` is the axis title and ``domain`` the vertical fraction of the
    figure the panel occupies. The title is given as ``title.text`` /
    ``title.font`` rather than the deprecated ``titlefont`` attribute.
    """
    return dict(
        anchor="x",
        autorange=True,
        domain=domain,
        mirror=True,
        showline=True,
        side="left",
        tickfont={"size":10},
        tickmode="auto",
        ticks="",
        title=dict(text=label, font={"size":20}),
        type="linear",
        zeroline=False,
    )


fig = go.Figure()
for state, axis_ref in state_axes.items():
    group_state = group[group['state_id']==state]
    # One line per store, drawn on the panel of its state.
    for store in group_state.store_id.unique().tolist():
        group_state_store = group_state[group_state['store_id']==store]
        fig.add_trace(go.Scatter(name=store, x=group_state_store['date'], y=group_state_store['sold'],
                                 showlegend=True, yaxis=axis_ref))

fig.update_layout(
    xaxis=dict(
        range=['2014-01-01','2016-05-22'],  # initial visible window
        rangeselector=dict(
            buttons=[
                dict(count=1, label="1m", step="month", stepmode="backward"),
                dict(count=6, label="6m", step="month", stepmode="backward"),
                dict(count=1, label="YTD", step="year", stepmode="todate"),
                dict(count=1, label="1y", step="year", stepmode="backward"),
                dict(count=2, label="2y", step="year", stepmode="backward"),
                dict(step="all"),
            ]
        ),
        rangeslider=dict(autorange=True),
        type="date",
    ),
    yaxis=_state_yaxis('WI', [0, 0.33]),
    yaxis2=_state_yaxis('TX', [0.33, 0.66]),
    yaxis3=_state_yaxis('CA', [0.66, 1]),
)
fig.update_layout(template='seaborn', title=title)
plotly.offline.plot(fig, filename = ruta+'/EDA_HTMLS/productos_vendidos_tienda_por_fecha.html', auto_open=False)
fig.show()

<h2 style="background-color:DodgerBlue; color:white" > Análisis por estado</h2>
  <a class="list-group-item list-group-item-action" data-toggle="list" href="#cal" role="tab" aria-controls="profile">California<span class="badge badge-primary badge-pill">1</span></a>
  <a class="list-group-item list-group-item-action" data-toggle="list" href="#tex" role="tab" aria-controls="messages">Texas<span class="badge badge-primary badge-pill">2</span></a>
  <a class="list-group-item list-group-item-action"  data-toggle="list" href="#wis" role="tab" aria-controls="settings">Wisconsin<span class="badge badge-primary badge-pill">3</span></a>
<br>


En esta sección se analizarán las ventas y las ganancias de cada una de las tiendas. Se realizarán tres gráficos por tienda; los detalles de cada uno de los gráficos se encuentran a continuación:

* Análisis de las ventas diarias de cada tienda.

* Análisis de las ganancias diarias de cada tienda.

* Mapa de calor con las ventas diarias.

**Primero definimos la variable ingresos**

In [None]:
# Revenue per row = units sold times unit sell price.
# NOTE(review): if both columns were reduced to float16 by downcast(), the
# product carries that limited precision — confirm this is acceptable.
df['revenue'] = df['sold'].mul(df['sell_price'])

In [None]:
# Input for the calendar heatmaps: daily totals up to 2016-05-22 with the ISO
# week number and weekday name of every date.
# - ISO date literal, consistent with the other cells (no day-first guessing).
# - .copy() so the new columns are added to an independent frame, not a slice.
cal_data = group[group['date'] <= '2016-05-22'].copy()
cal_data['date'] = pd.to_datetime(cal_data['date'])
# dt.isocalendar().week replaces the removed dt.weekofyear accessor; it yields
# an unsigned nullable integer, so cast back to a plain int.
cal_data['week'] = cal_data['date'].dt.isocalendar().week.astype(int)
cal_data['day_name'] = cal_data['date'].dt.day_name()

<a id="cal" class="btn btn-primary btn-lg btn-block active" role="button" aria-pressed="true" style="color:white;">California</a>
<a id='csn'><span class="label label-info">Tiendas ubicadas en California:</span></a>
<nav aria-label="Store Navigator">
  <ul class="pagination">
    <li class="page-item"><a class="page-link" href='#C1'>CA_1</a></li>
    <li class="page-item"><a class="page-link" href="#C2">CA_2</a></li>
    <li class="page-item"><a class="page-link" href="#C3">CA_3</a></li>
    <li class="page-item"><a class="page-link" href="#C4">CA_4</a></li>
  </ul>
</nav>

### <a id='C1'>CA_1</a>
<a href='#csn'><span class="label label-info">Ver tiendas</span></a>
------------------------------------------

In [None]:
# Daily units sold at CA_1 (SNAP vs. non-SNAP days); also exported as HTML.
fig = plot_metric(df, state='CA', store='CA_1', metric='sold', color_map=False)
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/total_productos_CA_1.html',
    auto_open=False,
)
fig.show()

**Análisis del ingreso**

In [None]:
# Daily revenue at CA_1 (SNAP vs. non-SNAP days); also exported as HTML.
fig = plot_metric(df, state='CA', store='CA_1', metric='revenue', color_map=False)
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/ingresos_productos_CA_1.html',
    auto_open=False,
)
fig.show()

In [None]:
# Calendar heatmap for CA_1.
# z: number of products sold; x: week of the year.
fig = calmap(cal_data, state='CA', store='CA_1', scale='magma', color_map=True)
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/color_map_CA_1.html',
    auto_open=False,
)
fig.show()

### <a id='C2'>CA_2</a>
<a href='#csn'><span class="label label-info">Ver tiendas</span></a>
------------------------------------------

In [None]:
# Daily units sold at CA_2 (SNAP vs. non-SNAP days); also exported as HTML.
fig = plot_metric(df, state='CA', store='CA_2', metric='sold', color_map=False)
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/total_productos_CA_2.html',
    auto_open=False,
)
fig.show()

In [None]:
# Daily revenue at CA_2 (SNAP vs. non-SNAP days); also exported as HTML.
fig = plot_metric(df, state='CA', store='CA_2', metric='revenue', color_map=False)
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/ingresos_productos_CA_2.html',
    auto_open=False,
)
fig.show()

In [None]:
# Calendar heatmap for CA_2.
# z: number of products sold; x: week of the year.
fig = calmap(cal_data, state='CA', store='CA_2', scale='magma', color_map=True)
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/color_map_CA_2.html',
    auto_open=False,
)
fig.show()

### <a id='C3'>CA_3</a>
<a href='#csn'><span class="label label-info">Ver tiendas</span></a>
------------------------------------------

In [None]:
# Daily units sold at CA_3 (SNAP vs. non-SNAP days); also exported as HTML.
fig = plot_metric(df, state='CA', store='CA_3', metric='sold', color_map=False)
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/total_productos_CA_3.html',
    auto_open=False,
)
fig.show()

In [None]:
# Daily revenue at CA_3 (SNAP vs. non-SNAP days); also exported as HTML.
fig = plot_metric(df, state='CA', store='CA_3', metric='revenue', color_map=False)
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/ingresos_productos_CA_3.html',
    auto_open=False,
)
fig.show()

In [None]:
# Calendar heatmap for CA_3.
# z: number of products sold; x: week of the year.
fig = calmap(cal_data, state='CA', store='CA_3', scale='magma', color_map=True)
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/color_map_CA_3.html',
    auto_open=False,
)
fig.show()

### <a id='C4'>CA_4</a>
<a href='#csn'><span class="label label-info">Ver tiendas</span></a>
------------------------------------------

In [None]:
# Daily units sold at CA_4 (SNAP vs. non-SNAP days); also exported as HTML.
fig = plot_metric(df, state='CA', store='CA_4', metric='sold', color_map=False)
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/total_productos_CA_4.html',
    auto_open=False,
)
fig.show()

In [None]:
# Daily revenue at CA_4 (SNAP vs. non-SNAP days); also exported as HTML.
fig = plot_metric(df, state='CA', store='CA_4', metric='revenue', color_map=False)
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/ingresos_productos_CA_4.html',
    auto_open=False,
)
fig.show()

In [None]:
# Calendar heatmap for CA_4.
# z: number of products sold; x: week of the year.
fig = calmap(cal_data, state='CA', store='CA_4', scale='magma', color_map=True)
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/color_map_CA_4.html',
    auto_open=False,
)
fig.show()

### <a id="tex" class="btn btn-primary btn-lg btn-block active" role="button" aria-pressed="true" style="color:white;">Texas</a>
<a id='csn'><span class="label label-info">Tiendas ubicadas en Texas:</span></a>
<nav aria-label="Store Navigator">
  <ul class="pagination">
    <li class="page-item"><a class="page-link" href='#C1'>TX_1</a></li>
    <li class="page-item"><a class="page-link" href="#C2">TX_2</a></li>
    <li class="page-item"><a class="page-link" href="#C3">TX_3</a></li>
  </ul>
</nav>


### <a id='C2'>TX_1</a>
<a href='#csn'><span class="label label-info">Ver tiendas</span></a>
------------------------------------------

In [None]:
# Daily units sold at TX_1 (SNAP vs. non-SNAP days); also exported as HTML.
fig = plot_metric(df, state='TX', store='TX_1', metric='sold', color_map=False)
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/total_productos_TX_1.html',
    auto_open=False,
)
fig.show()

In [None]:
# Daily revenue at TX_1 (SNAP vs. non-SNAP days); also exported as HTML.
fig = plot_metric(df, state='TX', store='TX_1', metric='revenue', color_map=False)
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/ingresos_productos_TX_1.html',
    auto_open=False,
)
fig.show()

In [None]:
# Calendar heatmap for TX_1.
# z: number of products sold; x: week of the year.
fig = calmap(cal_data, state='TX', store='TX_1', scale='magma', color_map=True)
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/color_map_TX_1.html',
    auto_open=False,
)
fig.show()

### <a id='C2'>TX_2</a>
<a href='#csn'><span class="label label-info">Ver tiendas</span></a>
------------------------------------------

In [None]:
# Daily units sold at TX_2 (SNAP vs. non-SNAP days); also exported as HTML.
fig = plot_metric(df, state='TX', store='TX_2', metric='sold', color_map=False)
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/total_productos_TX_2.html',
    auto_open=False,
)
fig.show()

In [None]:
# Daily revenue at TX_2 (SNAP vs. non-SNAP days); also exported as HTML.
fig = plot_metric(df, state='TX', store='TX_2', metric='revenue', color_map=False)
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/ingresos_productos_TX_2.html',
    auto_open=False,
)
fig.show()

In [None]:
# Calendar heatmap for TX_2.
# z: number of products sold; x: week of the year.
fig = calmap(cal_data, state='TX', store='TX_2', scale='magma', color_map=True)
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/color_map_TX_2.html',
    auto_open=False,
)
fig.show()

### <a id='C2'>TX_3</a>
<a href='#csn'><span class="label label-info">Ver tiendas</span></a>
------------------------------------------

In [None]:
# Daily units sold at TX_3 (SNAP vs. non-SNAP days); also exported as HTML.
fig = plot_metric(df, state='TX', store='TX_3', metric='sold', color_map=False)
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/total_productos_TX_3.html',
    auto_open=False,
)
fig.show()

In [None]:
# Daily revenue at TX_3 (SNAP vs. non-SNAP days); also exported as HTML.
fig = plot_metric(df, state='TX', store='TX_3', metric='revenue', color_map=False)
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/ingresos_productos_TX_3.html',
    auto_open=False,
)
fig.show()

In [None]:
# Calendar heatmap for TX_3.
# z: number of products sold; x: week of the year.
fig = calmap(cal_data, state='TX', store='TX_3', scale='magma', color_map=True)
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/color_map_TX_3.html',
    auto_open=False,
)
fig.show()

### <a id="wis" class="btn btn-primary btn-lg btn-block active" role="button" aria-pressed="true" style="color:white;">  Wisconsin </a>
<a id='csn'><span class="label label-info">Tiendas ubicadas en Wisconsin:</span></a>
<nav aria-label="Store Navigator">
  <ul class="pagination">
    <li class="page-item"><a class="page-link" href='#C1'> WI_1</a></li>
    <li class="page-item"><a class="page-link" href="#C2"> WI_2</a></li>
    <li class="page-item"><a class="page-link" href="#C3"> WI_3</a></li>
  </ul>
</nav>

### <a id='C2'>WI_1</a>
<a href='#csn'><span class="label label-info">Ver tiendas</span></a>
------------------------------------------

In [None]:
#@title
# Daily units sold at WI_1 (SNAP vs. non-SNAP days); also exported as HTML.
fig = plot_metric(df, state='WI', store='WI_1', metric='sold', color_map=False)
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/total_productos_WI_1.html',
    auto_open=False,
)
fig.show()

In [None]:
#@title
# Daily revenue at WI_1 (SNAP vs. non-SNAP days); also exported as HTML.
fig = plot_metric(df, state='WI', store='WI_1', metric='revenue', color_map=False)
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/ingresos_productos_WI_1.html',
    auto_open=False,
)
fig.show()

In [None]:
#@title
# Calendar heatmap for WI_1.
# z: number of products sold; x: week of the year.
fig = calmap(cal_data, state='WI', store='WI_1', scale='magma', color_map=True)
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/color_map_WI_1.html',
    auto_open=False,
)
fig.show()

### <a id='C2'>WI_2</a>
<a href='#csn'><span class="label label-info">Ver tiendas</span></a>
------------------------------------------

In [None]:
# Daily units sold at WI_2 (SNAP vs. non-SNAP days); also exported as HTML.
fig = plot_metric(df, state='WI', store='WI_2', metric='sold', color_map=False)
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/total_productos_WI_2.html',
    auto_open=False,
)
fig.show()

In [None]:
# Daily revenue at WI_2 (SNAP vs. non-SNAP days); also exported as HTML.
fig = plot_metric(df, state='WI', store='WI_2', metric='revenue', color_map=False)
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/ingresos_productos_WI_2.html',
    auto_open=False,
)
fig.show()

In [None]:
# Calendar heatmap for WI_2.
# z: number of products sold; x: week of the year.
fig = calmap(cal_data, state='WI', store='WI_2', scale='magma', color_map=True)
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/color_map_WI_2.html',
    auto_open=False,
)
fig.show()

### <a id='C2'>WI_3</a>
<a href='#csn'><span class="label label-info">Ver tiendas</span></a>
------------------------------------------

In [None]:
# Daily units sold at WI_3 (SNAP vs. non-SNAP days); also exported as HTML.
fig = plot_metric(df, state='WI', store='WI_3', metric='sold', color_map=False)
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/total_productos_WI_3.html',
    auto_open=False,
)
fig.show()

In [None]:
# Daily revenue at WI_3 (SNAP vs. non-SNAP days); also exported as HTML.
fig = plot_metric(df, state='WI', store='WI_3', metric='revenue', color_map=False)
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/ingresos_productos_WI_3.html',
    auto_open=False,
)
fig.show()

In [None]:
# Calendar heatmap for WI_3.
# z: number of products sold; x: week of the year.
fig = calmap(cal_data, state='WI', store='WI_3', scale='magma', color_map=True)
plotly.offline.plot(
    fig,
    filename=ruta + '/EDA_HTMLS/color_map_WI_3.html',
    auto_open=False,
)
fig.show()