\[_In case you’re unable to see the atoti visualizations in GitHub, try viewing the notebook in [nbviewer](https://nbviewer.org/github/atoti/notebooks/blob/master/notebooks/burritos/main.ipynb)._]

In [1]:
# import relevant libraries
import re

import atoti as tt
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [2]:
# read the burrito CSV into the working DataFrame `df`
burritos_csv_path = "./data/burritos_01022018.csv"
df = pd.read_csv(burritos_csv_path)

In [3]:
# list every column label of the raw data, exactly as read from the CSV
df.columns

Index(['Location', 'Burrito', 'Date', 'Neighborhood', 'Address', 'URL', 'Yelp',
       'Google', 'Chips', 'Cost', 'Hunger', 'Mass (g)', 'Density (g/mL)',
       'Length', 'Circum', 'Volume', 'Tortilla', 'Temp', 'Meat', 'Fillings',
       'Meat:filling', 'Uniformity', 'Salsa', 'Synergy', 'Wrap', 'overall',
       'Rec', 'Reviewer', 'Notes', 'Unreliable', 'NonSD', 'Beef', 'Pico',
       'Guac', 'Cheese', 'Fries', 'Sour cream', 'Pork', 'Chicken', 'Shrimp',
       'Fish', 'Rice', 'Beans', 'Lettuce', 'Tomato', 'Bell peper', 'Carrots',
       'Cabbage', 'Sauce', 'Salsa.1', 'Cilantro', 'Onion', 'Taquito',
       'Pineapple', 'Ham', 'Chile relleno', 'Nopales', 'Lobster', 'Queso',
       'Egg', 'Mushroom', 'Bacon', 'Sushi', 'Avocado', 'Corn', 'Zucchini'],
      dtype='object')

In [4]:
# preview the first 5 rows of the raw data
df.head(n=5)

Unnamed: 0,Location,Burrito,Date,Neighborhood,Address,URL,Yelp,Google,Chips,Cost,...,Nopales,Lobster,Queso,Egg,Mushroom,Bacon,Sushi,Avocado,Corn,Zucchini
0,Donato's taco shop,California,1/18/2016,Miramar,6780 Miramar Rd,http://donatostacoshop.net/,3.5,4.2,,6.49,...,,,,,,,,,,
1,Oscar's Mexican food,California,1/24/2016,San Marcos,225 S Rancho Santa Fe Rd,http://www.yelp.com/biz/oscars-mexican-food-sa...,3.5,3.3,,5.45,...,,,,,,,,,,
2,Oscar's Mexican food,Carnitas,1/24/2016,,,,,,,4.85,...,,,,,,,,,,
3,Oscar's Mexican food,Carne asada,1/24/2016,,,,,,,5.25,...,,,,,,,,,,
4,Pollos Maria,California,1/27/2016,Carlsbad,3055 Harding St,http://pollosmaria.com/,4.0,3.8,x,6.59,...,,,,,,,,,,


In [5]:
# summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
df.describe()

Unnamed: 0,Yelp,Google,Cost,Hunger,Mass (g),Density (g/mL),Length,Circum,Volume,Tortilla,Temp,Meat,Fillings,Meat:filling,Uniformity,Salsa,Synergy,Wrap,overall,Queso
count,82.0,82.0,378.0,382.0,22.0,22.0,251.0,249.0,249.0,385.0,365.0,373.0,383.0,377.0,383.0,363.0,383.0,383.0,383.0,0.0
mean,3.89878,4.17439,7.04828,3.499895,546.181818,0.675277,20.072988,22.098996,0.785462,3.486104,3.741096,3.596247,3.527546,3.564403,3.422324,3.348485,3.576371,3.995561,3.604813,
std,0.470748,0.377389,1.517983,0.808791,144.445619,0.080468,2.060584,1.79501,0.153465,0.787282,0.975079,0.835896,0.812342,0.987858,1.061032,0.927714,0.896275,1.107876,0.761901,
min,2.5,2.9,2.99,0.5,350.0,0.56,15.0,17.0,0.4,1.0,1.0,1.0,1.0,0.5,0.0,0.0,1.0,0.0,1.0,
25%,3.5,4.0,6.25,3.0,450.0,0.619485,18.5,21.0,0.68,3.0,3.0,3.0,3.0,3.0,2.5,3.0,3.0,3.5,3.0,
50%,4.0,4.2,6.95,3.5,540.0,0.658099,20.0,22.0,0.77,3.5,4.0,3.75,3.5,4.0,3.5,3.5,3.8,4.0,3.75,
75%,4.0,4.4,7.75,4.0,595.0,0.721726,21.5,23.0,0.88,4.0,4.5,4.0,4.0,4.0,4.0,4.0,4.0,5.0,4.1,
max,4.5,5.0,25.0,5.0,925.0,0.865672,26.0,29.0,1.54,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,


In [6]:
# Clean up the column headers:
#   1. drop any parenthesised/bracketed suffix such as " (g)" or " (g/mL)"
#   2. replace ":" with "_" (e.g. "Meat:filling" -> "Meat_filling")
# The pattern is a raw string so that "\(" and "\[" reach the regex engine as
# written instead of being parsed as (invalid) Python string escapes.
bracketed_suffix = re.compile(r"([\(\[]).*?([\)\]])")
df.columns = [
    bracketed_suffix.sub("", name).strip().replace(":", "_").strip()
    for name in df.columns
]

In [7]:
# re-inspect the column labels after the clean-up above
df.columns

Index(['Location', 'Burrito', 'Date', 'Neighborhood', 'Address', 'URL', 'Yelp',
       'Google', 'Chips', 'Cost', 'Hunger', 'Mass', 'Density', 'Length',
       'Circum', 'Volume', 'Tortilla', 'Temp', 'Meat', 'Fillings',
       'Meat_filling', 'Uniformity', 'Salsa', 'Synergy', 'Wrap', 'overall',
       'Rec', 'Reviewer', 'Notes', 'Unreliable', 'NonSD', 'Beef', 'Pico',
       'Guac', 'Cheese', 'Fries', 'Sour cream', 'Pork', 'Chicken', 'Shrimp',
       'Fish', 'Rice', 'Beans', 'Lettuce', 'Tomato', 'Bell peper', 'Carrots',
       'Cabbage', 'Sauce', 'Salsa.1', 'Cilantro', 'Onion', 'Taquito',
       'Pineapple', 'Ham', 'Chile relleno', 'Nopales', 'Lobster', 'Queso',
       'Egg', 'Mushroom', 'Bacon', 'Sushi', 'Avocado', 'Corn', 'Zucchini'],
      dtype='object')

In [8]:
# Fraction of missing values per column.
# Only the last expression of a cell is displayed, so a separate
# `df.isnull().any()` line above it would be silently discarded; this single
# summary answers both questions: a column contains nulls iff its fraction > 0.
df.isnull().mean()

Location        0.000000
Burrito         0.000000
Date            0.000000
Neighborhood    0.774026
Address         0.784416
                  ...   
Bacon           0.992208
Sushi           0.994805
Avocado         0.966234
Corn            0.992208
Zucchini        0.997403
Length: 66, dtype: float64

In [9]:
# start an atoti session on port 9000; user content (dashboard files) is stored in ./content
session = tt.Session(port=9000, user_content_storage="./content")

In [10]:
# Min-max scale the circumference, volume, length, mass and cost columns so
# they share one comparable range. (MinMaxScaler is imported with the other
# imports at the top of the notebook.)
scaled_features = ["Circum", "Volume", "Length", "Mass", "Cost"]
scaler = MinMaxScaler()
burrito_vars_norm = df.loc[:, scaled_features]

# MinMaxScaler maps each column onto 0-1; multiply by 10 for a more readable 0-10 scale
bnorms = scaler.fit_transform(burrito_vars_norm) * 10

# store the scaled values in our dataframe (df) as "<feature>_norm" columns,
# derived from the same list so the two can never drift apart
df[[f"{feature}_norm" for feature in scaled_features]] = bnorms

In [11]:
# Reshape the scaled columns from wide to long so a visualization platform can
# treat the feature name as a dimension: `variable` holds the feature name
# (Circum_norm, Volume_norm, ...) and `value` holds its scaled score.
# The result has 5x as many rows as df and just 4 columns:
# Location, Burrito, variable, value.
# melt only reads the id_vars/value_vars columns and builds a fresh index for
# its result, so df is passed directly (a prior reset_index() adds nothing).
burrito_variables = pd.melt(
    df,
    id_vars=["Location", "Burrito"],
    value_vars=["Circum_norm", "Volume_norm", "Length_norm", "Mass_norm", "Cost_norm"],
)
burrito_variables

Unnamed: 0,Location,Burrito,variable,value
0,Donato's taco shop,California,Circum_norm,
1,Oscar's Mexican food,California,Circum_norm,
2,Oscar's Mexican food,Carnitas,Circum_norm,
3,Oscar's Mexican food,Carne asada,Circum_norm,
4,Pollos Maria,California,Circum_norm,
...,...,...,...,...
1920,Rigoberto's Taco Shop,California,Cost_norm,1.753748
1921,Rigoberto's Taco Shop,California,Cost_norm,1.753748
1922,Burrito Box,Steak with guacamole,Cost_norm,3.866424
1923,Taco Stand,California,Cost_norm,2.226261


In [12]:
# load the cleaned dataframe (including the *_norm columns) into the atoti
# session as a table named "burritos"
burrito_table = session.read_pandas(df, table_name="burritos")

In [13]:
# preview a few rows of the atoti table to confirm the load
burrito_table.head()

Unnamed: 0,Location,Burrito,Date,Neighborhood,Address,URL,Yelp,Google,Chips,Cost,...,Bacon,Sushi,Avocado,Corn,Zucchini,Circum_norm,Volume_norm,Length_norm,Mass_norm,Cost_norm
0,Donato's taco shop,California,1/18/2016,Miramar,6780 Miramar Rd,http://donatostacoshop.net/,3.5,4.2,,6.49,...,,,,,,,,,,1.590186
1,Nico's Taco Shop,Carnitas,1/30/2016,,,,,,,6.99,...,,,,,,,,,,1.817356
2,Taco stand,California,5/6/2016,,,,,,,7.49,...,,,,,,4.166667,2.894737,3.636364,,2.044525
3,Lolita's taco shop,2 in 1,5/12/2016,,,,,,,8.75,...,,,,,,4.354167,2.631579,2.527273,,2.616992
4,Rigoberto's Taco Shop,Carnitas,5/13/2016,,,,,,,7.5,...,,,,,,4.583333,4.824561,7.727273,,2.049069


In [14]:
# create a data cube on top of the burritos table
cube = session.create_cube(burrito_table)

In [15]:
# short aliases for the cube's hierarchies (h), levels (l) and measures (m);
# nothing is created here, these just save typing in the cells below
h, l, m = cube.hierarchies, cube.levels, cube.measures

In [16]:
# example measures: a constant, and a product of two existing measures
m["five"] = 5

mean_length = m["Length.MEAN"]
mean_wrap = m["Wrap.MEAN"]
m["lenXwrap"] = mean_length * mean_wrap

In [17]:
# example visualization: pivot table (the string is the name of the widget)
session.visualize("exploration 1")

In [18]:
# example visualization: scatter plot (the string is the name of the widget)
session.visualize("scatter plot neighborhood yelp google")

In [19]:
# load the long-format (melted) dataframe as a second atoti table named
# "burrito_variables", keyed by (Location, Burrito, variable)
# NOTE(review): burrito_variables can hold several rows with the same
# (Location, Burrito, variable), e.g. the same burrito reviewed more than once
# at one shop. Presumably atoti keeps a single row per key, so such duplicates
# would collapse before any averaging — confirm this is intended.
burrito_var_table = session.read_pandas(
    burrito_variables,
    table_name="burrito_variables",
    keys=["Location", "Burrito", "variable"],
)

In [20]:
# join the main burritos table with burrito_var_table
# no explicit mapping is passed; presumably the join uses the columns the two
# tables share by name (Location, Burrito) — TODO confirm
burrito_table.join(burrito_var_table)

In [21]:
# "value": mean of the scaled scores stored in burrito_var_table
# (one score per burrito variable: mass, circum, cost, etc.)
m["value"] = tt.agg.mean(burrito_var_table["value"])

# "aggvalue": mean of "value", aggregated from the Location / Burrito / variable levels
value_scope = tt.OriginScope(l["Location"], l["Burrito"], l["variable"])
m["aggvalue"] = tt.agg.mean(m["value"], scope=value_scope)

In [22]:
# radar chart visualization (the string is the name of the widget)
session.visualize("radar chart final")

In [23]:
# constant measure (4) used by the yelp gauge
m["four"] = 4

In [24]:
# constant measure used by the google gauge
# NOTE(review): the measure is named "four_google" but holds 4.25 — confirm the value/name
m["four_google"] = 4.25

In [25]:
# link to a saved dashboard in the session's web app
session.link(path="/#/dashboard/1be")

Open the notebook in JupyterLab with the atoti extension enabled to see this link.