# Data viz ideas

## There are two main parts to this:
1) Dashboard building
2) Actual plots and such 
    - Most worn items per category 
    - Top colors worn
    - Parse data by season (winter/spring/summer/fall)

In [11]:
from dash import Dash, html, dcc, Input, Output
import altair as alt
import dash_bootstrap_components as dbc
import pandas as pd
import numpy as np

import sheworewhat as sww

# Turn off Altair's row-count limit on charted datasets so larger frames do not raise.
alt.data_transformers.disable_max_rows()


DataTransformerRegistry.enable('default')

In [2]:
# Load the closet inventory through the project helper and display it.
# NOTE(review): the rendered output suggests one row per item -- confirm against sww.closet_df.
closet = sww.closet_df()

closet

Unnamed: 0,ID,Item,Category,Sub-Category,Color,Pattern,Brand,Bought,Cost,2023,Price,Name
0,0,Turtleneck,Top,Sweater,Black,Plain,Zara,"Secondhand, Thrifted",cheap,No,,Zara Turtleneck
1,1,Tank,Top,Tanktop,"Black, Red, Gold",Feather,Plisse,"Secondhand, Thrifted",cheap,No,,Plisse Tank
2,2,Tank,Top,Tanktop,"Black, Tan",Leopard,Plisse,"Secondhand, Thrifted",cheap,No,15.0,Plisse Tank
3,3,Jeans,Bottom,Pants,Blue,Plain,Aerie,New,cheap,No,,Aerie Jeans
4,4,Shirt,Top,Shirt,"Black, White",Cheetah,Free People,"Secondhand, Depop",cheap,No,,Free People Shirt
...,...,...,...,...,...,...,...,...,...,...,...,...
82,82,Christmas Tree Hoops,Accessory,Jewelry,Gold,Plain,No Brand,"Secondhand, Thrifted",cheap,No,,No Brand Christmas Tree Hoops
83,83,Square Hoops,Accessory,Jewelry,Gold,Plain,Tj Maxx,New,cheap,No,,Tj Maxx Square Hoops
84,84,Puffer,Outerwear,Coat,Green,Plain,Hollister,New,pricy,No,,Hollister Puffer
85,85,Tote Bag,Accessory,Bag,Green,Logo,Ubc,New,cheap,No,,Ubc Tote Bag


In [3]:
# Break the closet into six frames returned by the project helper, unpacked in this order.
(
    acc_df,
    bottom_df,
    fb_df,
    out_df,
    shoes_df,
    top_df,
) = sww.closet_cat(closet)

# Peek at the first frame (head() returns five rows by default).
acc_df.head()

Unnamed: 0,ID,Item,Category,Sub-Category,Color,Pattern,Brand,Bought,Cost,2023,Price,Name
10,10,Beanie,Accessory,Hat,"Tan, White",Stripe,Athleta,New,cheap,No,,Athleta Beanie
69,69,Scarf,Accessory,Scarf,"Black, Brown",Plaid,No Brand,"Secondhand, Thrifted",cheap,No,,No Brand Scarf
70,70,Tote Bag,Accessory,Bag,Green,Logo,Ubc,New,cheap,No,,Ubc Tote Bag
71,71,Tiny Purse,Accessory,Bag,Gold,Plain,No Brand,"Secondhand, Thrifted",cheap,No,,No Brand Tiny Purse
72,72,Crossbody,Accessory,Bag,Black,Bow,Karl Lagerfield,"Secondhand, Gifted",cheap,No,,Karl Lagerfield Crossbody


## Closet EDA
- What percentage of my closet is new vs. secondhand?

In [13]:
# Non-null count of every column for each distinct "Bought" value.
closet_count = closet.groupby("Bought").count()
closet_count

Unnamed: 0_level_0,ID,Item,Category,Sub-Category,Color,Pattern,Brand,Cost,2023,Price,Name
Bought,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
New,43,43,43,43,43,43,43,43,43,0,43
"Secondhand, Depop",2,2,2,2,2,2,2,2,2,0,2
"Secondhand, Gifted",5,5,5,5,5,5,5,5,5,0,5
"Secondhand, Thrifted",37,37,37,37,37,37,37,37,37,3,37


In [14]:
# Number of items per "Bought" value (N) and that number as a fraction of the whole closet.
closet_n = (
    closet.groupby(["Bought"])[["Bought"]]
    .count()
    .rename(columns={"Bought": "N"})
    .reset_index()
    .assign(Percent=lambda counts: counts["N"] / len(closet))
)
closet_n

Unnamed: 0,Bought,N,Percent
0,New,43,0.494253
1,"Secondhand, Depop",2,0.022989
2,"Secondhand, Gifted",5,0.057471
3,"Secondhand, Thrifted",37,0.425287


Should I plot just new vs. secondhand first, and then break down the secondhand items by source?

In [24]:
# Count items per "Bought" value and express each count as a share of the closet.
closet_n = (
    closet.groupby(["Bought"])[["Bought"]]
    .agg("count")
    .rename(columns={"Bought": "N"})
    .reset_index()
)
closet_n["Percent"] = closet_n["N"] / len(closet)

# "Secondhand, Thrifted" -> "Thrifted". Strip the space that follows the comma.
# Values without a comma ("New") have no second piece, so label them "New".
closet_n["Status"] = (
    closet_n["Bought"].str.split(",").str[1].str.strip().fillna("New")
)

# Bar chart of closet share per "Bought" value, tallest bar first.
closet_comp = (
    alt.Chart(closet_n, title="Closet Composition")
    .mark_bar()
    .encode(
        alt.X("Bought", axis=alt.Axis(labelAngle=-45), sort="-y"),
        alt.Y("Percent", axis=alt.Axis(format="%")),
        # Show the tooltip as a percentage so it agrees with the y-axis.
        alt.Tooltip("Percent", format=".2%"),
        # Highlight the "New" bar. The test must reference a field that exists
        # in closet_n; a missing field never matches, leaving every bar maroon.
        color=alt.condition(
            alt.datum.Bought == "New",
            alt.value("orange"),
            alt.value("maroon"),
        ),
    )
)

closet_comp

In [28]:
# Count items per "Bought" value and express each count as a share of the closet.
closet_n = (
    closet.groupby(["Bought"])[["Bought"]]
    .agg("count")
    .rename(columns={"Bought": "N"})
    .reset_index()
)
closet_n["Percent"] = closet_n["N"] / len(closet)

# Split "Secondhand, Thrifted" into its two pieces: "Secondhand" and "Thrifted".
# Stripping removes the space after the comma, which would otherwise leave
# Status values such as " Thrifted".
bought_parts = closet_n["Bought"].str.split(",")
closet_n["Purchased"] = bought_parts.str[0].str.strip()
# Values without a comma ("New") have no second piece; label those "New".
# Only this column is filled, so NaN elsewhere is never silently overwritten.
closet_n["Status"] = bought_parts.str[1].str.strip().fillna("New")

closet_n

Unnamed: 0,Bought,N,Percent,Purchased,Status
0,New,43,0.494253,New,New
1,"Secondhand, Depop",2,0.022989,Secondhand,Depop
2,"Secondhand, Gifted",5,0.057471,Secondhand,Gifted
3,"Secondhand, Thrifted",37,0.425287,Secondhand,Thrifted


In [32]:
# Rows of the summary table per "Purchased" value. This counts rows of closet_n
# (one per "Bought" value), not closet items; summing "N" would give item totals.
closet_n.groupby("Purchased").count()

Unnamed: 0_level_0,Bought,N,Percent,Status
Purchased,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
New,1,1,1,1
Secondhand,3,3,3,3


In [None]:
# new vs secondhand only plot
# Collapse the per-"Bought" rows to one row per "Purchased" value so each bar
# is the summed share for that value.
purchased_share = closet_n.groupby("Purchased", as_index=False)[["N", "Percent"]].sum()

closet_comp = (
    alt.Chart(purchased_share, title="Closet Composition")
    .mark_bar()
    .encode(
        alt.X("Purchased", axis=alt.Axis(labelAngle=-45), sort="-y"),
        alt.Y("Percent", axis=alt.Axis(format="%")),
        # Show the tooltip as a percentage so it agrees with the y-axis.
        alt.Tooltip("Percent", format=".2%"),
        # Highlight the "New" bar. The test must reference a field that exists
        # in the charted frame; a missing field never matches.
        color=alt.condition(
            alt.datum.Purchased == "New",
            alt.value("orange"),
            alt.value("maroon"),
        ),
    )
)

closet_comp