# Data viz ideas

## There are two main parts to this:
1) Dashboard building
2) Actual plots and such 
    - Most worn items per category 
    - Top colors worn
    - Parse data by season (winter/spring/summer/fall)

In [1]:
from dash import Dash, html, dcc, Input, Output
import altair as alt
import dash_bootstrap_components as dbc
import pandas as pd
import numpy as np
import matplotlib as plt
from matplotlib.pyplot import figure

import datetime
import calmap
import sheworewhat as sww

alt.data_transformers.disable_max_rows()
# alt.renderers.enable('notebook')

KeyboardInterrupt: 

In [None]:
closet = sww.closet_df()

closet

In [None]:
acc_df, bottom_df, fb_df, out_df, shoes_df, top_df = sww.closet_cat(closet)

acc_df.tail(5)

## Closet EDA
- What percentage of my closet is new vs. secondhand?

In [None]:
closet_count = closet.groupby(by="Bought").count()
closet_count

In [None]:
closet_n = closet.groupby(["Bought"])[["Bought"]].agg('count').rename(columns={"Bought" : "N"}).reset_index()
closet_n["Percent"] = (closet_n['N'] / len(closet))
closet_n

Should I do just secondhand? then breakdown the secondhand

In [None]:
# Count items per "Bought" label and express each count as a share of the closet.
closet_n = closet.groupby(["Bought"])[["Bought"]].agg('count').rename(columns={"Bought" : "N"}).reset_index()
closet_n["Percent"] = (closet_n['N'] / len(closet))

# "Status" is the text after the first comma of "Bought"; labels without a
# comma have no second part (NaN) and are tagged "New" by the replace below.
closet_n["Status"] = closet_n["Bought"].str.split(",").str[1]
closet_n = closet_n.replace(np.nan, "New")

closet_comp = (
    alt.Chart(closet_n, title="Closet Composition")
    .mark_bar()
    .encode(
        alt.X("Bought", axis=alt.Axis(labelAngle=-45), sort="-y"),
        alt.Y("Percent", axis=alt.Axis(format="%")),
        # show the share as a percentage, matching the y-axis format
        alt.Tooltip("Percent", format=".1%"),
        # highlight on the "Status" column built above; closet_n has no
        # "year" field, so a test on datum.year could never be true
        color=alt.condition(
            alt.datum.Status == "New",
            alt.value("orange"),
            alt.value("maroon"),
        ),
    )
)

closet_comp

In [None]:
closet_n = closet.groupby(["Bought"])[["Bought"]].agg('count').rename(columns={"Bought" : "N"}).reset_index()
closet_n["Percent"] = (closet_n['N'] / len(closet))

closet_n["Purchased"] = closet_n["Bought"].str.split(",").str[0]
closet_n["Status"] = closet_n["Bought"].str.split(",").str[1]
closet_n = closet_n.replace(np.nan, "New")

closet_n

In [None]:
closet_n.groupby(["Purchased"]).count()

In [None]:
# new vs secondhand only plot
# Bars are grouped on "Purchased" (the text before the first comma of
# "Bought"), so the shares of all sub-labels are summed into one bar each.
closet_comp = (
    alt.Chart(closet_n, title="Closet Composition")
    .mark_bar()
    .encode(
        alt.X("Purchased", axis=alt.Axis(labelAngle=-45), sort="-y"),
        alt.Y("sum(Percent)", title="Percent", axis=alt.Axis(format="%")),
        alt.Tooltip("sum(Percent)", format=".1%"),
        # NOTE(review): assumes brand-new items are labelled exactly "New"
        # in the "Bought" column - confirm against the closet sheet.
        color=alt.condition(
            alt.datum.Purchased == "New",
            alt.value("orange"),
            alt.value("maroon"),
        ),
    )
)

closet_comp

## clean and merge data
- need function to clean, merge, do counts and stuff for collected data

In [None]:
path = "../data/2023TestData.csv"

df = pd.read_csv(path)
df

We don't need the timestamp; it's redundant, and it's easier to just use the date.

In [None]:
df.iloc[:, 1:]

In [None]:
df.drop("Timestamp", axis=1)

two things: would be useful to have counts and also the dates in a list per item

Need to be able to handle multiple items in a single category (layering, etc)
- doesnt make sense to edit google sheet because layering can happen with all categories

In [None]:
path = "../data/2023TestData.csv"

df = pd.read_csv(path).drop("Timestamp", axis=1).melt("Date").dropna()

df

In [None]:
df.groupby("value").count()

In [None]:
df["test0"] = df["value"].str.split(",").str[0]
df["test1"] = df["value"].str.split(",").str[1]
df.head(3)

ugh maybe this can be reformatted somehow for easier parsing; also i dont love how the format is now

OR 
maybe if a column contains more than one number, split it

IDEA:
- GOOGLE SHEET FORM: ID ITEM - COLOR
- then multiple selections can be parsed via "," in the sheet

In [None]:
# def closet
path = "../data/2023TestData.csv"

# long format: one row per (Date, form column, logged value); empty answers dropped
df = pd.read_csv(path).drop("Timestamp", axis=1).melt("Date").dropna()

# extract ID number from value (first run of digits); raw string so that
# "\d" reaches the regex engine instead of being an invalid string escape
df["ID"] = df["value"].str.extract(r"(\d+)", expand=False).astype(int)

# number of log rows per (value, ID) pair
closet_counts = (
    df.groupby(["value", "ID"])
    .count()
    .reset_index()
    .rename(columns={"Date": "count"})
    .drop(["variable"], axis=1)
)

# left join closet + df
x = pd.merge(closet, closet_counts, how="left", on="ID")
x

In [None]:
x = pd.merge(closet, df, on="ID")


In [None]:
closet_counts = df.groupby(["value"]).count().reset_index().rename(columns={"Date":"count"}).drop(["ID", "variable"],axis=1)
closet_counts

In [None]:
# Bar chart of how often each logged value appears in closet_counts.
closet_comp = (
    alt.Chart(closet_counts, title="2023 Most Worn Pieces")
    .mark_bar(color="maroon")
    .encode(
        alt.X("value", axis=alt.Axis(labelAngle=-45), sort="-y"),
        alt.Y("count"),
        # "value" is text, so it takes no numeric format; show the count with it
        alt.Tooltip(["value", "count"]),
    )
)

closet_comp

## Basic Wrangling Function
1. takes in google form data
2. counts how many of each item logged
3. left joins with closet data

In [None]:
def complete_df(closet, path="../data/2023TestData.csv"):
    """
    Function to merge raw closet data and collected 2023 data.

    Every closet row is kept exactly once; "Count" is the number of non-empty
    form entries whose text contains that row's ID as its first run of digits
    (0 when the item was never logged).

    Parameters
    ----------
        closet : pandas.DataFrame
            Dataframe containing complete closet log. Must provide the columns
            "ID", "Item", "Category", "Sub-Category", "Color", "Pattern",
            "Brand", "Cost" and "2023".
        path : string
            String containing path of CSV of collected data. The CSV must have
            "Timestamp" and "Date" columns; every other column is treated as
            a logged item entry.

    Returns
    -------
        complete_df : pandas.DataFrame
            Dataframe containing "ID", "Name", "Count", "Item",
            "Category", "Sub-Category", "Color", "Pattern", "Brand", "Cost", "2023"

    Raises
    ------
        ValueError
            If a logged entry contains no digits, so no ID can be extracted.
    """
    # long format: one row per (Date, form column, logged value); empty answers dropped
    form = pd.read_csv(path).drop("Timestamp", axis=1).melt("Date").dropna()

    # extract ID number from value (raw string keeps "\d" a regex token)
    form["ID"] = form["value"].str.extract(r"(\d+)", expand=False).astype(int)

    # count per ID only: grouping on the free-text value as well would give
    # several rows for one ID and duplicate closet rows in the join below
    form_counts = form.groupby("ID").size().rename("count").reset_index()

    # left join closet + counts
    merged = pd.merge(closet, form_counts, how="left", on="ID")
    merged["Name"] = merged["Brand"] + " " + merged["Item"]
    merged = merged[
        ["ID", "Name", "count", "Item", "Category", "Sub-Category",
         "Color", "Pattern", "Brand", "Cost", "2023"]
    ].rename(columns={"count": "Count"})

    # only the count gets a 0 default; missing text fields stay missing
    merged["Count"] = merged["Count"].fillna(0).astype(int)

    return merged


In [None]:
closet = sww.closet_df()
worn_df = complete_df(closet)
worn_df

In [None]:
worn_df["Count"].astype(int)

## 2023 Most Worn Pieces

In [None]:
most_worn = worn_df.nlargest(15, columns="Count")
closet_comp = alt.Chart(most_worn, title="2023 Most Worn Pieces"
                       ).mark_bar(color="Maroon"
                         ).encode(alt.X("Name", axis=alt.Axis(labelAngle=-45), sort="-y"),
                                  alt.Y("Count", 
                                        title="# of Times Worn",
                                        axis=alt.Axis(tickMinStep=1)),
                                  alt.Tooltip("Count")
                                 )
                
closet_comp

In [None]:
def plot_mostworn():
    """
    Bar chart of the most worn pieces.

    Reads the notebook-level ``worn_df`` and plots the 15 rows with the
    largest "Count", one bar per "Name", sorted by descending bar height.

    Returns
    -------
        altair.Chart
    """
    top_items = worn_df.nlargest(15, columns="Count")

    x_axis = alt.X("Name", title="", axis=alt.Axis(labelAngle=-45), sort="-y")
    y_axis = alt.Y("Count", title="Times Worn", axis=alt.Axis(tickMinStep=1))

    bars = alt.Chart(top_items, title="2023 Most Worn Pieces").mark_bar(color="Maroon")
    return bars.encode(x_axis, y_axis, alt.Tooltip("Count"))

In [None]:
plot_mostworn()

### Easier way to plot count 
- can use "count()" and make complete_df function less complex
- this doesn't seem to work unless you limit the x-axis data first so move on

In [None]:
closet = sww.closet_df()

In [None]:
form = pd.read_csv("../data/2023TestData.csv").drop("Timestamp", axis=1).melt("Date").dropna()

# extract ID number from value (raw string keeps "\d" a regex token)
form["ID"] = form["value"].str.extract(r"(\d+)", expand=False).astype(int)

# left join closet + df: one row per logged wear, plus one row for each
# closet item that was never logged
# NOTE: this rebinds the name `complete_df` to a DataFrame, so the
# complete_df() function defined earlier in this notebook is no longer
# callable after this cell runs.
complete_df = pd.merge(closet, form, how="left", on="ID")
complete_df["Name"] = complete_df["Brand"] + " " + complete_df["Item"]
complete_df = complete_df[["ID", "Name", "Item", "Category", "Sub-Category", "Color", "Pattern", "Brand", "Cost", "2023"]]


In [None]:
closet_comp = alt.Chart(complete_df, title="2023 Most Worn Pieces"
                       ).mark_bar(color="Maroon"
                         ).encode(alt.X("Name", axis=alt.Axis(labelAngle=-45), sort="-y"),
                                  alt.Y("count()", 
                                        title="# of Times Worn",
                                        axis=alt.Axis(tickMinStep=1)),
                                  alt.Tooltip("count()")
                                 )
                
closet_comp

## Make similar plots but facet per category!

The problem with the below code is the x-axis is the same and we do not want that. We want the x axis to contain unique items for that category!
- Solution: generate 6 separate plots and concat

In [None]:
category_worn = worn_df.nlargest(15, columns="Count")
closet_comp = alt.Chart(category_worn, title="2023 Most Worn Pieces"
                       ).mark_bar(color="Maroon"
                       ).encode(alt.X("Name", axis=alt.Axis(labelAngle=-45), sort="-y"),
                                alt.Y("Count", 
                                title="# of Times Worn",
                                axis=alt.Axis(tickMinStep=1)),
                                alt.Tooltip("Count")
                        ).facet("Category", columns=3)
                
closet_comp

In [None]:
worn_df["Category"]

**Solution**: Concat six graphs because the x and y axes will inherently be very different

*whyyyyy won't sort="y" work pls*

In [None]:
categories = ["Top", "Bottom", "Full Body", "Outerwear", "Accessory", "Shoes"]

cat_plots = []

for i in categories:
    category_worn = worn_df.loc[worn_df["Category"] == i].nlargest(15, columns="Count")
    
    category_plot = alt.Chart(category_worn, title=f"2023 Most Worn {i}"
                       ).mark_bar(color="#B79492"
                       ).encode(alt.X("Name", title="", axis=alt.Axis(labelAngle=-45), sort="-y"),
                                alt.Y("Count", 
                                title="# of Times Worn",
                                axis=alt.Axis(tickMinStep=1)),
                                alt.Tooltip(["Name", "Count"])
                        ).resolve_scale(x='independent')
    cat_plots.append(category_plot)

# configure altair charts
row1 = alt.hconcat(cat_plots[0], cat_plots[1], cat_plots[2])
row2 = alt.hconcat(cat_plots[3], cat_plots[4], cat_plots[5])

category_plot = alt.vconcat(row1, row2)
category_plot

Troubleshooting this gd error

In [None]:
category_worn = worn_df.loc[worn_df["Category"] == "Top"].nlargest(10, columns="Count")
category_worn["Count"]

In [None]:
category_worn.head(5)

Weirdly the solution seems to be to reduce the dataframe I'm pulling from?
Changing `.nlargest(15, columns="Count")` to `.nlargest(5, columns="Count")` fixed it in the concat'd plot

In [None]:
alt.Chart(category_worn, title=f"2023 Most Worn Top"
                   ).mark_bar(color="Pink"
                   ).encode(alt.X("Name", title="", axis=alt.Axis(labelAngle=-45), sort="-y"),
                            alt.Y("Count", 
                            title="# of Times Worn",
                            axis=alt.Axis(tickMinStep=1)),
                            alt.Tooltip(["Name", "Count"])
                    ).resolve_scale(x='independent')

In [None]:
categories = ["Top", "Bottom", "Full Body", "Outerwear", "Accessory", "Shoes"]

cat_plots = []

for i in categories:
    category_worn = worn_df.loc[worn_df["Category"] == i].nlargest(5, columns="Count")
    
    category_plot = alt.Chart(category_worn, title=f"2023 Most Worn {i}"
                       ).mark_bar(color="#B40490"
                       ).encode(alt.X("Name", title="", axis=alt.Axis(labelAngle=-45), sort="-y"),
                                alt.Y("Count", 
                                title="# of Times Worn",
                                axis=alt.Axis(tickMinStep=1)),
                                alt.Tooltip(["Name", "Count"])
                        )
    cat_plots.append(category_plot)

# configure altair charts
row1 = alt.hconcat(cat_plots[0], cat_plots[1], cat_plots[2])
row2 = alt.hconcat(cat_plots[3], cat_plots[4], cat_plots[5])

category_plot = alt.vconcat(row1, row2)
category_plot

Make a plot function!

In [None]:
def plot_facet():
    """
    Grid of per-category "most worn" bar charts.

    For each of six fixed categories, takes the 5 rows of the notebook-level
    ``worn_df`` with the largest "Count" and draws one bar chart; the charts
    are laid out as two rows of three.

    Returns
    -------
        altair.VConcatChart
    """
    categories = ["Top", "Bottom", "Full Body", "Outerwear", "Accessory", "Shoes"]

    def _category_chart(category):
        # top five pieces within a single category
        top_five = worn_df.loc[worn_df["Category"] == category].nlargest(5, columns="Count")
        return (
            alt.Chart(top_five, title=f"2023 Most Worn {category}")
            .mark_bar(color="#B40490")
            .encode(
                alt.X("Name", title="", axis=alt.Axis(labelAngle=-45), sort="-y"),
                alt.Y("Count", title="# of Times Worn", axis=alt.Axis(tickMinStep=1)),
                alt.Tooltip(["Name", "Count"]),
            )
            .properties(height=200, width=150)
        )

    charts = [_category_chart(category) for category in categories]

    # configure altair charts: first three on top, last three below
    top_row = alt.hconcat(*charts[:3])
    bottom_row = alt.hconcat(*charts[3:])
    return alt.vconcat(top_row, bottom_row)

In [None]:
plot_facet()

## Heatmap of plots

In [None]:
path = "../data/2023TestData.csv"

# long format: one row per (Date, form column, logged value); empty answers dropped
df = pd.read_csv(path).drop("Timestamp", axis=1).melt("Date").dropna()
df["Date"] = pd.to_datetime(df["Date"])

# extract ID number from value (raw string keeps "\d" a regex token)
df["ID"] = df["value"].str.extract(r"(\d+)", expand=False).astype(int)

# right join keeps every logged wear, even if its ID is missing from closet
df = pd.merge(closet, df, how="right", on="ID")
df = df[["ID", "Item", "Color", "Pattern", "Category", "Date"]]
df.sample(4)

Maybe once we have the top 10 items we can heatmap them?

In [None]:
top_10 = [10, 85, 81]
heatmap_data = df.loc[df["ID"] == top_10[1]]
heatmap_data

Ok idea is:
- make a calendar df
- full merge the above df
    - replace NA item with ..?
    - or make a boolean column that will be colored if yes? (0 1) 
- plot month on x and day on y axis 

In [None]:
time_df = pd.DataFrame()
time_df["Date"] = pd.date_range('2022-12-01', periods=365)
time_df["Day"] = time_df["Date"].dt.day_name()


In [None]:
year = pd.merge(time_df, heatmap_data, how="outer", on="Date")
year["Item"] = year["Item"].replace(np.nan, 0) 
year["Bool"] = np.where(year["Item"] == 0, 0, 1)

In [None]:
year[15:22]

This is the ugliest plot I have seen in my life

In [None]:
# y-axis order: all seven day names, Sunday first
weekdays = ["Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"]
alt.Chart(year).mark_rect().encode(
    alt.X("Date:O", axis = alt.Axis(labelAngle=-45)),
    alt.Y("Day", sort=weekdays),
    color="Bool"
)

I wonder if it would be a better viz to see which months it was worn the most instead?
- bc yeah otherwise big x axis 

package: calmap

In [None]:
year = year.set_index("Date")

In [None]:
from matplotlib.pyplot import figure

figure(figsize=(12, 12))
cmap = calmap.yearplot(year["Bool"], cmap='BuPu', fillcolor='lightgray')
cmap

In [None]:
week = time_df["Date"].dt.isocalendar()
time_df["Week"] = week["week"]

In [None]:
# y-axis order: all seven day names, Sunday first
weekdays = ["Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"]

# one row per day for 365 days starting 2022-12-01
time_df = pd.DataFrame()
time_df["Date"] = pd.date_range('2022-12-01', periods=365)
time_df["Day"] = time_df["Date"].dt.day_name()

# attach the selected item's wears; days without a wear get Item == 0, Bool == 0
year = pd.merge(time_df, heatmap_data, how="outer", on="Date")
year["Item"] = year["Item"].replace(np.nan, 0)
year["Bool"] = np.where(year["Item"] == 0, 0, 1)

# ISO week number taken from each merged row's own date, so it cannot be
# misaligned with (or missing for) rows added by the outer merge
year["Week"] = year["Date"].dt.isocalendar().week

year.tail(5)

In [None]:
alt.Chart(year, title="Heatmap of Green Totebag Wears").mark_rect(
    stroke="white",
    strokeWidth=3,
    opacity=0.9
    ).encode(
    alt.X("Week:O", 
          axis = alt.Axis(labelAngle=-45)
         ), 
    alt.Y("Day", 
          sort=weekdays),
    alt.Color("Bool", 
              scale=alt.Scale(scheme='greenblue'),
              legend=None
             ),
    alt.Tooltip(["Date", "Day"])
    ).properties(
    height=200,
    width=700
    )

In [None]:
# scale=alt.Scale(
#             domain=[-10,-5,0,5,9], 
#             range=['red','orange','white','lightblue','darkblue'], 
#             interpolate=method
#             )

In [None]:
alt.Chart(year, title="Heatmap of Green Totebag Wears").mark_rect(
    stroke="white",
    strokeWidth=3,
    opacity=0.9,
    ).encode(
    alt.X("Week:O", 
          axis = alt.Axis(labelAngle=-45)
         ), 
    alt.Y("Day", 
          sort=weekdays),
    alt.Color("Bool", 
            scale=alt.Scale(
                domain=[0,1], 
                range=["#e0ddd5", "#7c9e7b"]),
                legend=None
             ),
    alt.Tooltip(["Date", "Day"])
    ).properties(
    height=200,
    width=600
    )

In [None]:
year.iloc[16:18]

idea: fix week x axis to dates
- also what happened dec 18th lol

oh I think if the x axis is by week then everything will look better

In [None]:
year['First_day'] = year['Date'] - year['Date'].dt.weekday * np.timedelta64(1, 'D')

# year["Week2"] = year["Week"].fillna(52)

year

In [None]:
alt.Chart(year, title="Heatmap of Green Totebag Wears").mark_rect(
    stroke="white",
    strokeWidth=3,
    opacity=0.9,
    ).encode(
    alt.X("First_day:O", 
          axis = alt.Axis(labelAngle=-45,                         )
         ), 
    alt.Y("Day", 
          sort=weekdays),
    alt.Color("Bool", 
            scale=alt.Scale(
                domain=[0,1], 
                range=["#e0ddd5", "#7c9e7b"]),
                legend=None
             ),
    alt.Tooltip(["Date", "Day"])
    ).properties(
    height=200,
    width=600
    )

In [None]:
week = time_df["Date"].dt.strftime('%m-%d-%y')

In [None]:
week = time_df["Date"].dt.strftime('%m-%d-%y')
year['D'] = time_df["Date"].dt.strftime('%m-%d-%y')


In [None]:
year["Week"] = year["First_day"].dt.strftime('%m-%d-%y')
year

In [None]:
# y-axis order: all seven day names, Sunday first
weekdays = ["Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"]

# "Week" holds mm-dd-yy text, which would otherwise be ordered alphabetically
# (January 2023 before December 2022); pin the x-axis to chronological order.
week_order = year.sort_values("First_day")["Week"].drop_duplicates().tolist()

alt.Chart(year, title="Heatmap of Green Totebag Wears").mark_rect(
    stroke="white",
    strokeWidth=3,
    opacity=0.9,
    ).encode(
    alt.X("Week:O",
          sort=week_order,
          axis=alt.Axis(labelAngle=-45)
         ),
    alt.Y("Day",
          sort=weekdays),
    alt.Color("Bool",
            scale=alt.Scale(
                domain=[0,1],
                range=["#e0ddd5", "#7c9e7b"]),
                legend=None
             ),
    alt.Tooltip(["Date", "Day"])
    ).properties(
    height=200,
    width=600
    )

## Testing out and fixing for function

In [None]:
# read in Google Form data
path = "../data/2023TestData.csv"
df = pd.read_csv(path).drop("Timestamp", axis=1).melt("Date").dropna()
df["Date"] = pd.to_datetime(df["Date"])
# raw string keeps "\d" a regex token rather than an invalid string escape
df["ID"] = df["value"].str.extract(r"(\d+)", expand=False).astype(int)

# column of day of week for 365 days starting at the first logged date
time_df = pd.DataFrame()
time_df["Date"] = pd.date_range(df["Date"].min(), periods=365)
time_df["Day"] = time_df["Date"].dt.day_name()

# merge dataframes
df = pd.merge(closet, df, how="right", on="ID")
df = df[["ID", "Item", "Color", "Pattern", "Category", "Date"]]

# select item
top_10 = [10, 85, 81] # need to make this dynamic!!
heatmap_data = df.loc[df["ID"] == top_10[1]]

# days without a wear get Item == 0 and Bool == 0
year = pd.merge(time_df, heatmap_data, how="outer", on="Date")
year["Item"] = year["Item"].replace(np.nan, 0)
year["Bool"] = np.where(year["Item"] == 0, 0, 1)

# y-axis order: all seven day names, Sunday first
weekdays = ["Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"]

# label every row with the Monday that starts its week (dt.weekday: Monday == 0)
year['First_day'] = year['Date'] - year['Date'].dt.weekday * np.timedelta64(1, 'D')
year["Week"] = year["First_day"].dt.strftime('%m-%d-%y')


year.tail(5)

Ok not totally dynamic but the actual selection will change in plot

In [None]:
closet = sww.closet_df()
worn_df = sww.complete_df(closet)
most_worn = worn_df.nlargest(10, columns="Count")

# the filter below compares against df["ID"], so collect IDs (not wear counts)
top_10 = most_worn["ID"].to_list()
heatmap_data = df.loc[df["ID"] == top_10[1]] # need to make this dynamic in plot

In [None]:
# read in Google Form data
def top_10_df(path = "../data/2023TestData.csv"):
    """
    Function to return the IDs of the top 10 most worn items and the wear log.

    Parameters:
    -----------
         path : str
            Path to CSV file containing the collected (Google Form) wear data.
            It must have "Timestamp" and "Date" columns; every other column
            is treated as a logged item entry.

    Returns:
    --------
        top_10 : list
            List containing the IDs of the top 10 most worn items, i.e. the
            10 rows of ``sww.complete_df(closet)`` with the largest "Count".

        df : pandas.DataFrame
            One row per logged wear for every item (not only the top 10),
            with columns "ID", "Item", "Color", "Pattern", "Category",
            "Date" and "Brand".
    """
    # long format: one row per (Date, form column, logged value); empty answers dropped
    df = pd.read_csv(path).drop("Timestamp", axis=1).melt("Date").dropna()
    df["Date"] = pd.to_datetime(df["Date"])
    # raw string keeps "\d" a regex token rather than an invalid string escape
    df["ID"] = df["value"].str.extract(r"(\d+)", expand=False).astype(int)

    # data wrangling to select top 10 most worn items
    closet = sww.closet_df()
    # NOTE(review): `path` is not forwarded here, so the ranking presumably
    # comes from sww.complete_df's own default data file - confirm.
    worn_df = sww.complete_df(closet)
    top_10 = worn_df.nlargest(10, columns="Count")["ID"].to_list()

    # merge dataframes; the right join keeps every logged wear
    df = pd.merge(closet, df, how="right", on="ID")
    df = df[["ID", "Item", "Color", "Pattern", "Category", "Date", "Brand"]]

    return top_10, df


In [None]:
def plot_heatmap(df, top_10, i=0):
    """
    Function for heatmap plot.

    Draws a week-by-weekday grid covering 365 days from the earliest date in
    `df`; a cell is highlighted when the selected item was logged that day.

    Parameters:
    -----------
         df : pandas.DataFrame
            Wear log with one row per logged wear. Must contain "ID", "Item",
            "Brand" and a datetime "Date" column.

         top_10 : list
            List containing the IDs of the top 10 most worn items.

         i : int
            Position in `top_10` of the item to plot (default 0).

    Returns:
    --------
        heatplot : altair.Chart
            Heatmap plot for a single item over a single calendar year.

    Raises:
    -------
        IndexError
            If `i` is outside `top_10`, or the selected item has no rows in `df`.
    """
    # y-axis order: all seven day names, Sunday first
    weekdays = ["Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"]

    # isolate item data
    heatmap_data = df.loc[df["ID"] == top_10[i]]
    if heatmap_data.empty:
        raise IndexError(f"no wear records found for item ID {top_10[i]}")
    item_name = heatmap_data["Brand"].iloc[0] + " " + heatmap_data["Item"].iloc[0]

    # one row per day for 365 days starting at the earliest logged date
    time_df = pd.DataFrame({"Date": pd.date_range(df["Date"].min(), periods=365)})

    year = pd.merge(time_df, heatmap_data, how="outer", on="Date")
    # day name from the merged date, so rows outside the 365-day window get one too
    year["Day"] = year["Date"].dt.day_name()
    # "ID" is only present on rows that came from the item's wear records
    year["Bool"] = year["ID"].notna().astype(int)
    year["Item"] = year["Item"].fillna(0)

    # Week columns start on Sunday to match the Sunday-first row order
    # (dt.weekday is Monday == 0, so Sunday is 6 -> 0 days back).
    days_since_sunday = (year["Date"].dt.weekday + 1) % 7
    year["First_day"] = year["Date"] - pd.to_timedelta(days_since_sunday, unit="D")
    year["Week"] = year["First_day"].dt.strftime('%m-%d')
    # mm-dd text would be ordered alphabetically (January before the previous
    # December); pin the x-axis to chronological order instead
    week_order = year.sort_values("First_day")["Week"].drop_duplicates().tolist()
    year = year[["Date", "Day", "Item", "ID", "Bool", "Week"]]

    heat_plot = alt.Chart(year, title=f"{item_name} in 2023").mark_rect(
        stroke="white",
        strokeWidth=3,
        opacity=0.9,
    ).encode(
        alt.X("Week:O", sort=week_order, axis=alt.Axis(labelAngle=-60)),
        alt.Y("Day", sort=weekdays),
        alt.Color(
            "Bool",
            scale=alt.Scale(domain=[0, 1], range=["#e0ddd5", "#7c9e7b"]),
            legend=None,
        ),
        alt.Tooltip(["Date", "Day"]),
    ).properties(
        height=200,
        width=600,
    )
    return heat_plot


In [None]:
top_10, df = top_10_df()
plot = plot_heatmap(df, top_10,i=5)
plot

## Finalized plots

In [None]:
# finalized plotting functions (plot_mostworn and plot_facet read the notebook-level worn_df)
def plot_mostworn():
    """
    Bar chart of the most worn pieces.

    Reads the notebook-level ``worn_df`` and plots the 15 rows with the
    largest "Count", one bar per "Name", sorted by descending bar height,
    with axis grid and domain lines turned off.

    Returns
    -------
        altair.Chart
    """
    top_items = worn_df.nlargest(15, columns="Count")

    x_axis = alt.X("Name", title="", axis=alt.Axis(labelAngle=-45), sort="-y")
    y_axis = alt.Y("Count", title="Times Worn", axis=alt.Axis(tickMinStep=1))

    bars = alt.Chart(top_items, title="2023 Most Worn Pieces").mark_bar(color="#6ce4d8")
    styled = bars.encode(x_axis, y_axis, alt.Tooltip("Count"))
    return styled.configure_axis(grid=False, domain=False)

def plot_facet():
    """
    Grid of per-category "most worn" bar charts.

    For each of six fixed categories, takes the 5 rows of the notebook-level
    ``worn_df`` with the largest "Count" and draws one bar chart; the charts
    are laid out as two rows of three with axis grid and domain lines off.

    Returns
    -------
        altair.VConcatChart
    """
    categories = ["Top", "Bottom", "Full Body", "Outerwear", "Accessory", "Shoes"]

    def _category_chart(category):
        # top five pieces within a single category
        top_five = worn_df.loc[worn_df["Category"] == category].nlargest(5, columns="Count")
        return (
            alt.Chart(top_five, title=f"2023 Most Worn {category}")
            .mark_bar(color="#827191")
            .encode(
                alt.X("Name", title="", axis=alt.Axis(labelAngle=-45), sort="-y"),
                alt.Y("Count", title="# of Times Worn", axis=alt.Axis(tickMinStep=1)),
                alt.Tooltip(["Name", "Count"]),
            )
            .properties(height=200, width=150)
        )

    charts = [_category_chart(category) for category in categories]

    # configure altair charts: first three on top, last three below
    top_row = alt.hconcat(*charts[:3])
    bottom_row = alt.hconcat(*charts[3:])
    return alt.vconcat(top_row, bottom_row).configure_axis(grid=False, domain=False)

def top_10_df(path = "../data/2023TestData.csv"):
    """
    Function to return the IDs of the top 10 most worn items and the wear log.

    Parameters:
    -----------
         path : str
            Path to CSV file containing the collected (Google Form) wear data.
            It must have "Timestamp" and "Date" columns; every other column
            is treated as a logged item entry.

    Returns:
    --------
        top_10 : list
            List containing the IDs of the top 10 most worn items, i.e. the
            10 rows of ``sww.complete_df(closet)`` with the largest "Count".

        df : pandas.DataFrame
            One row per logged wear for every item (not only the top 10),
            with columns "ID", "Item", "Color", "Pattern", "Category",
            "Date" and "Brand".
    """
    # long format: one row per (Date, form column, logged value); empty answers dropped
    df = pd.read_csv(path).drop("Timestamp", axis=1).melt("Date").dropna()
    df["Date"] = pd.to_datetime(df["Date"])
    # raw string keeps "\d" a regex token rather than an invalid string escape
    df["ID"] = df["value"].str.extract(r"(\d+)", expand=False).astype(int)

    # data wrangling to select top 10 most worn items
    closet = sww.closet_df()
    # NOTE(review): `path` is not forwarded here, so the ranking presumably
    # comes from sww.complete_df's own default data file - confirm.
    worn_df = sww.complete_df(closet)
    top_10 = worn_df.nlargest(10, columns="Count")["ID"].to_list()

    # merge dataframes; the right join keeps every logged wear
    df = pd.merge(closet, df, how="right", on="ID")
    df = df[["ID", "Item", "Color", "Pattern", "Category", "Date", "Brand"]]

    return top_10, df


def plot_heatmap(df, top_10, i=0):
    """
    Function for heatmap plot.

    Draws a week-by-weekday grid covering 365 days from the earliest date in
    `df`; a cell is highlighted when the selected item was logged that day.

    Parameters:
    -----------
         df : pandas.DataFrame
            Wear log with one row per logged wear. Must contain "ID", "Item",
            "Brand" and a datetime "Date" column.

         top_10 : list
            List containing the IDs of the top 10 most worn items.

         i : int
            Position in `top_10` of the item to plot (default 0).

    Returns:
    --------
        heatplot : altair.Chart
            Heatmap plot for a single item over a single calendar year.

    Raises:
    -------
        IndexError
            If `i` is outside `top_10`, or the selected item has no rows in `df`.
    """
    # y-axis order: all seven day names, Sunday first
    weekdays = ["Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"]

    # isolate item data
    heatmap_data = df.loc[df["ID"] == top_10[i]]
    if heatmap_data.empty:
        raise IndexError(f"no wear records found for item ID {top_10[i]}")
    item_name = heatmap_data["Brand"].iloc[0] + " " + heatmap_data["Item"].iloc[0]

    # one row per day for 365 days starting at the earliest logged date
    time_df = pd.DataFrame({"Date": pd.date_range(df["Date"].min(), periods=365)})

    year = pd.merge(time_df, heatmap_data, how="outer", on="Date")
    # day name from the merged date, so rows outside the 365-day window get one too
    year["Day"] = year["Date"].dt.day_name()
    # "ID" is only present on rows that came from the item's wear records
    year["Bool"] = year["ID"].notna().astype(int)
    year["Item"] = year["Item"].fillna(0)

    # Week columns start on Sunday to match the Sunday-first row order
    # (dt.weekday is Monday == 0, so Sunday is 6 -> 0 days back).
    days_since_sunday = (year["Date"].dt.weekday + 1) % 7
    year["First_day"] = year["Date"] - pd.to_timedelta(days_since_sunday, unit="D")
    year["Week"] = year["First_day"].dt.strftime('%m-%d')
    # mm-dd text would be ordered alphabetically (January before the previous
    # December); pin the x-axis to chronological order instead
    week_order = year.sort_values("First_day")["Week"].drop_duplicates().tolist()
    year = year[["Date", "Day", "Item", "ID", "Bool", "Week"]]

    heat_plot = alt.Chart(year, title=f"{item_name} in 2023").mark_rect(
        stroke="white",
        strokeWidth=3,
        opacity=0.9,
    ).encode(
        alt.X("Week:O", sort=week_order, axis=alt.Axis(labelAngle=-60)),
        alt.Y("Day", sort=weekdays, title=""),
        alt.Color(
            "Bool",
            scale=alt.Scale(domain=[0, 1], range=["#e0ddd5", "#74a675"]),
            legend=None,
        ),
        alt.Tooltip(["Date", "Day"]),
    ).properties(
        height=200,
        width=600,
    ).configure_axis(grid=False, domain=False)
    return heat_plot


In [None]:
plot_mostworn()

In [None]:
plot_facet()

In [None]:
plot_heatmap(df, top_10)

TO DO NEXT:
- fix the concat'd function to plot y ="count"
- color scheme of plots 
    - `range=["#e0ddd5", "#7c9e7b"]` green/warm gray heatmap
- ["#bb8c9d","#9a8ca6","#8ba88a","#5bccc1","#e0ddd5"]


# Note to self 
- maybe add ID into google sheet for easier merging later?

IDEA: 
- is there any way to read the google sheet data in automatically? it's a pain to download every time for testing