# Mr. Trash Wheel Baltimore Healthy Harbor

**2024 TidyTuesday, Week 10**

## Setup

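Shell commands for the project's virtual environment (`'lib'` is a placeholder for the package to install):

```bash
source .pvenv/bin/activate
pip freeze
pip install 'lib'
```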

### Libraries

In [1]:
import numpy as np
import pandas as pd

In [306]:
from bokeh.models import ColumnDataSource , Label, Title, PrintfTickFormatter, NumeralTickFormatter, CustomJSTickFormatter, FixedTicker
from bokeh.palettes import Bright6
from bokeh.plotting import figure, show
from bokeh.transform import factor_cmap

In [58]:
# Bokeh notebook I/O helpers and layout utilities.
# NOTE(review): `show` is also imported from bokeh.plotting in an earlier cell.
from bokeh.io import push_notebook, show, output_notebook
from bokeh.layouts import row, gridplot
from bokeh.plotting import output_file
# Direct Bokeh output to the notebook so later show(...) calls render inline.
output_notebook()

### Fonts and Params

In [268]:
from matplotlib import rcParams
from matplotlib import font_manager

# Directory scanned for additional font files.
# NOTE(review): '/Library/Fonts' is an absolute, macOS-style path — adjust on other systems.
font_dirs =  ['/Library/Fonts']
font_files = font_manager.findSystemFonts(fontpaths=font_dirs)

# Register every discovered file with matplotlib's font manager. Without this
# step the list above is never used, and a font missing from matplotlib's
# cached font list cannot be resolved by family name.
for font_file in font_files:
    try:
        font_manager.fontManager.addfont(font_file)
    except (OSError, RuntimeError):
        # Best effort: skip files that cannot be read or parsed as fonts so one
        # bad file does not stop the notebook.
        continue

# Default font family for matplotlib text.
# NOTE(review): rcParams configure matplotlib only; the Bokeh figures below set
# their fonts through their own properties (e.g. p.title.text_font).
rcParams['font.family'] = 'Roboto'
#rcParams['font.size'] = '12'

### Data

In [8]:
# Raw CSV from the TidyTuesday repository (2024-03-05 data folder).
url = 'https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2024/2024-03-05/trashwheel.csv'

# Read the file straight from the URL into the raw frame used by later cells.
dfr = pd.read_csv(filepath_or_buffer=url)

## EDA &  Wrangling

### Overview

In [16]:
# Size of the raw frame as (rows, columns).
dfr.shape

(993, 16)

In [17]:
# Column labels of the raw frame.
dfr.columns

Index(['ID', 'Name', 'Dumpster', 'Month', 'Year', 'Date', 'Weight', 'Volume',
       'PlasticBottles', 'Polystyrene', 'CigaretteButts', 'GlassBottles',
       'PlasticBags', 'Wrappers', 'SportsBalls', 'HomesPowered'],
      dtype='object')

In [18]:
# Summary statistics; the count row shows which item columns have missing values.
dfr.describe()

Unnamed: 0,Dumpster,Year,Weight,Volume,PlasticBottles,Polystyrene,CigaretteButts,GlassBottles,PlasticBags,Wrappers,SportsBalls,HomesPowered
count,993.0,993.0,993.0,993.0,992.0,992.0,992.0,742.0,992.0,849.0,629.0,993.0
mean,230.882175,2019.567976,2.966123,14.919436,2219.330645,1436.868952,13728.122984,20.955526,983.998992,2238.764429,13.594595,45.84995
std,185.819494,2.74627,0.843446,1.609434,1650.449496,1832.434994,24049.613206,15.256844,1412.339981,2712.845792,9.736466,18.225034
min,1.0,2014.0,0.61,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,73.0,2018.0,2.45,15.0,987.5,240.0,2900.0,10.0,240.0,880.0,6.0,38.0
50%,176.0,2020.0,3.04,15.0,1900.0,750.0,4900.0,18.0,540.0,1400.0,12.0,49.0
75%,381.0,2022.0,3.53,15.0,2900.0,2130.0,12000.0,28.0,1210.0,2490.0,20.0,58.0
max,629.0,2023.0,5.62,20.0,9830.0,11528.0,310000.0,110.0,13450.0,20100.0,56.0,94.0


In [21]:
# Data type of each column (Date is read as object, i.e. not parsed as a datetime).
dfr.dtypes

ID                 object
Name               object
Dumpster            int64
Month              object
Year                int64
Date               object
Weight            float64
Volume              int64
PlasticBottles    float64
Polystyrene       float64
CigaretteButts    float64
GlassBottles      float64
PlasticBags       float64
Wrappers          float64
SportsBalls       float64
HomesPowered        int64
dtype: object

In [22]:
# First five rows of the raw frame.
dfr.head()

Unnamed: 0,ID,Name,Dumpster,Month,Year,Date,Weight,Volume,PlasticBottles,Polystyrene,CigaretteButts,GlassBottles,PlasticBags,Wrappers,SportsBalls,HomesPowered
0,mister,Mister Trash Wheel,1,May,2014,5/16/2014,4.31,18,1450.0,1820.0,126000.0,72.0,584.0,1162.0,7.0,0
1,mister,Mister Trash Wheel,2,May,2014,5/16/2014,2.74,13,1120.0,1030.0,91000.0,42.0,496.0,874.0,5.0,0
2,mister,Mister Trash Wheel,3,May,2014,5/16/2014,3.45,15,2450.0,3100.0,105000.0,50.0,1080.0,2032.0,6.0,0
3,mister,Mister Trash Wheel,4,May,2014,5/17/2014,3.1,15,2380.0,2730.0,100000.0,52.0,896.0,1971.0,6.0,0
4,mister,Mister Trash Wheel,5,May,2014,5/17/2014,4.06,18,980.0,870.0,120000.0,72.0,368.0,753.0,7.0,0


### Subset and Renaming columns

In [55]:
# Columns kept for the analysis: the wheel ID, the year, and the item counts.
mcols = [
    'ID', 'Year',
    'PlasticBottles', 'Polystyrene', 'CigaretteButts', 'GlassBottles',
    'PlasticBags', 'Wrappers', 'SportsBalls',
]
df = dfr.loc[:, mcols]

# Map raw column names to display names; columns not listed keep their names.
df_clean = df.rename(columns={
    'ID': 'name',
    'Year': 'year',
    'PlasticBottles': 'Plastic bottles',
    'CigaretteButts': 'Cigarette butts',
    'GlassBottles': 'Glass bottles',
    'PlasticBags': 'Plastic bags',
    'SportsBalls': 'Sports balls',
})

# Keep the 2023 rows only, then drop the year column, which is constant after the filter.
df_filtered = df_clean.loc[df_clean['year'] == 2023]
df_2023 = df_filtered.drop(columns=['year'])

# Wide -> long: one row per (name, item type) observation with its count in 'total'.
df_long = df_2023.melt(id_vars=['name'], var_name='type', value_name='total')

In [56]:
# First five rows of the long-format frame (columns: name, type, total).
df_long.head()


Unnamed: 0,name,type,total
0,mister,Plastic bottles,1700.0
1,mister,Plastic bottles,2400.0
2,mister,Plastic bottles,3600.0
3,mister,Plastic bottles,2700.0
4,mister,Plastic bottles,2900.0


### Lumping to other

In [86]:
# Item types that are collapsed into a single 'Other' bucket.
minor_types = ['Sports balls', 'Glass bottles', 'Plastic bags', 'Polystyrene']
is_minor = np.isin(df_long['type'], minor_types)

# Overwrites df_long['type'] in place. Re-running is harmless because 'Other'
# is not among the labels being replaced.
df_long['type'] = np.where(is_minor, 'Other', df_long['type'])

## Viz with Bokeh

### Bar graph

#### Vertical Bars

In [89]:
# Total count per item type, sorted ascending by total.
type_df = (
    df_long
    .groupby('type')['total']
    .sum()
    .reset_index()
    .sort_values('total')
)
type_df

Unnamed: 0,type,total
1,Other,74227.0
3,Wrappers,284570.0
2,Plastic bottles,337980.0
0,Cigarette butts,496090.0


In [229]:
# Stand-alone vertical bar example on toy fruit data; it does not use the trash
# wheel frames. NOTE(review): appears to be a template for the chart in the next cell.
fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
counts = [5, 3, 4, 2, 4, 6]
source = ColumnDataSource(data={'fruits': fruits, 'counts': counts})

p = figure(
    x_range=fruits,
    height=350,
    toolbar_location=None,
    title="Fruit Counts",
)

# Map each fruit name to a colour from the Bright6 palette.
fruit_fill = factor_cmap('fruits', palette=Bright6, factors=fruits)
p.vbar(
    x='fruits',
    top='counts',
    source=source,
    width=0.9,
    line_color='white',
    fill_color=fruit_fill,
    legend_field="fruits",
)

# Legend laid out horizontally at the top centre of the plot.
p.legend.location = "top_center"
p.legend.orientation = "horizontal"

# Fixed y-range from 0 to 9; vertical grid lines hidden.
p.y_range.start = 0
p.y_range.end = 9
p.xgrid.grid_line_color = None

show(p)

In [287]:
# Bar labels and heights taken from the aggregated frame.
trash_type = type_df['type'].to_numpy()
total = type_df['total'].to_numpy()
source = ColumnDataSource(data={'trash_type': trash_type, 'total': total})

p = figure(
    x_range=trash_type,
    height=350,
    toolbar_location=None,
    title="TRASH WHEEL COLLECTION",
)

# Map each item type to a colour from the Bright6 palette.
type_fill = factor_cmap('trash_type', palette=Bright6, factors=trash_type)
p.vbar(
    x='trash_type',
    top='total',
    source=source,
    width=0.9,
    line_color='white',
    fill_color=type_fill,
    legend_field="trash_type",
)

# Vertical legend in the top-left corner.
p.legend.location = "top_left"
p.legend.orientation = "vertical"

# Fixed y-range from 0 to 500000; vertical grid lines hidden.
p.y_range.start = 0
p.y_range.end = 500000
p.xgrid.grid_line_color = None

show(p)

In [288]:
# Same chart as above, but with arrays passed directly to the glyph and one
# explicit hex colour per bar. No ColumnDataSource, no legend, no factor_cmap.
xvals = type_df['type'].to_numpy()
yvals = type_df['total'].to_numpy()
# NOTE(review): `mcols` is also the name of the column list in the subsetting cell.
mcols = ['#FF5733', '#33FF57', '#3366FF', '#FF33E9']

p = figure(
    x_range=xvals,
    height=350,
    toolbar_location=None,
    title="TRASH WHEEL COLLECTION",
)
p.vbar(x=xvals, top=yvals, width=0.9, line_color='white', color=mcols)

# Fixed y-range from 0 to 500000.
p.y_range.start = 0
p.y_range.end = 500000

# Vertical grid lines hidden; horizontal ones drawn as light-grey dashes.
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = '#cccccc'
p.ygrid.grid_line_alpha = 0.8
p.ygrid.grid_line_dash = [2, 4]

show(p)

In [74]:
# Totals per item type as a NumPy array.
# NOTE(review): the saved output below lists seven values, but type_df as displayed
# earlier has four rows. This cell's execution count (74) predates the lumping
# cell (86), so the output looks stale — re-run after the lumping step.
type_df.total.to_numpy()

array([  1242.,   1701.,  33089.,  38195., 284570., 337980., 496090.])

In [289]:
# Single-colour vertical bar chart fed directly from the type_df columns.
bp = figure(
    x_range=type_df["type"],
    title="TRASH WHEEL COLLECTION",
    width=600,
    height=300,
)
bp.vbar(x=type_df["type"], top=type_df["total"], width=0.8, color="darkcyan")

# Rotate the x tick labels by pi/2 radians (90 degrees).
bp.xaxis.major_label_orientation = np.pi / 2

show(bp)

#### H bars

In [286]:
# Stand-alone horizontal bar example on toy data; it does not use the trash wheel frames.
categories = ['A', 'B', 'C', 'D']
values = [4, 7, 5, 2]
source = ColumnDataSource(data={'categories': categories, 'values': values})

p = figure(
    y_range=categories,
    width=600,
    height=400,
    title="Horizontal Bar Plot",
    toolbar_location=None,
    tools="",
)
p.hbar(
    y='categories',
    right='values',
    source=source,
    height=0.8,
    color='navy',
    legend_field="categories",
)

# Title and tick-label font sizes.
p.title.text_font_size = '16pt'
p.xaxis.major_label_text_font_size = '12pt'
p.yaxis.major_label_text_font_size = '12pt'

# Axis labels; bars start at x = 0; horizontal grid lines hidden.
p.xaxis.axis_label = "Values"
p.yaxis.axis_label = "Categories"
p.x_range.start = 0
p.ygrid.grid_line_color = None

show(p)

In [290]:
# Bar labels, bar lengths, and one hex colour per bar, all stored in the data source.
categories = type_df['type'].to_numpy()
values = type_df['total'].to_numpy()
hcols = ['#4477AA', '#EE6677', '#228833', '#CCBB44']
source = ColumnDataSource(data={'categories': categories, 'values': values, 'hcols': hcols})

p = figure(
    y_range=categories,
    width=600,
    height=300,
    title="TRASH WHEEL COLLECTION",
    toolbar_location=None,
    tools="",
)
# The colour comes from the 'hcols' column of the source.
p.hbar(y='categories', right='values', height=0.7, color='hcols', source=source)

# Main title styling.
p.title.align = "center"
p.title.text_color = 'dodgerblue'
p.title.text_font = "Roboto"
p.title.text_font_size = '16pt'
p.title.text_font_style = "bold"

# Bars start at x = 0; horizontal grid lines hidden; no y-axis label is set.
p.x_range.start = 0
p.ygrid.grid_line_color = None
p.xaxis.axis_label = "Total collected"
p.xaxis.major_label_text_font_size = '10pt'
p.yaxis.major_label_text_font_size = '10pt'

# Thousands separators on the x ticks. Other NumeralTickFormatter formats tried
# here earlier: "$0.00" and "0.0%".
p.xaxis[0].formatter = NumeralTickFormatter(format="0,0")

# Subtitle added as an extra Title annotation in the 'above' panel. A Label
# annotation was tried earlier as an alternative.
p.add_layout(
    Title(
        text="Total number of trash items collected by category in 2023\n",
        align="center",
    ),
    "above",
)
show(p)


### Annotations

In [307]:
# Horizontal bar chart of 2023 totals per item type, with a styled title, an
# italic subtitle, and x ticks labelled in thousands ("100 K", "200 K", ...).
categories = type_df['type'].to_numpy()
values = type_df['total'].to_numpy()
# One colour per bar; the list length must match the number of rows in type_df.
hcols = ['#4477AA', '#EE6677', '#228833', '#CCBB44']

source = ColumnDataSource(data=dict(categories=categories, values=values, hcols = hcols))

p = figure(y_range=categories, width=600, height=300, title="TRASH WHEEL COLLECTION",
           toolbar_location=None, tools="")

p.hbar(
    source=source,
    y='categories', right='values', height=0.7, color='hcols',)

# Bars start at x = 0; horizontal grid lines hidden.
p.ygrid.grid_line_color = None
p.x_range.start = 0
p.xaxis.axis_label = "Total collected"

# Main title styling.
p.title.text_font_size = '16pt'
p.title.text_color = 'dodgerblue'
p.title.align = "center"
p.title.text_font = "Roboto"  # times new roman
p.title.text_font_style="bold" # italic

# Tick labels and tick marks.
p.xaxis.major_label_text_font_size = '10pt'
p.yaxis.major_label_text_font_size = '10pt'
p.xaxis.minor_tick_line_color = None
p.xaxis.major_tick_line_color = "#cccccc"
p.yaxis.major_tick_line_color = None

# Subtitle added as an extra Title annotation in the 'above' panel.
p.add_layout(
    Title(
        text="Total number collected by category in 2023\n", 
        text_font_size='14pt', text_color='gray', text_font_style="italic",
        align="center"), 
    "above")

# Ticks every 100,000. The label is computed from the tick value, so the
# formatter keeps working if the tick list above is changed. The earlier
# NumeralTickFormatter assignment was dropped because this formatter replaced
# it before the figure was shown.
p.xaxis.ticker=FixedTicker(ticks=[100000, 200000, 300000, 400000, 500000])
p.xaxis.formatter = CustomJSTickFormatter(code="""
    return (tick / 1000) + " K";
""")

show(p)