In [2]:
import pandas as pd
from pathlib import Path
from datetime import timedelta
import numpy as np

from bokeh.io import show
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, Range1d

In [3]:
root_folder = Path.cwd().parents[1]

df = pd.read_csv(root_folder/'data/processed'/'01-final.csv')

df['PSTdt'] = pd.to_datetime(df['PST'], utc=True)-timedelta(hours=8)

df['Month'] = df['PSTdt'].dt.month

df17 = df[df.PSTdt.dt.year == 2017]

sorted = df17.sort_values(by=['Month','Score', 'PSTdt'], axis = 0)

jan = sorted[sorted['PSTdt'].dt.month==1].reset_index(drop=True)

In [8]:
jan.head()

Unnamed: 0,PST,Tide,Height,Deg,Period,Wind Speed,Wind Direction,Ideal Wind,Ideal Period,Ideal Tide,Ideal Swell,Score,PSTdt,Month
0,2017-01-06 12:00:00-08:00,1.87,1.26,216.0,4.0,6.0,170,0,0,0,0,0,2017-01-06 12:00:00+00:00,1
1,2017-01-07 10:00:00-08:00,1.71,2.15,110.0,4.0,6.0,70,0,0,0,0,0,2017-01-07 10:00:00+00:00,1
2,2017-01-07 11:00:00-08:00,1.07,2.16,113.0,4.0,6.0,60,0,0,0,0,0,2017-01-07 11:00:00+00:00,1
3,2017-01-07 12:00:00-08:00,0.95,1.92,113.0,4.0,7.0,140,0,0,0,0,0,2017-01-07 12:00:00+00:00,1
4,2017-01-07 13:00:00-08:00,0.63,1.84,115.0,4.0,6.0,140,0,0,0,0,0,2017-01-07 13:00:00+00:00,1


In [34]:
jan['nth hour'] = jan.index.to_list()
jan['nth hour'] = jan['nth hour'].apply(lambda x: float(x))+1

In [46]:
#define width of bar
width = 5

#create x-y positions
jan['x'] = (jan['nth hour'] % width)+1
jan['y'] = np.ceil(jan['nth hour']/width)

In [36]:
jan.head()

Unnamed: 0,PST,Tide,Height,Deg,Period,Wind Speed,Wind Direction,Ideal Wind,Ideal Period,Ideal Tide,Ideal Swell,Score,PSTdt,Month,nth hour,x,y
0,2017-01-06 12:00:00-08:00,1.87,1.26,216.0,4.0,6.0,170,0,0,0,0,0,2017-01-06 12:00:00+00:00,1,1.0,1.0,1.0
1,2017-01-07 10:00:00-08:00,1.71,2.15,110.0,4.0,6.0,70,0,0,0,0,0,2017-01-07 10:00:00+00:00,1,2.0,2.0,1.0
2,2017-01-07 11:00:00-08:00,1.07,2.16,113.0,4.0,6.0,60,0,0,0,0,0,2017-01-07 11:00:00+00:00,1,3.0,0.0,1.0
3,2017-01-07 12:00:00-08:00,0.95,1.92,113.0,4.0,7.0,140,0,0,0,0,0,2017-01-07 12:00:00+00:00,1,4.0,1.0,2.0
4,2017-01-07 13:00:00-08:00,0.63,1.84,115.0,4.0,6.0,140,0,0,0,0,0,2017-01-07 13:00:00+00:00,1,5.0,2.0,2.0


gray #808080
dodgerblue #1E90FF
limegreen #32CD32
ornage #FFA500
orangered #FF4500

In [37]:
jan['color'] = ''
jan.loc[jan['Score']==0, 'color'] = "#808080"
jan.loc[jan['Score']==1, 'color'] = "#1E90FF"
jan.loc[jan['Score']==2, 'color'] = "#32CD32"
jan.loc[jan['Score']==3, 'color'] = "#FFA500"
jan.loc[jan['Score']==4, 'color'] = "#FF4500"

In [47]:
source = ColumnDataSource(data=jan)


plot = figure(
    title=None, width=500, height=500,
    min_border=0, toolbar_location=None,
    x_range = Range1d(0, 20),
    #y_range = Range1d(0,20)
    )

plot.scatter(x="x", y="y", size=20, source=source, color="color", line_color="white")

show(plot)

Great success! Now going to try it with the entire year

In [4]:
df17 = sorted.reset_index(drop=True)
df17['nth hour'] = df17.index.to_list()
df17['nth hour'] = df17['nth hour'].apply(lambda x: float(x))+1

In [5]:
counts = df17.groupby('Month').count()
counts['count']=counts['PST']
counts = counts.reset_index()[['Month','count']]
row = {'Month': 0, 'count':0}

counts = counts.append(row, ignore_index=True)

  counts = counts.append(row, ignore_index=True)


In [6]:
n=0
for i in range(13):
    n += counts.loc[i,'count']
    counts.loc[i,'count'] = n

In [7]:
counts.loc[12, 'count'] = 0

In [8]:
counts

Unnamed: 0,Month,count
0,1,341
1,2,673
2,3,1074
3,4,1491
4,5,1956
5,6,2406
6,7,2871
7,8,3321
8,9,3711
9,10,4090


In [9]:
counts = counts.shift(2).fillna(0)
counts.Month = counts.index.to_list()
counts

Unnamed: 0,Month,count
0,0,0.0
1,1,0.0
2,2,341.0
3,3,673.0
4,4,1074.0
5,5,1491.0
6,6,1956.0
7,7,2406.0
8,8,2871.0
9,9,3321.0


In [10]:
df17 = df17.merge(counts, on = "Month")

In [11]:
#define width of bar and space between bars
width = 5
space = 5

#create x-y positions
df17['x'] = (df17['nth hour'] % width)
df17['x'] += 1+(df['Month']-1)*width + (df['Month']-1)*space
df17['y'] = np.ceil((df17['nth hour']-df17['count'])/width)

In [12]:
df17['color'] = ''
df17.loc[df17['Score']==0, 'color'] = "#808080"
df17.loc[df17['Score']==1, 'color'] = "#1E90FF"
df17.loc[df17['Score']==2, 'color'] = "#32CD32"
df17.loc[df17['Score']==3, 'color'] = "#FFA500"
df17.loc[df17['Score']==4, 'color'] = "#FF4500"

In [35]:
df17.columns

Index(['PST', 'Tide', 'Height', 'Deg', 'Period', 'Wind Speed',
       'Wind Direction', 'Ideal Wind', 'Ideal Period', 'Ideal Tide',
       'Ideal Swell', 'Score', 'PSTdt', 'Month', 'nth hour', 'count', 'x', 'y',
       'color'],
      dtype='object')

In [49]:
source = ColumnDataSource(data=df17)

TITLE = 'Hourly Wave Quality in Isla Vista ' + str(df17.PSTdt[0].year)
TOOLS = "hover,wheel_zoom,box_zoom,reset,save"

plot = figure(tools=TOOLS, toolbar_location='above',
    title=TITLE, width=1500, height=800,
    min_border=0
    #x_range = Range1d(0, 20),
    #y_range = Range1d(0,20)
    )

plot.hover.tooltips = [
    ("time", "@PSTdt{%m/%d at %H:00}"),
    ("score", "@Score"),
    ("tide (ft)", "@Tide"),
    ("swell height (m)", "@Height"),
    ("swell direction", "@Deg"),
    ("swell period (s)", "@Period"),
    ("wind speed (mph)", "@{Wind Speed}"),
    ("wind direction", "@{Wind Direction}")
]

plot.hover.formatters = {'@PSTdt': 'datetime'}

plot.scatter(x="x", y="y", size=10, source=source, color="color", line_color="white", marker='square')

#defining month tick locations
ticks = []
for i in range(12):
    ticks.append((width+2)/2 + i*(space+width))

plot.xaxis.ticker = ticks
month_labels = ['Jan', 'Feb', 'Mar','Apr','May','June','July','Aug','Sep','Oct','Nov','Dec']
x_labels = dict(zip(ticks, month_labels))
plot.xaxis.major_label_overrides = x_labels
plot.xaxis.axis_label = "Month"

plot.yaxis.visible = False


show(plot)

In [20]:
#defining month tick locations
ticks = []
for i in range(12):
    ticks.append((width+2)/2 + i*(space+width))

In [24]:
month_labels = ['Jan', 'Feb', 'Mar','Apr','May','June','July','Aug','Sep','Oct','Nov','Dec']

x_labels = dict(zip(ticks, month_labels))

In [33]:
str(df17.PSTdt[0].year)

'2017'