In [1]:
import pandas as pd
from pathlib import Path
from datetime import timedelta
import numpy as np

from bokeh.io import show
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, Range1d

In [2]:
root_folder = Path.cwd().parents[1]

df = pd.read_csv(root_folder/'data/processed'/'01-final.csv')

df['PSTdt'] = pd.to_datetime(df['PST'], utc=True)-timedelta(hours=8)

df['Month'] = df['PSTdt'].dt.month

df17 = df[df.PSTdt.dt.year == 2017]

sorted = df17.sort_values(by=['Month','Score', 'PSTdt'], axis = 0)

jan = sorted[sorted['PSTdt'].dt.month==1].reset_index(drop=True)

In [8]:
jan.head()

Unnamed: 0,PST,Tide,Height,Deg,Period,Wind Speed,Wind Direction,Ideal Wind,Ideal Period,Ideal Tide,Ideal Swell,Score,PSTdt,Month
0,2017-01-06 12:00:00-08:00,1.87,1.26,216.0,4.0,6.0,170,0,0,0,0,0,2017-01-06 12:00:00+00:00,1
1,2017-01-07 10:00:00-08:00,1.71,2.15,110.0,4.0,6.0,70,0,0,0,0,0,2017-01-07 10:00:00+00:00,1
2,2017-01-07 11:00:00-08:00,1.07,2.16,113.0,4.0,6.0,60,0,0,0,0,0,2017-01-07 11:00:00+00:00,1
3,2017-01-07 12:00:00-08:00,0.95,1.92,113.0,4.0,7.0,140,0,0,0,0,0,2017-01-07 12:00:00+00:00,1
4,2017-01-07 13:00:00-08:00,0.63,1.84,115.0,4.0,6.0,140,0,0,0,0,0,2017-01-07 13:00:00+00:00,1


In [3]:
jan['nth hour'] = jan.index.to_list()
jan['nth hour'] = jan['nth hour'].apply(lambda x: float(x))+1

In [4]:
#define width of bar
width = 5

#create x-y positions
jan['x'] = (jan['nth hour'] % width)+1
jan['y'] = np.ceil(jan['nth hour']/width)

In [36]:
jan.head()

Unnamed: 0,PST,Tide,Height,Deg,Period,Wind Speed,Wind Direction,Ideal Wind,Ideal Period,Ideal Tide,Ideal Swell,Score,PSTdt,Month,nth hour,x,y
0,2017-01-06 12:00:00-08:00,1.87,1.26,216.0,4.0,6.0,170,0,0,0,0,0,2017-01-06 12:00:00+00:00,1,1.0,1.0,1.0
1,2017-01-07 10:00:00-08:00,1.71,2.15,110.0,4.0,6.0,70,0,0,0,0,0,2017-01-07 10:00:00+00:00,1,2.0,2.0,1.0
2,2017-01-07 11:00:00-08:00,1.07,2.16,113.0,4.0,6.0,60,0,0,0,0,0,2017-01-07 11:00:00+00:00,1,3.0,0.0,1.0
3,2017-01-07 12:00:00-08:00,0.95,1.92,113.0,4.0,7.0,140,0,0,0,0,0,2017-01-07 12:00:00+00:00,1,4.0,1.0,2.0
4,2017-01-07 13:00:00-08:00,0.63,1.84,115.0,4.0,6.0,140,0,0,0,0,0,2017-01-07 13:00:00+00:00,1,5.0,2.0,2.0


gray #808080
dodgerblue #1E90FF
limegreen #32CD32
ornage #FFA500
orangered #FF4500

In [37]:
jan['color'] = ''
jan.loc[jan['Score']==0, 'color'] = "#808080"
jan.loc[jan['Score']==1, 'color'] = "#1E90FF"
jan.loc[jan['Score']==2, 'color'] = "#32CD32"
jan.loc[jan['Score']==3, 'color'] = "#FFA500"
jan.loc[jan['Score']==4, 'color'] = "#FF4500"

In [47]:
source = ColumnDataSource(data=jan)


plot = figure(
    title=None, width=500, height=500,
    min_border=0, toolbar_location=None,
    x_range = Range1d(0, 20),
    #y_range = Range1d(0,20)
    )

plot.scatter(x="x", y="y", size=20, source=source, color="color", line_color="white")

show(plot)

Great success! Now going to try it with the entire year

In [3]:
df17 = sorted.reset_index(drop=True)
df17['nth hour'] = df17.index.to_list()
df17['nth hour'] = df17['nth hour'].apply(lambda x: float(x))+1

In [7]:
counts = df17.groupby('Month').count()
counts


Unnamed: 0_level_0,PST,Tide,Height,Deg,Period,Wind Speed,Wind Direction,Ideal Wind,Ideal Period,Ideal Tide,Ideal Swell,Score,PSTdt,nth hour
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,341,341,341,341,341,341,341,341,341,341,341,341,341,341
2,332,332,332,332,332,332,331,332,332,332,332,332,332,332
3,401,401,401,401,401,401,401,401,401,401,401,401,401,401
4,417,417,417,417,417,417,417,417,417,417,417,417,417,417
5,465,465,465,465,465,465,465,465,465,465,465,465,465,465
6,450,450,450,450,450,450,450,450,450,450,450,450,450,450
7,465,465,465,465,465,464,464,465,465,465,465,465,465,465
8,450,450,450,450,450,450,450,450,450,450,450,450,450,450
9,390,390,390,390,390,386,386,390,390,390,390,390,390,390
10,379,379,379,379,379,379,378,379,379,379,379,379,379,379


In [8]:

counts['count']=counts['PST']


In [10]:

counts = counts.reset_index()[['Month','count']]
row = {'Month': 0, 'count':0}
pd.DataFrame(data=row)


ValueError: If using all scalar values, you must pass an index

In [None]:
counts = pd.concat([counts,row], ignore_index=True)

In [7]:
n=0
for i in range(13):
    n += counts.loc[i,'count']
    counts.loc[i,'count'] = n

In [8]:
counts.loc[12, 'count'] = 0

In [9]:
counts

Unnamed: 0,Month,count
0,1,341
1,2,673
2,3,1074
3,4,1491
4,5,1956
5,6,2406
6,7,2871
7,8,3321
8,9,3711
9,10,4090


In [10]:
counts = counts.shift(2).fillna(0)
counts.Month = counts.index.to_list()
counts

Unnamed: 0,Month,count
0,0,0.0
1,1,0.0
2,2,341.0
3,3,673.0
4,4,1074.0
5,5,1491.0
6,6,1956.0
7,7,2406.0
8,8,2871.0
9,9,3321.0


In [11]:
df17 = df17.merge(counts, on = "Month")

In [12]:
#define width of bar and space between bars
width = 5
space = 5

#create x-y positions
df17['x'] = (df17['nth hour'] % width)
df17['x'] += 1+(df['Month']-1)*width + (df['Month']-1)*space
df17['y'] = np.ceil((df17['nth hour']-df17['count'])/width)

In [13]:
df17['color'] = ''
df17.loc[df17['Score']==0, 'color'] = "#808080"
df17.loc[df17['Score']==1, 'color'] = "#1E90FF"
df17.loc[df17['Score']==2, 'color'] = "#32CD32"
df17.loc[df17['Score']==3, 'color'] = "#FFA500"
df17.loc[df17['Score']==4, 'color'] = "#FF4500"

In [14]:
df17.columns

Index(['PST', 'Tide', 'Height', 'Deg', 'Period', 'Wind Speed',
       'Wind Direction', 'Ideal Wind', 'Ideal Period', 'Ideal Tide',
       'Ideal Swell', 'Score', 'PSTdt', 'Month', 'nth hour', 'count', 'x', 'y',
       'color'],
      dtype='object')

In [None]:
source = ColumnDataSource(data=df17)

TITLE = 'Hourly Wave Quality in Isla Vista ' + str(df17.PSTdt[0].year)
TOOLS = "hover,wheel_zoom,box_zoom,reset,save"

plot = figure(tools=TOOLS, toolbar_location='above',
    title=TITLE, width=1000, height=500,
    min_border=0,
    #x_range = Range1d(0, 20),
    #y_range = Range1d(0,5000)
    )
plot.title.text_align = 'center'
plot.title.text_font_size = '20px'

plot.hover.tooltips = [
    ("time", "@PSTdt{%m/%d at %H:00}"),
    ("score", "@Score"),
    ("total hours with this score", "@totals"),
    ("tide (ft)", "@Tide"),
    ("swell height (m)", "@Height"),
    ("swell direction", "@Deg"),
    ("swell period (s)", "@Period"),
    ("wind speed (mph)", "@{Wind Speed}"),
    ("wind direction", "@{Wind Direction}")
]

plot.hover.formatters = {'@PSTdt': 'datetime'}

plot.scatter(x="x", y="y", size=10, source=source, color="color", line_color="white", marker='square')

#creating month ticks

#defining locations
ticks = []
for i in range(12):
    ticks.append((width+2)/2 + i*(space+width))
plot.xaxis.ticker = ticks

#writing labels
month_labels = ['Jan', 'Feb', 'Mar','Apr','May','June','July','Aug','Sep','Oct','Nov','Dec']
x_labels = dict(zip(ticks, month_labels))
plot.xaxis.major_label_overrides = x_labels
# plot.xaxis.axis_label = "Month"
# plot.xaxis.axis_label_text_font_style = 'bold'
# plot.xaxis.axis_label_text_font_size = '15px'
plot.xaxis.major_label_text_font_style = 'bold'
plot.xaxis.major_label_text_font_size = '15px'

#hiding grid and y-axis
plot.yaxis.visible = False
plot.xgrid.visible = False
plot.ygrid.visible = False

plot.y_range.range_padding = 0.02


show(plot)

In [20]:
#defining month tick locations
ticks = []
for i in range(12):
    ticks.append((width+2)/2 + i*(space+width))

In [16]:
month_labels = ['Jan', 'Feb', 'Mar','Apr','May','June','July','Aug','Sep','Oct','Nov','Dec']

x_labels = dict(zip(ticks, month_labels))

In [17]:
str(df17.PSTdt[0].year)

'2017'

In [18]:
df17

Unnamed: 0,PST,Tide,Height,Deg,Period,Wind Speed,Wind Direction,Ideal Wind,Ideal Period,Ideal Tide,Ideal Swell,Score,PSTdt,Month,nth hour,count,x,y,color
0,2017-01-06 12:00:00-08:00,1.87,1.26,216.0,4.0,6.0,170,0,0,0,0,0,2017-01-06 12:00:00+00:00,1,1.0,0.0,2.0,1.0,#808080
1,2017-01-07 10:00:00-08:00,1.71,2.15,110.0,4.0,6.0,70,0,0,0,0,0,2017-01-07 10:00:00+00:00,1,2.0,0.0,3.0,1.0,#808080
2,2017-01-07 11:00:00-08:00,1.07,2.16,113.0,4.0,6.0,60,0,0,0,0,0,2017-01-07 11:00:00+00:00,1,3.0,0.0,4.0,1.0,#808080
3,2017-01-07 12:00:00-08:00,0.95,1.92,113.0,4.0,7.0,140,0,0,0,0,0,2017-01-07 12:00:00+00:00,1,4.0,0.0,5.0,1.0,#808080
4,2017-01-07 13:00:00-08:00,0.63,1.84,115.0,4.0,6.0,140,0,0,0,0,0,2017-01-07 13:00:00+00:00,1,5.0,0.0,1.0,1.0,#808080
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4764,2017-12-29 09:00:00-08:00,3.89,1.45,286.0,11.0,0.0,calm,1,1,1,1,4,2017-12-29 09:00:00+00:00,12,4765.0,4428.0,111.0,68.0,#FF4500
4765,2017-12-29 10:00:00-08:00,2.56,1.33,285.0,11.0,3.0,30,1,1,1,1,4,2017-12-29 10:00:00+00:00,12,4766.0,4428.0,112.0,68.0,#FF4500
4766,2017-12-30 09:00:00-08:00,4.94,1.20,293.0,11.0,7.0,50,1,1,1,1,4,2017-12-30 09:00:00+00:00,12,4767.0,4428.0,113.0,68.0,#FF4500
4767,2017-12-30 10:00:00-08:00,3.40,1.19,288.0,11.0,0.0,calm,1,1,1,1,4,2017-12-30 10:00:00+00:00,12,4768.0,4428.0,114.0,68.0,#FF4500


In [19]:
score_counts = df17.groupby(['Month', 'Score']).count()

In [20]:
score_counts['totals'] = score_counts['PST']
score_counts = score_counts['totals'].reset_index()

In [21]:
df17 = df17.merge(score_counts, left_on=['Month', 'Score'], right_on=['Month','Score'])