This notebook makes the plotly graphs for Berta Block routes based on Beta7 data.
1. General info about the routes - count, mean #senders, holdtype, style..
2. Information about the climbers of each route - height, grade, #attempts.

In [1]:
from bs4 import BeautifulSoup
import requests

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from time import sleep

import seaborn as sns
import collections

import pandas as pd
import numpy as np
import os
    
#from scipy.ndimage import gaussian_filter
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff

import plotly

# %matplotlib inline
# sns.set_style("whitegrid")
# plt.style.use('seaborn-poster')

In [2]:
# Root URL of the Beta7 site; the (commented-out) scraping cells append page paths to it.
base_url = 'https://beta7.app/'

In [3]:
# Grade -> hex colour used in the plots.
# Insertion order matters: later cells slice the keys/values by position.
grade_colors = collections.OrderedDict([
    ('3', '#ffff80'),
    ('4', '#fcff33'),
    ('5', '#8fff94'),
    ('5+', '#006432'),
    ('5C', '#037d50'),
    ('6A', '#9df7f2'),
    ('6A+', '#1cacb2'),
    ('6B', '#207ea7'),
    ('6B+', '#b58de8'),
    ('6C', '#9a57a2'),
    ('6C+', '#9a23a8'),
    ('7A', '#909090'),
    ('7A+', '#616161'),
    ('7B', '#424141'),
    ('7B+', '#303030'),
    ('7C', '#000000'),
    ('7C+', '#000000'),
    ('8A', '#000000'),
])

#grade_color_list = list(grade_colors.values())

# ### Beta7 grade colors - hard to distinguish:
# grade_colors['4'] = '#e8da00'
# grade_colors['5'] = '#00a476'
# grade_colors['6A'] = '#199a92'
# grade_colors['6A+'] = '#008db2'
# grade_colors['6B'] = '#1684ae'
# grade_colors['6B+'] = '#6d5e9c'
# grade_colors['6C'] = '#835498'
# grade_colors['6C+'] = '#914e95'
# grade_colors['7A'] = '#453152'
# grade_colors['7A+'] = '#382a44'
# grade_colors['7B'] = '#282132'
# grade_colors['7B+'] = '#12151a'
# grade_colors['7C'] = '#36101f'

#### Selenium - get all routes

In [4]:
# driver = webdriver.Firefox()

# # Go to routes page:
# driver.get(base_url + "location/bertablock/routes")
# sleep(3)

# # load more than 16 routes:
# driver.find_element("xpath", "//span[.='load more']").click()
# sleep(4)

# # scroll down:
# for i in range(60):
#     driver.execute_script("window.scrollTo(0, 50000)") 
#     sleep(0.5)

In [5]:
# # get the entire html of the routes page
# html = driver.page_source
# # close the driver 
# driver.close()

In [6]:
# # format the html to soup
# soup = BeautifulSoup(html, 'html.parser')

#### Create all routes df:
The number of sends listed here may differ from the number on the individual route page used below (people may have sent the route in the meantime).

In [7]:
# all_routes_soup = soup.find_all('div', {'class':'route-container'})
# n_routes = len(all_routes_soup)
# n_routes

In [8]:
# routes_section = []
# routes_color = []
# routes_name = []
# routes_grade = []
# routes_grade_gym = []
# routes_holdtypes = []
# routes_style = []
# routes_time = []
# routes_nclimbs = []

# # Get routes link:
# routes_hrefs = [a['href'] for a in soup.find_all('a', href=True) if 'route/' in a['href']]
# if len(routes_hrefs)!=n_routes:
#     print('mismatch in #route links to #links')

# for r_soup in all_routes_soup:
#     routes_section.append(r_soup.find('span', {'class':"additional-location-information"}).text)
#     routes_color.append(r_soup.find_all('span', {'class':"color"})[-1].text)
#     routes_holdtypes.append(r_soup.find('span', {'class':"holdtypes"}).text)
#     routes_style.append([s.text.replace('\u200d','') for s in r_soup.find_all('span', {'class':"style"})])
#     routes_time.append(r_soup.find('time', {'class':"hint"}).text)

#     name = r_soup.find('span', {'class':"route-name"})
#     name = name.text if name else ''
#     routes_name.append(name)
    
#     grade = r_soup.find('span', {'class':"grades"}).text.split('/')
#     routes_grade.append(grade[0])
#     routes_grade_gym.append(grade[1])
    
#     n_climbs = r_soup.find_all('span', {'class':"climbs-count"})
#     ## "climbs-count" might be senders/beta requests/videos
#     ## if isnumeric returns False then it's the number of beta requests/videos.
#     n_climbs = float(n_climbs[-1].text.rstrip()) if (
#         n_climbs and n_climbs[-1].text.rstrip().isnumeric()) else float('nan')

#     routes_nclimbs.append(n_climbs)

    

In [9]:
# df_all_routes = pd.DataFrame({
#     'section':routes_section,
#     'grade':routes_grade,
#     'grade_gym':routes_grade_gym,
#     'color':routes_color,
#     'name':routes_name,
#     'holdtypes':routes_holdtypes,
#     'styles':routes_style,
#     'time':routes_time,
#     'href':routes_hrefs,
#     'nclimbs':routes_nclimbs,
# })

In [10]:
# Tag used as the prefix of the input/output file names in this notebook.
date = '220923'

In [11]:
# df_all_routes.to_csv(f'{date}_all_routes.csv')

In [12]:
# Load the routes table (presumably written by the commented-out `to_csv` cell above);
# the first CSV column becomes the index.
df_all_routes = pd.read_csv(f'{date}_all_routes.csv', index_col=0)

In [13]:
holdtypes = ["pockets","jugs","volumes","pinches","crimps","slopers"]

# One boolean column per hold type: True where the route's `holdtypes` text contains it.
for hold in holdtypes:
    df_all_routes[hold] = df_all_routes['holdtypes'].str.contains(hold)


In [14]:
# Emoji tag found in the `styles` column -> name of the boolean column derived from it.
styles = {
    "💎": "compression",
    "🔬": "technique",
    "💪": "strength",
    "💣": "power",
    "🤸": "fun",
    "🧘": "balance",
    "💢": "corner",
    "💃": "footwork",
    "🤹": "coordination",
    "🚪": "arete",
    "🏄": "mantle",
    "🚀": "dyno",
    "🔥": "endurance",
}

# The `styles` cells are text (as loaded from the CSV), so membership is a substring test.
for emoji, style_name in styles.items():
    df_all_routes[style_name] = df_all_routes['styles'].map(lambda tags, emoji=emoji: emoji in tags)



In [15]:
# Order the routes by the `grade` column.
df_all_routes = df_all_routes.sort_values(by=["grade"])

In [16]:
# Preview the first rows of the routes table.
df_all_routes.head()

Unnamed: 0,section,grade,grade_gym,color,name,holdtypes,styles,time,href,nclimbs,...,power,fun,balance,corner,footwork,coordination,arete,mantle,dyno,endurance
359,Tsunami,3,WEISS,violet,,slopers,['🧘'],4 months ago,/route/jzquGG3GoYgvvQzFrKXnzvecLwm2~1653054177046,41.0,...,False,False,True,False,False,False,False,False,False,False
59,Ekcürb,3,WEISS,green,Grey's Anatomy,jugs,['🔬'],10 days ago,/route/UQGtEzKKf7NHJM6JGCdYL8LrkPl2~1663081586346,2.0,...,False,False,False,False,False,False,False,False,False,False
73,Paradies,3,WEISS,white,,jugs,['🤸'],11 days ago,/route/jzquGG3GoYgvvQzFrKXnzvecLwm2~1662994468579,6.0,...,False,True,False,False,False,False,False,False,False,False
94,Submarino,3,WEISS,blue,,jugs,['💪'],17 days ago,/route/jzquGG3GoYgvvQzFrKXnzvecLwm2~1662477059478,8.0,...,False,False,False,False,False,False,False,False,False,False
95,Enterprise,3,WEISS,cornflower,T-Rex Testpiece,jugs,['💪'],18 days ago,/route/UQGtEzKKf7NHJM6JGCdYL8LrkPl2~1662389956144,9.0,...,False,False,False,False,False,False,False,False,False,False


### Summarize

In [17]:
# Look up each route's grade in grade_colors; grades missing from the mapping become NaN.
df_all_routes['hex_color'] = df_all_routes['grade'].map(grade_colors)

In [18]:
# Colours of the grades that actually occur in the table, kept in grade_colors order.
grades_present = set(df_all_routes['grade'].unique())
hist_colors = [hex_code for grade, hex_code in grade_colors.items() if grade in grades_present]

In [63]:
# Count of routes per grade, one colour per grade.
fig = px.histogram(
    df_all_routes,
    x='grade',
    color='grade',
    color_discrete_sequence=hist_colors,
    template='simple_white',
)
fig.update_layout(
    title_text=f'number of routes per grade (n={df_all_routes.shape[0]})',
    title_x=0.5,
    showlegend=False,
)
fig.update_yaxes(title='number of routes')

#fig.show()
# Written as an HTML fragment that loads plotly.js from the CDN.
fig.write_html(f'{date}_nRoutes_per_grade.html', full_html=False, include_plotlyjs="cdn")

In [64]:
# Mean number of sends per grade.
# Only `nclimbs` is aggregated: calling .mean() on the whole grouped frame raises a
# TypeError on pandas >= 2.0 because the table also holds string columns.
mean_sends_per_grade = df_all_routes.groupby('grade', as_index=False)['nclimbs'].mean()

fig = px.bar(mean_sends_per_grade, x="grade", y="nclimbs", color="grade",
             color_discrete_sequence=hist_colors,
             template='simple_white')#, width=900, height=400)

fig.update_layout(title_text='mean number of sends per grade', title_x=0.5)

fig.update_yaxes(title='mean number of sends')
fig.update_layout(showlegend=False)

#fig.show()
# HTML fragment without plotly.js (the page is expected to load it already).
fig.write_html(f'{date}_mean_nsends_per_grade.html', full_html=False, include_plotlyjs=False)

In [65]:
# Every route contributes its own bar segment at its grade, so the stacked
# height of a grade's bar is the total number of sends for that grade.
routes_by_grade_and_sends = df_all_routes.sort_values(by=['grade', 'nclimbs'])

fig = px.bar(
    routes_by_grade_and_sends,
    x="grade",
    y="nclimbs",
    color="grade",
    color_discrete_sequence=hist_colors,
    template='simple_white',
)
fig.update_layout(title_text='total number of sends per grade', title_x=0.5, showlegend=False)
fig.update_yaxes(title='total number of sends')

#fig.show()
fig.write_html(f'{date}_total_nsends_per_grade.html', full_html=False, include_plotlyjs=False)

In [27]:
# Per grade: number of routes flagged with each hold type, plus the route count (`counter`).
routes_by_grade = df_all_routes.groupby(['grade'])
df_grade_holdtype = (
    routes_by_grade[holdtypes]
    .sum()
    .assign(counter=routes_by_grade.size())
    .reset_index()
)

In [66]:
# Stacked bars: number of routes per hold type within each grade.
holdtype_bar_options = dict(
    x="grade",
    y=holdtypes,
    text_auto=True,
    template='simple_white',
    height=500,
)
fig = px.bar(df_grade_holdtype, **holdtype_bar_options)

fig.update_layout(title_text='hold type per grade', title_x=0.5, legend_title=None)
fig.update_yaxes(title='number of routes')

#fig.show()
fig.write_html(f'{date}_holdType_per_grade.html', full_html=False, include_plotlyjs=False)

In [67]:
# Hold-type counts as a truncated integer percentage of the routes in each grade.
# The table is rebuilt from df_all_routes instead of being divided in place, so
# re-running this cell gives the same result instead of dividing the percentages again.
_routes_by_grade = df_all_routes.groupby(['grade'])
_routes_per_grade = _routes_by_grade.size()

df_grade_holdtype = (
    _routes_by_grade[holdtypes]
    .sum()
    .div(_routes_per_grade, axis=0)
    .multiply(100)
    .astype(int)
    .assign(counter=_routes_per_grade)
    .reset_index()
)

In [68]:
# Same stacked bars as the absolute hold-type plot, drawn from the percentage table.
fig = px.bar(
    df_grade_holdtype,
    x="grade",
    y=holdtypes,
    text_auto=True,
    template='simple_white',
)
fig.update_layout(title_text='hold type per grade - normalized', title_x=0.5, legend_title=None)
# Tick labels are hidden on the y axis.
fig.update_yaxes(title='hold type %', showticklabels=False)

#fig.show()
fig.write_html(f'{date}_holdType_per_grade_normalized.html', full_html=False, include_plotlyjs=False)

In [69]:
# Show the style column names derived from the emoji mapping.
styles.values()

dict_values(['compression', 'technique', 'strength', 'power', 'fun', 'balance', 'corner', 'footwork', 'coordination', 'arete', 'mantle', 'dyno', 'endurance'])

In [70]:
# Per grade: number of routes flagged with each style, plus the route count (`counter`).
style_columns = list(styles.values())
routes_by_grade = df_all_routes.groupby(['grade'])
df_grade_style = (
    routes_by_grade[style_columns]
    .sum()
    .assign(counter=routes_by_grade.size())
    .reset_index()
)

In [71]:
# Stacked bars: number of routes per style within each grade.
fig = px.bar(
    df_grade_style,
    x="grade",
    y=list(styles.values()),
    text_auto=True,
    template='simple_white',
    color_discrete_sequence=px.colors.qualitative.Light24,
)
fig.update_layout(title_text='style per grade', title_x=0.5, legend_title=None)
fig.update_yaxes(title='number of routes')

#fig.show()
fig.write_html(f'{date}_style_per_grade.html', full_html=False, include_plotlyjs=False)

In [72]:
# Style counts as a truncated integer percentage of the routes in each grade.
# The table is rebuilt from df_all_routes instead of being divided in place, so
# re-running this cell gives the same result instead of dividing the percentages again.
_routes_by_grade = df_all_routes.groupby(['grade'])
_routes_per_grade = _routes_by_grade.size()

df_grade_style = (
    _routes_by_grade[list(styles.values())]
    .sum()
    .div(_routes_per_grade, axis=0)
    .multiply(100)
    .astype(int)
    .assign(counter=_routes_per_grade)
    .reset_index()
)

In [73]:
# Same stacked bars as the absolute style plot, drawn from the percentage table.
fig = px.bar(
    df_grade_style,
    x="grade",
    y=list(styles.values()),
    text_auto=True,
    template='simple_white',
    color_discrete_sequence=px.colors.qualitative.Light24,
)
fig.update_layout(title_text='style per grade - normalized', title_x=0.5, legend_title=None)
# Tick labels are hidden on the y axis.
fig.update_yaxes(title='style %', showticklabels=False)

#fig.show()
fig.write_html(f'{date}_style_per_grade_normalized.html', full_html=False, include_plotlyjs=False)

#### More soups:

In [74]:
# user_url = 'https://beta7.app/user/bellonet'
# user_page = requests.get(user_url)
# soup_user = BeautifulSoup(user_page.text, 'html.parser')

### Set route plot style params

In [75]:
# Send-type symbols, in the order used for the `sendtype` axis of the strip plots below.
attempts = ['⚡', '🥈', '👊', '❓', '✔️']
attempts
#'\U0001f948' second place

['⚡', '🥈', '👊', '❓', '✔️']

In [42]:
## NOT USED - CURRENTLY NO HEIGHT AS HUE
# ## our min height - 158, max - 199  (starting a bit before min to not have white)
# minh = 156
# rang = 44

# cmap_discr = sns.color_palette("vlag", as_cmap=True)(np.linspace(0, 1, rang)) #red to blue
# cmap_discr = sns.color_palette("Blues", as_cmap=True)(np.linspace(0, 1, rang))

# height_colors = {i+minh:c for i,c in enumerate(cmap_discr)}
# height_colors[0] = np.array([0.5, 0.5, 0.5, 1.])

### Plot multiple routes

In [43]:
# Choose routes to plot: grade strings that compare greater than '6' (string
# comparison, not numeric) and more than 10 recorded climbs.
df_routes_to_plot = df_all_routes[(df_all_routes.grade>'6') & (df_all_routes.nclimbs>10)]
# hrefs_routes_to_plot = df_routes_to_plot["href"].to_list()

In [44]:
# Get routes html
#routes_page = [requests.get(base_url + r) for r in hrefs_routes_to_plot]

In [45]:
# Directory holding the saved route pages, one `<route index>.txt` file per route.
routes_dir = f'{date}_routes' 
# os.makedirs(routes_dir)

In [46]:
## Save each route page to txt:

# for ii,p in enumerate(routes_page):
    
#     ## get the df index of the route:
#     df_idx = df_routes_to_plot[df_routes_to_plot.href==hrefs_routes_to_plot[ii]].index[0]
    
#     with open(os.path.join(routes_dir, f'{df_idx}.txt'), 'w') as f:
#         f.write(p.text)

In [47]:
# ## Save one route page to txt:
# idx = 300

# r = df_routes_to_plot[df_routes_to_plot.index==idx]["href"].values[0]

# p = requests.get(base_url + r)

# with open(os.path.join(routes_dir, f'{idx}.txt'), 'w') as f:
#     f.write(p.text)

In [48]:
# Build `df`: one row per climb entry found on the saved page of every selected route.
df_routes_to_plot = df_routes_to_plot.sort_values(by=["grade"])

# Hold-colour names that are replaced by an explicit hex code; any other name is kept as-is.
hold_color_hex = {
    'forest': '#014421',
    'lemon': '#FFEA00',
    'yellow': '#FFEA00',
    'cornflower': '#9aceeb',
}

# Per-route frames are collected and concatenated once at the end
# (concatenating inside the loop re-copies the growing table on every route).
route_frames = []

for i in df_routes_to_plot.index:

    # The page of each route is stored under the route's index in df_routes_to_plot.
    with open(os.path.join(routes_dir, f'{i}.txt'), 'r') as f:
        page = f.read()

    # Parser given explicitly (same one as the routes-list cell) so parsing does not
    # depend on which optional parsers happen to be installed.
    soup_route = BeautifulSoup(page, 'html.parser')

    # Route name is optional on the page; 'XXX' marks an unnamed route.
    name_tag = soup_route.select_one('.route-name')
    route_name = name_tag.text if name_tag is not None else 'XXX'

    route_sector = soup_route.select_one('.sector').text

    # Text of the '.fb' element split on '/': first part -> `level`, second -> `gym_level`.
    route_level = soup_route.select_one('.fb').text.split('/')

    # Hold colour: first non-empty '.color' element.
    hold_color = [c.text.strip() for c in soup_route.select('.color') if c.text.strip()!=''][0]
    hold_color = hold_color_hex.get(hold_color, hold_color)

    # Climb entries, excluding those that are beta requests.
    route_senders = [t for t in soup_route.select(".climb-container")
                     if "<span>made a beta request</span>" not in str(t)]

    # Climber height: the element text minus its last two characters, parsed as int; NaN if absent.
    route_heights = [int(s.select_one(".height").text[:-2]) if s.select_one(".height") else np.nan
                     for s in route_senders]

    # Climber level: the element text minus its first character; None if absent.
    route_levels = [s.select_one(".level").text[1:] if s.select_one(".level") else None
                    for s in route_senders]

    # Send type (number of attempts); '❓' when the entry has none.
    route_sendtype = [s.select_one(".sending-type").text if s.select_one(".sending-type") else '❓'
                      for s in route_senders]

    df_route = pd.DataFrame({"height":route_heights, "levels":route_levels, 'sendtype':route_sendtype})
    df_route['idx'] = i
    df_route['name'] = route_name
    df_route['sector'] = route_sector
    df_route['level'] = route_level[0]
    df_route['gym_level'] = route_level[1]
    df_route['hold_color'] = hold_color

    route_frames.append(df_route)

# ignore_index=True renumbers the rows 0..n-1; an empty selection yields an empty frame.
df = pd.concat(route_frames, ignore_index=True) if route_frames else pd.DataFrame()
df.shape

(5898, 9)

In [49]:
# List the distinct send-type symbols that were parsed.
df.sendtype.unique()

array(['👊', '⚡', '🥈', '❓', '✔️', '🎫'], dtype=object)

In [50]:
# Discard rows whose send type is '🎫' (it is not one of the symbols listed in `attempts`).
is_excluded_sendtype = df['sendtype'] == '🎫'
df = df.loc[~is_excluded_sendtype]

df.shape, df.sendtype.unique()

((5896, 9), array(['👊', '⚡', '🥈', '❓', '✔️'], dtype=object))

In [51]:
# Renumber the rows 0..n-1 after the rows were removed above.
df = df.reset_index(drop=True)

In [52]:
# Mean / median / min / max climber height per route grade, cast to int, saved as an HTML table.
with open(f'{date}_height_summary_table.html', 'w') as f:
    height_summary = (
        df.groupby(['level'])
        .agg({'height': ["mean", "median", "min", "max"]})
        .astype(int)
        .sort_values(by=['level'])
    )
    height_summary.to_html(f, classes="table")

In [53]:
## Bin height

bins = range(155,201,5)
height_labels = [f'~{b+2}' for b in bins][:-1]
df['height_binned'] = pd.cut(df['height'], bins, labels=height_labels)

In [54]:
## create a column where I map route level as 0 for the climbers levels:
levels = df.levels.sort_values().unique()

levels_temp_dict = dict([(l,i) for i,l in enumerate(levels)])

df["levels_mapped"] = df["levels"].map(levels_temp_dict) - df["level"].map(levels_temp_dict)

In [55]:
# Preview the first rows of the per-climb table.
df.head()

Unnamed: 0,height,levels,sendtype,idx,name,sector,level,gym_level,hold_color,height_binned,levels_mapped
0,180.0,6B,👊,157,XXX,Competizione,6A,BLAU,violet,~177,2
1,192.0,6C,👊,157,XXX,Competizione,6A,BLAU,violet,~192,4
2,,7B+,⚡,157,XXX,Competizione,6A,BLAU,violet,,9
3,185.0,7B,⚡,157,XXX,Competizione,6A,BLAU,violet,~182,8
4,169.0,6C,🥈,157,XXX,Competizione,6A,BLAU,violet,~167,4


### Plot means per grade

#### heat maps

In [76]:
# One sender-grade x binned-height count heatmap per route grade.
routes_levels = df.level.sort_values().unique()

# Sender-grade categories shown on the heatmaps: grade_colors keys from position 4
# up to (not including) the last one.
sender_grade_categories = list(grade_colors.keys())[4:-1]

# Iterate over the route grades actually present (instead of a fixed range(8)), so the
# cell neither fails with fewer than eight grades nor silently skips extra ones.
for i, route_level in enumerate(routes_levels):

    df_grade = df[df.level == route_level]

    lev = pd.Categorical(df_grade['levels'], categories=sender_grade_categories)
    hei = pd.Categorical(df_grade['height_binned'], categories=height_labels)

    # dropna=False is passed so that categories with no rows are still requested in the table.
    confusion_matrix = pd.crosstab(lev, hei, dropna=False)
    confusion_matrix.columns.name = 'height'
    confusion_matrix.index.name = 'grade'

    ## not using smoothing as it lowers n by much
    #### np.sum(gaussian_filter(confusion_matrix.astype(float), 0.1).astype(int))

    fig = px.imshow(confusion_matrix, color_continuous_scale="BuPu", width=400, height=400)
    fig.update_layout(title_text=f'{route_level}  (n sends={df_grade.shape[0]})', title_x=0.5)
    fig.update_xaxes(title=None)
    fig.update_yaxes(title=None)

    #fig.show()
    # Only the first fragment references plotly.js (from the CDN); the others omit it.
    fig.write_html(f'{date}_heatmap_xLevel_yHeight_hovAttempts_{route_level}.html',
                   full_html=False, include_plotlyjs="cdn" if i == 0 else False)


In [77]:
## inverse confusion matrix: height on the rows (reversed order), sender grade on the columns.

routes_levels = df.level.sort_values().unique()

# Sender-grade categories shown on the heatmaps: grade_colors keys from position 4
# up to (not including) the last one.
sender_grade_categories = list(grade_colors.keys())[4:-1]

# Iterate over the route grades actually present (instead of a fixed range(8)), so the
# cell neither fails with fewer than eight grades nor silently skips extra ones.
for i, route_level in enumerate(routes_levels):

    df_grade = df[df.level == route_level]

    lev = pd.Categorical(df_grade['levels'], categories=sender_grade_categories)
    hei = pd.Categorical(df_grade['height_binned'], categories=height_labels)

    confusion_matrix = pd.crosstab(lev, hei, dropna=False).T
    confusion_matrix.columns.name = 'grade'
    confusion_matrix.index.name = 'height'
    # Reverse the row order of the height bins.
    confusion_matrix = confusion_matrix.loc[::-1]

    ## not using smoothing as it lowers n by much
    #### np.sum(gaussian_filter(confusion_matrix.astype(float), 0.1).astype(int))

    fig = px.imshow(confusion_matrix, color_continuous_scale="BuPu", width=400, height=400)
    fig.update_layout(title_text=f'{route_level}  (n sends={df_grade.shape[0]})', title_x=0.5)
    fig.update_xaxes(title=None)
    fig.update_yaxes(title=None)

    #fig.show()
    # Only the first fragment references plotly.js (from the CDN); the others omit it.
    fig.write_html(f'{date}_heatmapT_xLevel_yHeight_hovAttempts_{route_level}.html',
                   full_html=False, include_plotlyjs="cdn" if i == 0 else False)


#### route level mapped to zero in sendlevel histograms

In [86]:
# Histogram (bin width 1) plus normal-fit curve of the sender grade offsets, all routes pooled.
offset_tick_labels = [-2, -1, 'route grade', *range(1, 12)]
# Labels sit at the centre of each unit-wide bin: -1.5, -0.5, ..., 11.5.
offset_tick_positions = [offset + 0.5 for offset in range(-2, 12)]

fig = ff.create_distplot(
    [df["levels_mapped"]],
    ['6A-7A+'],
    bin_size=1,
    curve_type='normal',
    show_curve=True,
    show_rug=False,
    colors=['red'],
)
fig.update_layout(template="none", title_text='senders grade around the route grade - for 6A-7A+ routes')
fig.update_xaxes(title='senders grade', ticktext=offset_tick_labels, tickvals=offset_tick_positions)
fig.update_yaxes(title='senders ratio')

#fig.show()

fig.write_html(f'{date}_sendLevel_hist_around_route_grade_allGrades.html', full_html=False, include_plotlyjs="cdn")

In [114]:
# Normal-fit curve of the sender grade offsets, one curve per route grade.
routes_levels = df.level.sort_values().unique()

# One series of offsets per route grade, in the same order as routes_levels.
hist_list = [df[df.level == l]["levels_mapped"] for l in routes_levels]

# Colour each curve by its own grade rather than by a fixed slice of grade_colors, so the
# colours stay correct when a grade is missing or the first route grade is not 6A.
# A route grade that is absent from grade_colors raises KeyError.
curve_colors = [grade_colors[l] for l in routes_levels]

fig = ff.create_distplot(hist_list, routes_levels,
                         bin_size=1, show_curve=True, show_rug=False, show_hist=False, curve_type='normal',
                         colors=curve_colors)
fig.update_layout(template="none",
                  title_text='senders grade around the route grade - normal fit curve - 6A-7A+ routes')
fig.update_xaxes(ticktext=[-2,-1,'route grade',1,2,3,4,5,6,7,8,9,10,11],
                 tickvals=[-1.5,-0.5,0.5,1.5,2.5,3.5,4.5,5.5,6.5,7.5,8.5,9.5,10.5,11.5])

fig.update_xaxes(title='senders grade')
fig.update_yaxes(title='senders ratio')

fig.write_html(f'{date}_sendLevel_hist_around_route_grade_allGrades_curves.html', full_html=False, include_plotlyjs=False)
#fig.show()

In [115]:
# Sorted distinct sender grades, used as x tick labels in the per-grade histograms.
levels_range = list(df.levels.dropna().sort_values().unique())
## pad with placeholders for grades that don't exist among the senders
levels_range.extend(['-'] * 9)

In [116]:
# Histogram (bin width 1) of the sender grade offsets, one figure per route grade.
routes_levels = df.level.sort_values().unique()

for i,l in enumerate(routes_levels):
    hist_list = [df[df.level == l]["levels_mapped"]]

    # Tick labels: the 14 grades from two below the route grade to eleven above it.
    # Positions outside levels_range get '-' so the window always has 14 labels
    # (a plain slice would start at a negative index for the first two grades).
    route_pos = levels_range.index(l)
    tick_labels = [levels_range[k] if 0 <= k < len(levels_range) else '-'
                   for k in range(route_pos - 2, route_pos + 12)]

    # Colour looked up by grade (not by loop position); a route grade that is
    # absent from grade_colors raises KeyError.
    fig = ff.create_distplot(hist_list, [l],
                             bin_size=1, show_curve=False, show_rug=False, show_hist=True, curve_type='normal',
                             colors=[grade_colors[l]])
    fig.update_layout(template="none",
                      title_text=f'senders grade around the route grade - {l} routes',
                      xaxis_range=[-2,12])
    fig.update_xaxes(ticktext=tick_labels,
                     tickvals=[-1.5,-0.5,0.5,1.5,2.5,3.5,4.5,5.5,6.5,7.5,8.5,9.5,10.5,11.5])

    fig.update_xaxes(title='senders grade')
    fig.update_yaxes(title='senders ratio')

    fig.write_html(f'{date}_sendLevel_hist_around_route_grade_{l}.html', full_html=False, include_plotlyjs=False)

    #fig.show()

### Individual route data

In [51]:
# Distinct sector names, in order of first appearance in df.
sectors = df["sector"].unique().tolist()

#### Heat Maps  normalized by grade

In [163]:
def plot_heatmap_normed(level):
    """Write one grade-normalised sender-grade x height heatmap per route of a grade.

    For every route whose ``level`` equals `level`, the route's crosstab of sender
    grade (``levels``) against binned height is divided cell by cell by the same
    crosstab computed over all routes of that grade; undefined cells (0/0) become 0.
    Hover text shows the normalised value, the absolute count and the send-type
    counts of the cell. Each figure is written to
    ``{date}_heatmap_normed_xLevel_yHeight_hovAttempts_{level}_{NN}.html``.

    Uses the notebook globals ``df``, ``grade_colors``, ``height_labels`` and ``date``.

    Parameters
    ----------
    level : str
        Route grade, as stored in ``df.level``.
    """
    df_plot = df[df.level==level]
    if df_plot.empty:
        # No route of this grade: nothing to plot.
        return

    idxs = df_plot.idx.unique()
    grade_categories = list(grade_colors.keys())[4:-1]

    # Every row of df_plot has the same route grade, so the grade-wide crosstab used
    # for normalisation is the same for all routes and is computed once.
    lev_grade = pd.Categorical(df_plot['levels'], categories=grade_categories)
    hei_grade = pd.Categorical(df_plot['height_binned'], categories=height_labels)
    confusion_matrix_grade = pd.crosstab(lev_grade, hei_grade, dropna=False)

    for i,idx in enumerate(idxs):
        df_route = df_plot[df_plot.idx == idx]
        df_route = df_route.sort_values(by=["levels"])

        lev = pd.Categorical(df_route['levels'], categories=grade_categories)
        hei = pd.Categorical(df_route['height_binned'], categories=height_labels)
        confusion_matrix_route = pd.crosstab(lev, hei, dropna=False)

        confusion_matrix = (confusion_matrix_route/confusion_matrix_grade).fillna(value=0)

        x = list(confusion_matrix.index)
        y = list(confusion_matrix.columns)

        # Object dtype up front: the cells are overwritten with strings below, and
        # writing strings into a float frame is deprecated in recent pandas.
        cm_hov_text = confusion_matrix.astype(object)

        for xx in x:
            for yy in y:
                normalized = confusion_matrix.loc[xx,yy]
                if normalized!=0:
                    sendtype_counts = df_route[(df_route.levels==xx) &
                                               (df_route.height_binned==yy)
                                              ]["sendtype"].value_counts().to_string().replace(' ','')
                    cm_hov_text.loc[xx,yy] = (
                        f'Normalized: {normalized:.3f}<br>Absolute: {confusion_matrix_route.loc[xx,yy]}<br>'
                        + sendtype_counts)
                else:
                    cm_hov_text.loc[xx,yy] = ''

        # Transposed so that sender grade runs along x and height along y.
        data = [plotly.graph_objs.Heatmap(z=confusion_matrix.T,
                                          x=x, y=y,
                                          hoverinfo='text',
                                          colorscale="BuPu",
                                          text=cm_hov_text.T)]

        layout = plotly.graph_objs.Layout(autosize=False,
                                          title_text=f'{df_route.name.iat[0]} - {df_route.level.iat[0]} - {df_route.sector.iat[0]} (n={df_route.shape[0]})',
                                          title_x=0.5,  title_font_color=df_route.hold_color.iat[0],
                                          width=350,
                                          height=350)

        fig = plotly.graph_objs.Figure(data=data, layout=layout)
        ##fig.show()
        # Only the first fragment references plotly.js (from the CDN); the others omit it.
        fig.write_html(f'{date}_heatmap_normed_xLevel_yHeight_hovAttempts_{level}_{i:02d}.html',
                       full_html=False, include_plotlyjs="cdn" if i == 0 else False)

In [164]:
# Write the normalised heatmaps for every route grade present in df.
for l in df.level.unique():
    plot_heatmap_normed(l)

#### Heat maps

In [167]:
def plot_heatmap(sector):
    """Write one sender-grade x height count heatmap per route of a sector.

    For every route whose ``sector`` equals `sector`, the climbs are cross-tabulated
    by sender grade (``levels``) and binned height. Hover text shows the number of
    sends and the send-type counts of the cell. Each figure is written to
    ``{date}_heatmap_xLevel_yHeight_hovAttempts_{sector}_{NN}.html``, with the sector
    name lower-cased and stripped of spaces and apostrophes.

    Uses the notebook globals ``df``, ``grade_colors``, ``height_labels`` and ``date``.

    Parameters
    ----------
    sector : str
        Sector name, as stored in ``df.sector``.
    """
    df_plot = df[df.sector==sector]
    idxs = df_plot.idx.unique()

    sector_formatted = sector.lower().replace(" ","").replace("'","")
    grade_categories = list(grade_colors.keys())[4:-1]

    for i,idx in enumerate(idxs):
        df_route = df_plot[df_plot.idx == idx]
        df_route = df_route.sort_values(by=["levels"])

        lev = pd.Categorical(df_route['levels'], categories=grade_categories)
        hei = pd.Categorical(df_route['height_binned'], categories=height_labels)

        confusion_matrix = pd.crosstab(lev, hei, dropna=False)

        x = list(confusion_matrix.index)
        y = list(confusion_matrix.columns)

        # Object dtype up front: the cells are overwritten with strings below, and
        # writing strings into an integer frame is deprecated in recent pandas.
        cm_hov_text = confusion_matrix.astype(object)

        for xx in x:
            for yy in y:
                n_sends = confusion_matrix.loc[xx,yy]
                if n_sends!=0:
                    sendtype_counts = df_route[(df_route.levels==xx) &
                                               (df_route.height_binned==yy)
                                              ]["sendtype"].value_counts().to_string().replace(' ','')
                    cm_hov_text.loc[xx,yy] = f'#sends: {n_sends}<br>' + sendtype_counts
                else:
                    cm_hov_text.loc[xx,yy] = ''

        # Transposed so that sender grade runs along x and height along y.
        data = [plotly.graph_objs.Heatmap(z=confusion_matrix.T,
                                          x=x, y=y,
                                          hoverinfo='text',
                                          colorscale="BuPu",
                                          text=cm_hov_text.T)]

        layout = plotly.graph_objs.Layout(autosize=False,
                                          title_text=f'{df_route.name.iat[0]} - {df_route.level.iat[0]} - {df_route.sector.iat[0]} (n={df_route.shape[0]})',
                                          title_x=0.5,  title_font_color=df_route.hold_color.iat[0],
                                          width=350,
                                          height=350)

        fig = plotly.graph_objs.Figure(data=data, layout=layout)
        #fig.show()
        # Only the first fragment references plotly.js (from the CDN); the others omit it.
        fig.write_html(f'{date}_heatmap_xLevel_yHeight_hovAttempts_{sector_formatted}_{i:02d}.html',
                       full_html=False, include_plotlyjs="cdn" if i == 0 else False)

In [168]:
# Write the count heatmaps for every sector.
for s in sectors:
    plot_heatmap(s)

#### X attempts, Y height, Hue sender grade

In [None]:
def plot_xAtt_yHei_hueLev(sector):
    """Write one strip plot per route of a sector: send type on x, height on y, colour by sender grade.

    Each figure is written to ``{date}_xAttempts_yHeight_hueLevel_{sector}_{NN}.html``,
    with the sector name lower-cased and stripped of spaces and apostrophes.

    Uses the notebook globals ``df``, ``grade_colors``, ``attempts`` and ``date``.

    Parameters
    ----------
    sector : str
        Sector name, as stored in ``df.sector``.
    """
    df_plot = df[df.sector==sector]
    idxs = df_plot.idx.unique()

    sector_formatted = sector.lower().replace(" ","").replace("'","")

    # Colours are bound to sender grades by value. A positional colour list only lines
    # up while every sender grade is a grade_colors key and the traces appear in that
    # order; grades missing from the map fall back to plotly's default colours.
    level_color_map = dict(grade_colors)

    for i,idx in enumerate(idxs):
        df_route = df_plot[df_plot.idx == idx]
        df_route = df_route.sort_values(by=["levels"])

        fig = px.strip(df_route, x='sendtype', y='height', color='levels',
                       category_orders={"sendtype":attempts},
                       template='none', width=350, height=350,
                       color_discrete_map=level_color_map)

        fig.update_layout(title=f'{df_route.name.iat[0]} - {df_route.level.iat[0]} - {df_route.sector.iat[0]} (n={df_route.shape[0]})',
                          title_x=0.5, title_font_color=df_route.hold_color.iat[0],
                          legend_title=None)

        # Larger tick font for the send-type symbols on the x axis.
        fig.update_xaxes(tickfont_size=20)

        #fig.show()
        # Only the first fragment references plotly.js (from the CDN); the others omit it.
        fig.write_html(f'{date}_xAttempts_yHeight_hueLevel_{sector_formatted}_{i:02d}.html',
                       full_html=False, include_plotlyjs="cdn" if i == 0 else False)
        
        

In [None]:
# Write the send-type strip plots for every sector.
for s in sectors:
    plot_xAtt_yHei_hueLev(s)

#### X sender grade, Y height, Hue attempts

In [None]:
def plot_xLev_yHei_hueAtt(sector):
    """Write one strip plot per route of a sector: sender grade on x, height on y, colour by send type.

    Each figure is written to ``{date}_xLevels_yHeight_hueAtt_{sector}_{NN}.html``,
    with the sector name lower-cased and stripped of spaces and apostrophes.

    Uses the notebook globals ``df`` and ``date``.

    Parameters
    ----------
    sector : str
        Sector name, as stored in ``df.sector``.
    """
    sector_rows = df[df.sector==sector]
    sector_formatted = sector.lower().replace(" ","").replace("'","")

    for position, route_idx in enumerate(sector_rows.idx.unique()):
        route_rows = sector_rows[sector_rows.idx == route_idx].sort_values(by=["levels"])

        # Title: route name - grade - sector (number of climbs), in the route's hold colour.
        first_name = route_rows.name.iat[0]
        first_level = route_rows.level.iat[0]
        first_sector = route_rows.sector.iat[0]
        title = f'{first_name} - {first_level} - {first_sector} (n={route_rows.shape[0]})'

        fig = px.strip(
            route_rows,
            x='levels',
            y='height',
            color='sendtype',
            template='none',
            width=350,
            height=350,
        )
        fig.update_layout(
            title=title,
            title_x=0.5,
            title_font_color=route_rows.hold_color.iat[0],
            legend_title=None,
        )

        # Only the first fragment references plotly.js (from the CDN); the others omit it.
        plotlyjs_mode = "cdn" if position == 0 else False
        fig.write_html(f'{date}_xLevels_yHeight_hueAtt_{sector_formatted}_{position:02d}.html',
                       full_html=False, include_plotlyjs=plotlyjs_mode)
        

In [None]:
# Write the sender-grade strip plots for every sector.
for s in sectors:
    plot_xLev_yHei_hueAtt(s)