In [127]:
import altair as alt
from vega_datasets import data
import pandas as pd
import openpyxl

In [128]:
# load the per-player statistics from the comma-separated text file
player_data = pd.read_csv(filepath_or_buffer="Player_Data.txt")

In [129]:
# expand position abbreviations into readable names
def check_pos(pos):
    """Return a readable label for a position abbreviation.

    Any value whose length exceeds two is labelled "Hybrid". The five
    standard abbreviations (C, PF, SF, SG, PG) are expanded to their full
    names. Every other value is handed back unchanged.
    """
    # guard clause: long codes are grouped under a single label
    if len(pos) > 2:
        return "Hybrid"

    full_names = (
        ("C", "Center"),
        ("PF", "Power Forward"),
        ("SF", "Small Forward"),
        ("SG", "Shooting Guard"),
        ("PG", "Point Guard"),
    )
    for abbreviation, full_name in full_names:
        if pos == abbreviation:
            return full_name

    # unrecognised short codes pass through untouched
    return pos

# clean data: drop the "Player-additional" column and give "Pos" a clearer name
player_data = player_data.drop(columns=["Player-additional"])
player_data = player_data.rename(columns={"Pos": "Position"})

# swap position abbreviations for their full names
player_data["Position"] = player_data["Position"].apply(check_pos)

# points scored per field goal attempt, rounded to two decimals
player_data["Points/FGA"] = (player_data["PTS"] / player_data["FGA"]).round(2)

# discard low-volume shooters (fewer than 4 attempts per game), then anyone
# whose points per attempt is exactly zero (missed every shot)
low_volume_rows = player_data.index[player_data["FGA"] < 4]
player_data = player_data.drop(index=low_volume_rows)
scoreless_rows = player_data.index[player_data["Points/FGA"] == 0]
player_data = player_data.drop(index=scoreless_rows)

In [130]:
# preview the cleaned table, including the new Points/FGA column
player_data

Unnamed: 0,Player,Position,Age,Tm,G,GS,MP,FG,FGA,FG%,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Points/FGA
0,Precious Achiuwa,Center,22,TOR,73,28,23.6,3.6,8.3,0.439,...,2.0,4.5,6.5,1.1,0.5,0.6,1.2,2.1,9.1,1.10
1,Steven Adams,Center,28,MEM,76,75,26.3,2.8,5.1,0.547,...,4.6,5.4,10.0,3.4,0.9,0.8,1.5,2.0,6.9,1.35
2,Bam Adebayo,Center,24,MIA,56,56,32.6,7.3,13.0,0.557,...,2.4,7.6,10.1,3.4,1.4,0.8,2.6,3.1,19.1,1.47
3,Santi Aldama,Power Forward,21,MEM,32,0,11.3,1.7,4.1,0.402,...,1.0,1.7,2.7,0.7,0.2,0.3,0.5,1.1,4.1,1.00
4,LaMarcus Aldridge,Center,36,BRK,47,12,22.3,5.4,9.7,0.550,...,1.6,3.9,5.5,0.9,0.3,1.0,0.9,1.7,12.9,1.33
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
594,Christian Wood,Center,26,HOU,68,67,30.8,6.5,12.9,0.501,...,1.6,8.5,10.1,2.3,0.8,1.0,1.9,2.5,17.9,1.39
600,Thaddeus Young,Power Forward,33,TOT,52,1,16.3,2.7,5.2,0.518,...,1.5,2.5,4.0,2.0,1.0,0.3,1.0,1.6,6.2,1.19
601,Trae Young,Point Guard,23,ATL,76,76,34.9,9.4,20.3,0.460,...,0.7,3.1,3.7,9.7,0.9,0.1,4.0,1.7,28.4,1.40
602,Omer Yurtseven,Center,23,MIA,56,12,12.6,2.3,4.4,0.526,...,1.5,3.7,5.3,0.9,0.3,0.4,0.7,1.5,5.3,1.20


In [134]:
# code assistance from https://altair-viz.github.io/gallery/stripplot.html
# the x position is random jitter only, so its axis has no title, labels or grid
jitter_axis = alt.X(
    "jitter:Q",
    title=None,
    axis=alt.Axis(values=[0], ticks=True, grid=False, labels=False),
    scale=alt.Scale(),
)

# one facet column per position, with the position label placed under the column
position_facet = alt.Column(
    "Position",
    header=alt.Header(
        titleOrient="top",
        labelOrient="bottom",
        labelAlign="center",
    ),
)

stripplot = (
    alt.Chart(player_data, title="NBA Scoring Efficiency by Position (Per Game)", width=120)
    .mark_circle()
    .encode(
        x=jitter_axis,
        y=alt.Y("Points/FGA", title="Points per Field Goal Attempt"),
        color=alt.Color("Position", legend=None),
        column=position_facet,
        tooltip=["Player", "Points/FGA", "PTS", "FGA", "MP"],
    )
    # Box-Muller transform: two uniform random() draws become Gaussian jitter
    .transform_calculate(jitter="sqrt(-2*log(random()))*cos(2*PI*random())")
    .configure_view(stroke=None)
    .interactive()
)

stripplot

In [132]:
# code assistance from https://altair-viz.github.io/gallery/interactive_cross_highlight.html
# single selection bound to the x encoding of whichever chart it is attached to
pts = alt.selection(type="single", encodings=["x"])

# heatmap: binned assists vs. rebounds, shaded by the number of players in each bin
rect = (
    alt.Chart(player_data, title="Rebounds and Assists by Position")
    .mark_rect()
    .encode(
        x=alt.X("AST", title="Assists per Game", bin=True),
        y=alt.Y("TRB", title="Rebounds per Game", bin=True),
        color=alt.Color(
            "count()",
            scale=alt.Scale(scheme="greenblue"),
            legend=alt.Legend(title="Total Players"),
        ),
        tooltip="count()",
    )
)

# grey points laid over the heatmap, sized by how many players match the selection
circ = (
    rect.mark_point()
    .encode(
        color=alt.ColorValue("grey"),
        size=alt.Size("count()", legend=alt.Legend(title="Players in Selection")),
    )
    .transform_filter(pts)
)

# bar chart of player counts per position; it carries the selection that drives the overlay
bar = (
    alt.Chart(player_data)
    .mark_bar()
    .encode(
        x="Position",
        y=alt.Y("count()", title="Number of Players"),
        color=alt.condition(pts, alt.ColorValue("steelblue"), alt.ColorValue("grey")),
    )
    .properties(width=550, height=200)
    .add_selection(pts)
)

# stack the layered heatmap (rect + circ) above the bar chart, each with its own legends
alt.vconcat(
    alt.layer(rect, circ),
    bar,
).resolve_legend(color="independent", size="independent")