In [10]:
import pandas as pd
import numpy as np

#importing batting data first
from pybaseball import batting_stats, batting_stats_bref, bwar_bat, statcast_batter, playerid_lookup, statcast_batter_expected_stats, statcast_batter_exitvelo_barrels
from pybaseball import cache

cache.enable()

from datetime import date

from bokeh.plotting import figure, output_notebook, show
from bokeh.models import ColumnDataSource, HoverTool

In [9]:
def scatter_linear(x_column, y_column, data_source):
    """Show a Bokeh scatter plot of two columns with a least-squares straight line.

    Parameters
    ----------
    x_column, y_column : array-like with ``.astype`` and ``.name``
        Typically two pandas Series taken from ``data_source``. Their names
        label the axes and hover tooltips and are the field names looked up
        in ``data_source`` when the points are drawn.
    data_source : anything accepted by ``ColumnDataSource``
        Must contain fields named ``x_column.name`` and ``y_column.name``.

    Returns
    -------
    None
        The figure is displayed in the notebook and the slope is printed.

    Raises
    ------
    ValueError
        If no row has a finite value in both columns.
    """
    x = np.asarray(x_column.astype(float), dtype=float)
    y = np.asarray(y_column.astype(float), dtype=float)

    # np.polyfit cannot cope with NaN/inf, so fit only the rows where both
    # coordinates are finite (rows are paired by position).
    finite = np.isfinite(x) & np.isfinite(y)
    if not finite.any():
        raise ValueError('scatter_linear needs at least one row with finite x and y values')
    x_fit = x[finite]
    y_fit = y[finite]

    x_name = f'{x_column.name}'
    y_name = f'{y_column.name}'

    # '@{field}' is Bokeh's hover syntax for "value of this column"; the braces
    # keep it working for column names that contain spaces or symbols.
    tooltips = [
        (x_name, f'@{{{x_name}}}'),
        (y_name, f'@{{{y_name}}}'),
    ]

    xy_plot = figure(
        title=f'Relationship between {x_name} and {y_name}',
        x_axis_label=x_name,
        y_axis_label=y_name,
        tooltips=tooltips,
    )

    source = ColumnDataSource(data_source)
    # scatter() draws circle markers by default; circle(size=...) is
    # deprecated in Bokeh 3.4+.
    xy_plot.scatter(x_name, y_name, size=5, source=source)

    slope, intercept = np.polyfit(x_fit, y_fit, 1)

    # A straight line is fully described by its two end points, so there is no
    # need to push every (unsorted) data point through the line glyph.
    line_x = np.array([x_fit.min(), x_fit.max()])
    xy_plot.line(line_x, slope * line_x + intercept, line_color='red')

    output_notebook()
    show(xy_plot)
    print(f'Slope: {slope}')
    

In [8]:
class Plot:
    """Bokeh scatter plot of two columns with an optional polynomial fit.

    Typical use is ``scatter(regression=True)`` followed by
    ``regression(degree)``: the first call adds the points without rendering,
    the second adds the fitted curve and renders the finished figure.
    """

    def __init__(self, x_column, y_column, data_source):
        """Prepare the figure, hover tooltips and data source.

        Parameters
        ----------
        x_column, y_column : array-like with ``.astype`` and ``.name``
            Typically two pandas Series taken from ``data_source``. Their
            names label the axes and tooltips and are the field names looked
            up in ``data_source`` when the points are drawn.
        data_source : anything accepted by ``ColumnDataSource``
            Must contain fields named ``x_column.name`` and ``y_column.name``.
        """
        self.x_values = x_column.astype(float)
        self.y_values = y_column.astype(float)
        self.x_name = x_column.name
        self.y_name = y_column.name

        self.source = ColumnDataSource(data_source)

        # '@{field}' is Bokeh's hover syntax for "value of this column"; the
        # braces keep it working for names containing spaces or symbols.
        self.tooltips = [
            (f'{self.x_name}', f'@{{{self.x_name}}}'),
            (f'{self.y_name}', f'@{{{self.y_name}}}'),
        ]

        self.xy_plot = figure(
            title=f'Relationship between {self.x_name} and {self.y_name}',
            x_axis_label=f'{self.x_name}',
            y_axis_label=f'{self.y_name}',
            tooltips=self.tooltips,
        )

    def scatter(self, regression=False):
        """Add the data points to the figure and return the figure.

        Parameters
        ----------
        regression : bool, default False
            Pass True when ``regression()`` will be called afterwards; the
            figure is then not rendered here, so it is shown only once with
            the fitted curve on it.
        """
        # scatter() draws circle markers by default; circle(size=...) is
        # deprecated in Bokeh 3.4+.
        self.xy_plot.scatter(f'{self.x_name}', f'{self.y_name}', size=5, source=self.source)
        if not regression:
            output_notebook()
            show(self.xy_plot)
        return self.xy_plot

    def regression(self, degree):
        """Fit a polynomial of the given degree, draw it, and show the figure.

        Rows where either coordinate is NaN or infinite are left out of the
        fit. The fitted polynomial is printed, and for ``degree == 1`` the
        slope is printed as well. Returns None.

        Raises
        ------
        ValueError
            If no row has a finite value in both columns.
        """
        x = np.asarray(self.x_values, dtype=float)
        y = np.asarray(self.y_values, dtype=float)

        # np.polyfit cannot cope with NaN/inf; rows are paired by position.
        finite = np.isfinite(x) & np.isfinite(y)
        if not finite.any():
            raise ValueError('regression needs at least one row with finite x and y values')
        x = x[finite]
        y = y[finite]

        coefficients = np.polyfit(x, y, degree)
        poly_function = np.poly1d(coefficients)
        # 100 evenly spaced x values across the data range give a smooth curve.
        polyline = np.linspace(x.min(), x.max(), 100)

        self.xy_plot.line(polyline, poly_function(polyline), line_width=2, color='red', legend_label='Polynomial Line')

        output_notebook()
        show(self.xy_plot)

        print(poly_function)

        if degree == 1:
            # np.polyfit returns the highest-order coefficient first.
            print(f'Slope: {coefficients[0]}')

# Demo: 2021 hitters at the qual=500 cut-off, Age vs. WAR with a straight-line fit.
data = batting_stats(2021, qual=500)
data_plot = Plot(x_column=data['Age'], y_column=data['WAR'], data_source=data)
data_plot.scatter(regression=True)  # add the points but leave rendering to regression()
data_plot.regression(degree=1)      # draws the fit, shows the figure, prints the slope

 
-0.1229 x + 6.553
Slope: -0.12290231832099664


In [7]:
def adder(x, y):
    """Return the sum of ``x`` and ``y``."""
    # The previous body returned only ``x`` and silently dropped ``y``.
    return x + y

def avg(x, y):
    """Return half of ``adder(x, y)``.

    This is the arithmetic mean of ``x`` and ``y`` provided ``adder`` returns
    their sum.
    """
    return adder(x, y) / 2

# Smoke check: the mean of 2 and 1 should print 1.5.
print(avg(2, 1))

1.5


## Plotting how a player's performance changes over time
- Plot how performance changes with age
- Plot how a single player's performance (for example, Albert Pujols's) changes with age

In [5]:
# Performance vs. age, step 1: pull the 2022 season (qual=1) and keep only the
# three columns the age/WAR analysis uses.
bat_stat = batting_stats(2022, end_season=2022, qual=1)[['Name', 'Age', 'WAR']]
bat_stat

Unnamed: 0,Name,Age,WAR
6,Aaron Judge,30,11.5
23,Manny Machado,29,7.4
26,Nolan Arenado,31,7.3
13,Paul Goldschmidt,34,7.1
21,Freddie Freeman,32,7.1
...,...,...,...
575,Yoshi Tsutsugo,30,-1.3
372,Elias Diaz,31,-1.4
633,Robinson Cano,39,-1.4
417,Miguel Cabrera,39,-1.5


In [11]:
# Seasons 1990-2022 at the qual=200 cut-off, reduced to the columns needed and
# to ages strictly between 20 and 40.
bat_stat = (
    batting_stats(1990, end_season=2022, qual=200)
    [['Name', 'Age', 'WAR']]
    .loc[lambda frame: (frame['Age'] < 40) & (frame['Age'] > 20)]
)

# Age vs. WAR with a quadratic fit; scatter() defers rendering to regression().
age_war_plot = Plot(x_column=bat_stat['Age'], y_column=bat_stat['WAR'], data_source=bat_stat)
age_war_plot.scatter(regression=True)
age_war_plot.regression(degree=2)

           2
-0.006896 x + 0.3759 x - 3.329
