In [1]:
import numpy as np
import pandas as pd
import scipy

In [2]:
def get_A_b(dataframe,park,year,columns):
    """
    Build the least-squares design matrix A and target vector b for one park and year.

    Parameters
    ----------
    dataframe : str, path-like, file-like, or pandas.DataFrame
        Either something ``pd.read_csv`` can read, or an already-loaded
        DataFrame (used as-is, not modified). The data must contain the
        columns 'ParkID', 'Year', 'runs' and every name in ``columns``.
    park : value compared against the 'ParkID' column.
    year : value compared against the 'Year' column.
    columns : list of column labels to use as regressors.

    Returns
    -------
    A : numpy.ndarray of float, shape (n_games, len(columns) + 1)
        Leading column of ones (intercept) followed by the requested columns.
    b : numpy.ndarray of float, shape (n_games, 1)
        The 'runs' values of the selected rows.

    If no row matches ``park`` and ``year`` both arrays have zero rows.
    """
    # Accept a pre-loaded frame so callers looping over years need not re-read the file.
    if isinstance(dataframe, pd.DataFrame):
        games = dataframe
    else:
        games = pd.read_csv(dataframe)

    # Single boolean mask instead of two chained row selections.
    wanted = (games['ParkID'] == park) & (games['Year'] == year)
    selected = games.loc[wanted, :]

    regressors = selected.loc[:, columns].to_numpy().astype(float)
    intercept = np.ones((regressors.shape[0], 1), dtype=float)
    A = np.hstack([intercept, regressors])

    # Double brackets keep b two-dimensional: one column, one row per game.
    b = selected.loc[:, ['runs']].to_numpy().astype(float)
    return A,b

In [3]:
def get_beta(dataframe,park,year,columns):
    """
    Fit runs ~ intercept + columns by least squares for one park and year.

    Inputs: the game data (anything get_A_b accepts, e.g. the path to
    retrodata.csv), the ParkID, the year to analyze, and the list of
    DataFrame columns to use as regressors.

    Returns: the fitted coefficients for ``columns`` in the same order,
    as an array of shape (len(columns), 1). The intercept (beta0) is
    estimated but dropped from the result.

    Raises: numpy.linalg.LinAlgError if no games match the park and year.
    """
    A,b = get_A_b(dataframe,park,year,columns)

    # With zero rows lstsq would quietly return all-zero coefficients;
    # fail loudly instead, using the exception type the solver raises.
    if A.shape[0] == 0:
        raise np.linalg.LinAlgError(
            "no games found for park {!r} in year {!r}".format(park, year)
        )

    # lstsq works on A directly rather than forming A.T @ A, so it avoids
    # squaring the condition number and also copes with rank-deficient A
    # (e.g. a column that is all zeros) by returning the minimum-norm solution.
    beta, _residuals, _rank, _singular_values = np.linalg.lstsq(A, b, rcond=None)

    # Row 0 is the intercept; callers only want the per-column weights.
    return beta[1:]

In [4]:
# Fit the weights for park 'BOS07' in 2004 and display them as the cell output.
beta = get_beta(
    dataframe='retrodata.csv',
    park='BOS07',
    year=2004,
    columns=["BB", "1B", "2B", "3B", "HR"],
)
beta

array([[0.48437466],
       [0.52624633],
       [0.78184949],
       [0.46041257],
       [1.20354485]])

In [5]:
def get_beta_BOS_year(year):
    """
    Convenience wrapper around get_beta with everything but the year fixed.

    The data file is 'retrodata.csv', the park is 'BOS07', and the
    regressor columns are "BB", "1B", "2B", "3B", "HR" (in that order).

    Input: year, the value matched against the 'Year' column.
    Returns: whatever get_beta returns for that year.
    """
    event_columns = ["BB","1B","2B","3B","HR"]
    return get_beta('retrodata.csv', 'BOS07', year, event_columns)

In [6]:
def linear_weights(years=None, out_path='BOS.csv'):
    """
    Tabulate the yearly beta values for park 'BOS07' and export them as CSV.

    The table has a 'Year' column followed by 'BB', '1B', '2B', '3B', 'HR',
    one row per year, filled with the values from get_beta_BOS_year.

    Parameters
    ----------
    years : iterable of int, optional
        Seasons to include, in output order. Defaults to 2010-2019 inclusive.
    out_path : str, optional
        Destination passed to ``DataFrame.to_csv``. Defaults to 'BOS.csv'.

    Returns
    -------
    The return value of ``DataFrame.to_csv(out_path)``.

    Raises
    ------
    ValueError
        If ``years`` is empty.
    """
    if years is None:
        years = range(2010, 2020)
    years = list(years)
    if not years:
        raise ValueError("years must contain at least one season")

    event_names = ['BB', '1B', '2B', '3B', 'HR']

    # Collect one flat row of weights per year and stack once, rather than
    # re-allocating a growing array with np.append on every iteration.
    rows = [np.ravel(get_beta_BOS_year(year)) for year in years]
    weights = pd.DataFrame(np.vstack(rows), columns=event_names)
    weights.insert(0, 'Year', years)

    # The default index is written too, so the file layout stays as before.
    return weights.to_csv(out_path)

In [7]:
# Compute the yearly weights and write them to 'BOS.csv'.
linear_weights()

In [8]:
def player_rankings():
    """
    Rank the players in 'fangraphs_batting_2019.csv' by expected runs per plate appearance.

    For each row, the "BB", "1B", "2B", "3B", "HR" counts are weighted by the
    2019 values from get_beta_BOS_year and the total is divided by "PA".
    The result is a two-column table ('Player', 'R/PA') sorted from highest
    to lowest 'R/PA' and written to 'Rankings_at_Fenway.csv'.

    Input: None
    Returns: the return value of DataFrame.to_csv for that path.
    """
    batting = pd.read_csv('fangraphs_batting_2019.csv')

    weights_2019 = get_beta_BOS_year(2019)

    event_counts = np.array(batting.loc[:, ["BB","1B","2B","3B","HR"]]).astype(float)
    plate_appearances = np.array(batting["PA"]).astype(float)

    # (n, 1) column of expected runs; transpose so the division broadcasts
    # against the 1-D PA vector, then transpose back to a column.
    expected_runs = event_counts @ weights_2019
    rate_column = (expected_runs.T / plate_appearances).T

    rankings = pd.DataFrame(rate_column, columns=['R/PA'])
    rankings.insert(0, "Player", batting["Name"])

    ordered = rankings.sort_values(by='R/PA', ascending=False)
    return ordered.to_csv('Rankings_at_Fenway.csv')

In [9]:
# Build the 2019 player ranking and write it to 'Rankings_at_Fenway.csv'.
player_rankings()