In [6]:
import pandas as pd
import joblib
import numpy as np

# Box-score table; in this cell it is only consulted for its column names.
df_encoded = pd.read_csv('New_Box_Scores_Filtered.csv')

# Categorical inputs, each with an encoder saved as '<column>_encoder.joblib'.
columns_encoded = ['Opposing Team', 'Opposing Pitcher', 'Team', 'Name']

# Map every categorical column name to its loaded encoder object.
encoders = {
    name: joblib.load(f'{name}_encoder.joblib')
    for name in columns_encoded
}

# Every column of the table whose name ends in 'Avg'.
avg_columns = list(filter(lambda name: name.endswith('Avg'), df_encoded.columns))

# Labels assigned, in this order, to the values the model predicts.
target_columns = [
    'H-A', 'BB-A', 'H+R+RBI', 'RBI', 'R', 'SO', 'SO-A', 'W/L', 'HR',
]

def get_user_input(encoders):
    """Interactively collect the game context for one prediction.

    Prompts on stdin for Home/Away, Year, Month, Day, Opposing Team,
    Opposing Pitcher, Team and Name, re-prompting each field until the
    answer is acceptable.

    Args:
        encoders: Mapping of column name to an encoder object exposing
            ``classes_`` (the known labels) and ``transform``. A name column
            without an entry here is read as a plain integer instead.

    Returns:
        dict with the keys 'Home/Away' (1 for home, 0 for away), 'Year',
        'Month', 'Day', and '<column> Encoded' for each encoded column.
    """
    import calendar  # local import: only needed for the day-of-month check

    user_input = {}

    input_prompts = {
        'Home/Away': 'Home/Away',
        'Year': 'Year',
        'Month': 'Month (1-12)',
        'Day': 'Day (1-31)',
        'Opposing Team': 'Opposing Team',
        'Opposing Pitcher': 'Opposing Pitcher',
        'Team': 'Team',
        'Name': 'Name'
    }

    # Inclusive bounds and the rejection message for each integer field.
    int_ranges = {
        'Year': (2000, 2050, "Please input a valid year (2000-2050)."),
        'Month': (1, 12, "Please input a valid month (1-12)."),
        'Day': (1, 31, "Please input a valid day (1-31)."),
    }

    for col, prompt in input_prompts.items():
        while True:
            value = input(f"{prompt}: ")
            try:
                if col == 'Home/Away':
                    # Tolerate stray whitespace and any letter case.
                    side = value.strip().lower()
                    if side in ('home', 'away'):
                        user_input[col] = 1 if side == 'home' else 0
                        break
                    print("Please enter either 'Home' or 'Away'.")
                elif col in int_ranges:
                    low, high, message = int_ranges[col]
                    number = int(value)
                    if not low <= number <= high:
                        print(message)
                        continue
                    if col == 'Day':
                        # Year and Month are asked before Day, so the real
                        # month length is known; reject dates like Feb 30.
                        last_day = calendar.monthrange(
                            user_input['Year'], user_input['Month']
                        )[1]
                        if number > last_day:
                            print(
                                f"Please input a valid day (1-{last_day}) for "
                                f"{user_input['Year']}-{user_input['Month']:02d}."
                            )
                            continue
                    user_input[col] = number
                    break
                elif col in encoders:
                    le = encoders[col]
                    # Prefer the exact text; otherwise retry without the
                    # surrounding whitespace a user may have typed.
                    candidate = value if value in le.classes_ else value.strip()
                    if candidate in le.classes_:
                        user_input[f'{col} Encoded'] = le.transform([candidate])[0]
                        break
                    print(f"No encoded correspondent found for '{value}' in column '{col}'.")
                    # Series.to_string() does not truncate by default, so the
                    # global 'display.max_rows' option is left untouched.
                    print(f"Please refer to the list of valid {col} values:\n{pd.Series(le.classes_).to_string(index=False)}")
                else:
                    user_input[col] = int(value)
                    break
            except ValueError:
                # Raised by int() for non-numeric text (or by the encoder).
                print("Invalid input. Please enter a valid value.")

    return user_input

def fetch_avg_columns(name_encoded, avg_columns, csv_path='New_Box_Scores_Filtered.csv'):
    """Return the requested column values from a player's most recent row.

    Args:
        name_encoded: Value to match against the 'Name Encoded' column.
        avg_columns: Iterable of column names to copy from the selected row.
        csv_path: CSV file to read; defaults to the box-score file this
            function originally had hard-coded.

    Returns:
        dict mapping each name in ``avg_columns`` to its value in the row
        with the latest Year/Month/Day date for that player.

    Raises:
        IndexError: If the file has no row for ``name_encoded``.
    """
    df = pd.read_csv(csv_path)

    # Filter first so dates are only parsed for the rows that matter.
    player_rows = df[df['Name Encoded'] == name_encoded]
    if player_rows.empty:
        # Same exception type the bare ``.iloc[0]`` used to raise, but with
        # a message that says what is actually wrong.
        raise IndexError(
            f"No rows found for 'Name Encoded' == {name_encoded!r} in {csv_path!r}"
        )

    # Build real dates from the three columns and pick the latest one
    # without sorting the table.
    dates = pd.to_datetime(player_rows[['Year', 'Month', 'Day']])
    latest_row = player_rows.loc[dates.idxmax()]

    return {col: latest_row[col] for col in avg_columns}

def predict_player_stats(user_input, avg_columns, feature_columns, target_columns, model=None):
    """Run the model on one assembled feature row and label its outputs.

    Args:
        user_input: dict of feature name to value collected from the user.
        avg_columns: dict of additional feature values; on a key clash these
            override ``user_input``.
        feature_columns: Ordered feature names the model expects. Any name
            absent from both dicts is filled with 0; extra keys are dropped.
        target_columns: Names assigned, in order, to the values in the first
            row returned by ``model.predict``.
        model: Object with a ``predict`` method. When omitted, the model is
            loaded from 'basic_mlb_player_stats_linear_model.joblib', as
            before.

    Returns:
        dict mapping each target name to its predicted value.
    """
    input_data = {**user_input, **avg_columns}

    # Add the zero defaults up front so the frame is built in one step
    # instead of inserting missing columns one at a time.
    missing = {col: 0 for col in feature_columns if col not in input_data}
    input_df = pd.DataFrame([{**input_data, **missing}])

    # Keep only the expected features, in the order the model was given.
    input_df = input_df[list(feature_columns)]

    if model is None:
        model = joblib.load('basic_mlb_player_stats_linear_model.joblib')

    first_row = model.predict(input_df)[0]
    return {target: first_row[i] for i, target in enumerate(target_columns)}

def format_predictions(predictions):
    """Render a predictions dict as one 'Label: value' line per statistic.

    Known statistic codes are replaced by a readable label; unknown keys are
    shown unchanged. Each value is rounded to two decimals, and anything
    that is not greater than zero after rounding is shown as 0.

    Args:
        predictions: dict mapping statistic code to a numeric prediction.

    Returns:
        The lines joined with newlines, in the dict's iteration order.
    """
    display_names = {
        'H-A': 'Hits Against',
        'BB-A': 'Walks Against',
        'H+R+RBI': 'Hits + Runs + RBIs',
        'RBI': 'RBIs',
        'R': 'Runs',
        'SO': 'Batting Strikeouts',
        'SO-A': 'Pitching Strikeouts',
        'W/L': 'Wins',
        'HR': 'Home Runs'
    }

    def render(stat, raw):
        # Build a single output line for one statistic.
        label = display_names.get(stat, stat)
        rounded = round(raw, 2)
        shown = rounded if rounded > 0 else 0
        return f"{label}: {shown}"

    return "\n".join(render(stat, raw) for stat, raw in predictions.items())

# Load the trained model to read the feature names it exposes, in order.
model = joblib.load('basic_mlb_player_stats_linear_model.joblib')
feature_columns_used_during_training = model.feature_names_in_

user_input = get_user_input(encoders)
if user_input:
    # Without an encoded player name there is no row to look up, so no
    # 'Avg' values are supplied. Note that avg_columns is rebound here from
    # a list of column names to a dict of values.
    if user_input.get('Name Encoded') is None:
        avg_columns = {}
    else:
        avg_columns = fetch_avg_columns(user_input['Name Encoded'], avg_columns)

    predictions = predict_player_stats(
        user_input,
        avg_columns,
        feature_columns_used_during_training,
        target_columns,
    )
    formatted_predictions = format_predictions(predictions)
    print(formatted_predictions)


Home/Away (Home or Away):  Home
Year:  2090


Please input a valid year (2000-2050).


Year:  2024
Month (1-12):  7
Day (1-31):  29
Opposing Team:  Chicago Cubs
Opposing Pitcher:  Drew Smiley


No encoded correspondent found for 'Drew Smiley' in column 'Opposing Pitcher'.
Please refer to the list of valid Opposing Pitcher values:
             A.J. Alexy
              A.J. Cole
               A.J. Puk
       AJ Smith-Shawver
            Aaron Ashby
           Aaron Brooks
           Aaron Civale
             Aaron Loup
             Aaron Nola
          Aaron Sanchez
          Aaron Slegers
        Aaron Wilkerson
        Adalberto Mejía
             Adam Mazur
             Adam Oller
            Adam Plutko
        Adam Wainwright
         Adbert Alzolay
          Adonis Medina
          Adrian Houser
         Adrian Sampson
        Adrián Martinez
         Adrián Morejón
          Albert Suárez
         Alec Bettinger
             Alec Marsh
             Alec Mills
            Alek Manoah
           Alex Claudio
              Alex Cobb
             Alex Faedo
             Alex McRae
             Alex Reyes
             Alex Vesia
             Alex Wells
              Alex Woo

Opposing Pitcher:  Drew Smiley


No encoded correspondent found for 'Drew Smiley' in column 'Opposing Pitcher'.
Please refer to the list of valid Opposing Pitcher values:
             A.J. Alexy
              A.J. Cole
               A.J. Puk
       AJ Smith-Shawver
            Aaron Ashby
           Aaron Brooks
           Aaron Civale
             Aaron Loup
             Aaron Nola
          Aaron Sanchez
          Aaron Slegers
        Aaron Wilkerson
        Adalberto Mejía
             Adam Mazur
             Adam Oller
            Adam Plutko
        Adam Wainwright
         Adbert Alzolay
          Adonis Medina
          Adrian Houser
         Adrian Sampson
        Adrián Martinez
         Adrián Morejón
          Albert Suárez
         Alec Bettinger
             Alec Marsh
             Alec Mills
            Alek Manoah
           Alex Claudio
              Alex Cobb
             Alex Faedo
             Alex McRae
             Alex Reyes
             Alex Vesia
             Alex Wells
              Alex Woo

Opposing Pitcher:  Drew Smyly
Team:  Pittsburgh Pirates
Name:  Paul Skenes


Hits Against: 5.29
Walks Against: 1.63
Hits + Runs + RBIs: 0
RBIs: 0
Runs: 0
Batting Strikeouts: 0
Pitching Strikeouts: 6.53
Wins: 0.58
Home Runs: 0
