In [None]:
# NOTE: USE THIS ONLY ONCE, TO CREATE A NEW CSV FOR A PLAYER. ONCE THE CSV EXISTS, USE THE updateStats METHOD TO APPEND THE PLAYER'S MOST RECENT GAME.

def importantStats(player_name):
    """Build the per-game feature table for one player.

    The game log returned by ``player_gamelog(player_name)`` is reversed
    row-wise and re-indexed, then extended with:

    * ``MA_<stat>`` - 5-row rolling mean (``min_periods=1``) of each stat in
      the list below.
    * ``AVG_PTS`` - expanding mean of ``PTS`` shifted down one row, so each
      row holds the mean of the rows before it (the first row is NaN).
    * team / opponent columns derived from ``MATCHUP`` and the per-team and
      per-player lookups applied to them.

    Parameters
    ----------
    player_name : str
        Name passed straight to ``player_gamelog``.

    Returns
    -------
    pandas.DataFrame
        The enriched game log.
    """
    rolling_window = 5
    rolled_stats = ('PTS', 'FGM', 'FGA', 'STL', 'BLK', 'TOV', 'FG_PCT', 'MIN', 'PLUS_MINUS')

    # Flip the row order and renumber from 0.
    # presumably the source log is newest-first, making this chronological - TODO confirm
    game_log = player_gamelog(player_name)
    game_log = game_log.iloc[::-1].reset_index(drop=True)

    for stat_name in rolled_stats:
        rolling_mean = game_log[stat_name].rolling(window=rolling_window, min_periods=1).mean()
        # NOTE(review): shift(0) is a no-op, so each window includes the row's
        # own game. Confirm that is intended for features used to predict it.
        game_log[f'MA_{stat_name}'] = rolling_mean.shift(0)

    game_log['AVG_PTS'] = game_log['PTS'].expanding().mean().shift(1)

    # (new column, source column, per-value function), applied in this order
    # because later entries read columns created by earlier ones.
    derived_columns = [
        ('TEAM_ABBREVIATION', 'MATCHUP', extract_team),
        ('TEAM_ID', 'TEAM_ABBREVIATION', team_id),
        ('OPP_TEAM', 'MATCHUP', extract_opponent),
        ('OPP_TEAMID', 'OPP_TEAM', team_id),
        ('OPP_PPG_ALLOWED', 'OPP_TEAMID', getPPG_allowed),
        ('OPP_DEF_EFFICIENCY', 'OPP_TEAMID', getDEFefficiency),
        ('PPG', 'TEAM_ID', getPPG),
        ('OFF_EFFICIENCY', 'TEAM_ID', getOFFefficiency),
        ('POSITION', 'Player_ID', position),
    ]
    for new_column, source_column, transform in derived_columns:
        game_log[new_column] = game_log[source_column].apply(transform)

    # encode_positions is applied per row and its result is appended as extra
    # columns next to the existing ones.
    position_columns = game_log['POSITION'].apply(encode_positions)
    game_log = pd.concat([game_log, position_columns], axis=1)

    for new_column, transform in (('AGE', age), ('HEIGHT', height), ('WEIGHT', weight)):
        game_log[new_column] = game_log['Player_ID'].apply(transform)

    # Return value is ignored, so this is called for its effect on game_log.
    # presumably it adds the 'Home_Away' column that createModel reads - TODO confirm
    homeOrAway(game_log)
    return game_log
# Example Usage, runtime ~1m 18s
# Missing Jaren Jackson Jr. and Michael Porter Jr. <- the "Jr." is the issue
# Players to process, kept in alphabetical order.
players_list = [
    # A-B
    "Alperen Sengun", "Anfernee Simons", "Anthony Davis", "Anthony Edwards",
    "Austin Reaves", "Brandon Miller",
    # C-D
    "Cade Cunningham", "Cameron Johnson", "Coby White", "Damian Lillard",
    "Darius Garland", "De'Aaron Fox", "DeMar DeRozan", "Devin Booker",
    "Domantas Sabonis", "Donovan Mitchell",
    # E-J
    "Evan Mobley", "Giannis Antetokounmpo", "Jalen Brunson", "Jalen Green",
    "Jalen Johnson", "Jalen Williams", "Jamal Murray", "James Harden",
    "Jaylen Brown", "Jayson Tatum", "John Collins", "Jordan Poole",
    "Julius Randle",
    # K-N
    "Karl-Anthony Towns", "Kevin Durant", "Kyrie Irving", "LaMelo Ball",
    "Lauri Markkanen", "LeBron James", "Mikal Bridges", "Nikola Jokić",
    "Nikola Vučević", "Norman Powell",
    # P-Z
    "Pascal Siakam", "RJ Barrett", "Shai Gilgeous-Alexander", "Stephen Curry",
    "Trae Young", "Tyler Herro", "Tyrese Haliburton", "Tyrese Maxey",
    "Victor Wembanyama", "Zach LaVine",
]
# importantStats("LaMelo Ball")

In [None]:
# USE THIS METHOD ONCE A CSV HAS BEEN CREATED FOR A PLAYER
def updateStats(player_csv, player_name):
    """Append the player's most recent game to an existing CSV.

    The full feature table is rebuilt with ``importantStats`` and only its
    last row is considered. If that row's ``GAME_DATE`` equals the
    ``GAME_DATE`` of the last row already stored, nothing is written;
    otherwise the row is appended and the CSV is saved.

    Parameters
    ----------
    player_csv : str
        Path of the existing CSV to read.
    player_name : str
        Player name; passed to ``importantStats`` and used to build the
        output path ``players csvs/<player_name>.csv``.

    Returns
    -------
    pandas.DataFrame
        The stored games (plus the new one when it was appended), without
        any ``Unnamed*`` index columns left over from earlier saves.
    """
    df = pd.read_csv(player_csv)
    last_row = importantStats(player_name).tail(1)

    if last_row.empty:
        # No game was fetched, so there is nothing to append. Previously this
        # fell through and compared the two *stored* last rows instead.
        print(f"This {player_name} CSV is up to date")
        return df.loc[:, ~df.columns.str.contains('^Unnamed')]

    stored_games = len(df)
    df = pd.concat([df, last_row])
    # Drop index columns written by saves that did not pass index=False.
    df = df.loc[:, ~df.columns.str.contains('^Unnamed')]

    # The duplicate check needs at least one stored game; indexing iloc[-2]
    # on a CSV with no rows used to raise IndexError.
    # NOTE(review): raw values are compared, so this assumes the CSV and the
    # fresh log represent GAME_DATE identically - confirm.
    already_stored = (
        stored_games > 0
        and df.iloc[-1]['GAME_DATE'] == df.iloc[-2]['GAME_DATE']
    )

    if already_stored:
        df = df.iloc[:-1]
        print(f"This {player_name} CSV is up to date")
    else:
        print(f"Updated {player_name} CSV")
        # NOTE(review): the file is written to a path built from player_name,
        # not back to player_csv; the two match for the usual call pattern.
        file_path = os.path.join('players csvs', player_name + '.csv')
        df.to_csv(file_path, index=False)
    return df
# updateStats('players csvs/Anfernee Simons.csv', 'Anfernee Simons') 
# Players whose CSVs are refreshed by updateAll (alphabetical order).
players_list = [
    "Alperen Sengun", "Anfernee Simons", "Anthony Davis",
    "Anthony Edwards", "Austin Reaves", "Brandon Miller",
    "Cade Cunningham", "Cameron Johnson", "Coby White",
    "Damian Lillard", "Darius Garland", "De'Aaron Fox",
    "DeMar DeRozan", "Devin Booker", "Domantas Sabonis",
    "Donovan Mitchell", "Evan Mobley", "Giannis Antetokounmpo",
    "Jalen Brunson", "Jalen Green", "Jalen Johnson",
    "Jalen Williams", "Jamal Murray", "James Harden",
    "Jaylen Brown", "Jayson Tatum", "John Collins",
    "Jordan Poole", "Julius Randle", "Karl-Anthony Towns",
    "Kevin Durant", "Kyrie Irving", "LaMelo Ball",
    "Lauri Markkanen", "LeBron James", "Mikal Bridges",
    "Nikola Jokić", "Nikola Vučević", "Norman Powell",
    "Pascal Siakam", "RJ Barrett", "Shai Gilgeous-Alexander",
    "Stephen Curry", "Trae Young", "Tyler Herro",
    "Tyrese Haliburton", "Tyrese Maxey", "Victor Wembanyama",
    "Zach LaVine",
]

def updateAll(player_list):
    """Refresh the CSV of every player in ``player_list``.

    For each name, ``players csvs/<name>.csv`` is passed to ``updateStats``
    and the frame it returns is written back to the same path.

    Parameters
    ----------
    player_list : iterable of str
        Player names; each must already have a CSV in ``players csvs``.
    """
    for player in player_list:
        file_path = os.path.join('players csvs', player + '.csv')
        df_updated = updateStats(file_path, player)
        # index=False matches the save inside updateStats. Writing the index
        # here added an 'Unnamed: 0' column to the file on every run.
        df_updated.to_csv(file_path, index=False)
# updateAll(players_list)


In [None]:
# Assumes a CSV has already been created for the player_name you pass in
def createModel(player_name):
    """Train and save an over/under points classifier for one player.

    Reads ``players csvs/<player_name>.csv``, skips the first 7 rows, labels
    each remaining game 1 when ``PTS`` is above the mean ``PTS`` of those
    games (else 0), fits a one-hot + logistic-regression pipeline on a 70/30
    random split, prints accuracy and the classification report, and saves
    the fitted pipeline to ``players models/<player_name>.pkl``.

    Parameters
    ----------
    player_name : str
        Player whose CSV already exists in ``players csvs``.
    """
    feature_columns = [
        'MATCHUP', 'OPP_DEF_EFFICIENCY', 'OPP_PPG_ALLOWED', 'PPG', 'OFF_EFFICIENCY',
        'MA_PTS', 'MA_FGM', 'MA_FGA', 'MA_STL', 'MA_BLK', 'MA_TOV',
        'MA_FG_PCT', 'MA_MIN', 'MA_PLUS_MINUS', 'Home_Away',
    ]
    categorical_features = ['MATCHUP', 'Home_Away']

    file_path = os.path.join('players csvs', player_name + '.csv')
    df = pd.read_csv(file_path)
    # presumably skips early games while the running features warm up - TODO confirm why 7
    df = df.iloc[7:].reset_index(drop=True)

    # The original called df.dropna() and discarded the result, so rows with
    # missing values still reached the estimator. Only the columns the model
    # uses are checked, so NaNs in unrelated columns do not drop games.
    df = df.dropna(subset=feature_columns + ['PTS']).reset_index(drop=True)

    # Label: did the player score above his own average over these games?
    average_points = df['PTS'].mean()
    df['target'] = (df['PTS'] > average_points).astype(int)

    # One-hot encode the categorical columns; all others pass through as-is.
    classifier = Pipeline([
        ('preprocessor', ColumnTransformer(
            transformers=[
                ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)],
            remainder='passthrough')),
        ('classifier', LogisticRegression(max_iter=1000))
    ])

    # NOTE(review): importantStats computes MA_* with shift(0), so MA_PTS
    # includes the same game's PTS that defines the target - check for leakage.
    X = df[feature_columns]   # Features
    y = df['target']          # Target variable

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)

    # Print results
    print(f"{player_name} - Accuracy: {accuracy_score(y_test, y_pred)}")
    print(classification_report(y_test, y_pred))

    # Ensure the output directory exists (no error if it already does)
    output_folder = 'players models'
    os.makedirs(output_folder, exist_ok=True)

    # Save the new model; path is built the same way predict builds it.
    joblib.dump(classifier, os.path.join(output_folder, player_name + '.pkl'))

# Players that still need a model (alphabetical order).
players_list = [
    "Alperen Sengun", "Anfernee Simons", "Anthony Davis", "Anthony Edwards", "Austin Reaves",
    "Brandon Miller", "Cade Cunningham", "Cameron Johnson", "Coby White", "Damian Lillard",
    "Darius Garland", "De'Aaron Fox", "DeMar DeRozan", "Devin Booker", "Domantas Sabonis",
    "Donovan Mitchell", "Evan Mobley", "Giannis Antetokounmpo", "Jalen Brunson", "Jalen Green",
    "Jalen Johnson", "Jalen Williams", "Jamal Murray", "James Harden", "Jaylen Brown",
    "Jayson Tatum", "John Collins", "Jordan Poole", "Julius Randle", "Karl-Anthony Towns",
    "Kevin Durant", "Kyrie Irving", "LaMelo Ball", "Lauri Markkanen", "LeBron James",
    "Mikal Bridges", "Nikola Jokić", "Nikola Vučević", "Norman Powell", "Pascal Siakam",
    "RJ Barrett", "Shai Gilgeous-Alexander", "Stephen Curry", "Trae Young", "Tyler Herro",
    "Tyrese Haliburton", "Tyrese Maxey", "Victor Wembanyama", "Zach LaVine",
]
# Every time you create a fresh new model, delete the name from the list
# createModel("Damian Lillard")

In [None]:
def getMA(player_name):
    """Return the moving-average columns of the last row in a player's CSV.

    Parameters
    ----------
    player_name : str
        Player whose file ``players csvs/<player_name>.csv`` is read.

    Returns
    -------
    pandas.Series
        The nine ``MA_*`` values of the final row, indexed by column name.
    """
    moving_average_columns = [
        'MA_PTS', 'MA_FGM', 'MA_FGA',
        'MA_STL', 'MA_BLK', 'MA_TOV',
        'MA_FG_PCT', 'MA_MIN', 'MA_PLUS_MINUS',
    ]
    csv_name = player_name + '.csv'
    player_stats = pd.read_csv(os.path.join('players csvs', csv_name))
    # Take the final row first, then keep only the MA_* entries.
    latest_game = player_stats.iloc[-1]
    return latest_game[moving_average_columns]
# getMA("Damian Lillard")

In [None]:
def _parse_matchup(matchup):
    """Split a matchup string into ``(team, opponent, home_away)``.

    ``'AAA @ BBB'`` yields ``home_away == 0`` and ``'AAA vs. BBB'`` yields
    ``home_away == 1``; the first abbreviation is treated as the player's team.
    """
    for separator, home_away in ((' @ ', 0), (' vs. ', 1)):
        if separator in matchup:
            teams = matchup.split(separator)
            return teams[0], teams[1], home_away
    raise ValueError(
        f"Unrecognised matchup {matchup!r}: expected 'AAA @ BBB' or 'AAA vs. BBB'"
    )


def predict(player_name, matchup):
    """Print the saved model's over/under prediction for an upcoming game.

    Builds a one-row feature frame from the matchup, the team-level lookups
    and the ``MA_*`` values in the last row of the player's CSV, then runs the
    pipeline stored in ``players models/<player_name>.pkl`` on it.

    Parameters
    ----------
    player_name : str
        Player with an existing CSV in ``players csvs`` and a model in
        ``players models``.
    matchup : str
        ``'TEAM @ OPP'`` (away) or ``'TEAM vs. OPP'`` (home).

    Raises
    ------
    ValueError
        If ``matchup`` contains neither separator, or the player's CSV has
        no rows.
    """
    # Validate first so a malformed matchup fails before any file or API work.
    # Previously it surfaced later as an unbound-variable error.
    team, opponent, home_away = _parse_matchup(matchup)

    file_path_csv = os.path.join('players csvs', player_name + '.csv')
    file_path_pkl = os.path.join('players models', player_name + '.pkl')

    # Read THIS player's CSV. The original always read
    # 'players csvs/Damian Lillard.csv', whatever player_name was.
    df = pd.read_csv(file_path_csv)
    if df.empty:
        raise ValueError(f"No games stored in {file_path_csv!r}")
    last_row = df.iloc[-1:]

    # Resolve each team id once instead of once per feature.
    own_team_id = team_id(team)
    opponent_team_id = team_id(opponent)

    moving_average_columns = [
        'MA_PTS', 'MA_FGM', 'MA_FGA', 'MA_STL', 'MA_BLK', 'MA_TOV',
        'MA_FG_PCT', 'MA_MIN', 'MA_PLUS_MINUS',
    ]

    # Column order mirrors the feature list used in createModel.
    new_game = {
        'MATCHUP': [matchup],
        'OPP_DEF_EFFICIENCY': [getDEFefficiency(opponent_team_id)],
        'OPP_PPG_ALLOWED': [getPPG_allowed(opponent_team_id)],
        'PPG': [getPPG(own_team_id)],
        'OFF_EFFICIENCY': [getOFFefficiency(own_team_id)],
    }
    for column in moving_average_columns:
        new_game[column] = [last_row[column].values[0]]
    new_game['Home_Away'] = [home_away]

    # Create a DataFrame
    df_new_game = pd.DataFrame(new_game)

    # joblib files are pickle-based: only load models this notebook produced.
    classifier = joblib.load(file_path_pkl)

    predicted_target = classifier.predict(df_new_game)

    # Output the prediction
    print("Predicted Target:", predicted_target)

    probabilities = classifier.predict_proba(df_new_game)

    # Binary target: column 1 is taken as the probability of class 1 ("over").
    predicted_probabilities = probabilities[:, 1]

    # Output the probabilities
    # NOTE(review): the "line" shown is the stored AVG_PTS, while createModel
    # thresholds on the mean PTS of its training rows - confirm they agree.
    print("Average Points(line):", last_row['AVG_PTS'].iloc[0])
    print("Probability of Class 1(over) (Predicted Target):", predicted_probabilities)
# predict('Damian Lillard', 'MIL vs. SAS')