In [6]:
def GetPlayerLogs(playername, seasons_played: list):
    from nba_api.stats.endpoints import PlayerGameLog
    from nba_api.stats.static import players
    import pandas as pd

    # Step 1: Find Player ID
    def get_player_id(player_name):
        nba_players = players.get_players()  # Get all NBA players
        for player in nba_players:
            if player['full_name'].lower() == player_name.lower():
                return player['id']
        return None

    # Get Player's ID
    player_id = get_player_id(playername)
    if not player_id:
        print("Player not found.")
        return

    # Step 2: Retrieve Game Logs for the Seasons
    def get_player_game_logs(player_id, season):
        try:
            game_log = PlayerGameLog(player_id=player_id, season=season)
            game_log_data = game_log.get_data_frames()[0]  # Get the first DataFrame
            return game_log_data
        except Exception as e:
            print(f"Error fetching game logs for season {season}: {e}")
            return pd.DataFrame()  # Return an empty DataFrame if an error occurs

    # Fetch the Player's game logs for the specified seasons
    game_logs = [get_player_game_logs(player_id, season) for season in seasons_played]

    # Combine all DataFrames in the list into one DataFrame
    combined_logs = pd.concat(game_logs, ignore_index=True) if game_logs else pd.DataFrame()

    # Step 3: Save the Game Logs to a CSV File
    if not combined_logs.empty:
        min_season = min(seasons_played)
        max_season = max(seasons_played)
        if max_season == min_season:
            filename = f"{playername}_{max_season}_game_logs.csv"
        else:
            filename = f"{playername}_{min_season}_to_{max_season}_game_logs.csv"
        combined_logs.to_csv(filename, index=False)
        print(f"Game logs saved as {filename}")
    else:
        print("No game logs found for the specified seasons.")


In [7]:
GetPlayerLogs('Devin Booker',['2024-25','2023-24','2022-23','2021-22','2020-21','2020-21','2019-20',
                              '2018-19','2017-18','2016-17','2015-16','2014-15',
                              '2013-14','2012-13','2011-12','2010-11'])

Game logs saved as Devin Booker_2010-11_to_2024-25_game_logs.csv


  combined_logs = pd.concat(game_logs, ignore_index=True) if game_logs else pd.DataFrame()


In [43]:
GetPlayerLogs('Lebron James',['2024-25','2023-24','2022-23','2021-22','2020-21','2020-21','2019-20',
                              '2018-19','2017-18','2016-17','2015-16','2014-15',
                              '2013-14','2012-13','2011-12','2010-11'])

Game logs saved as Lebron James_2010-11_to_2024-25_game_logs.csv


## Hypothesis 1: Is there a strong correlation in matchups and win %?

In [53]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Replace 'file_path.csv' with the path to your CSV file
file_path = 'Devin Booker_2010-11_to_2024-25_game_logs.csv'

# Read the CSV file into a pandas DataFrame
df_devin_booker = pd.read_csv(file_path)

# Extract opponent information
df_devin_booker ['OPPONENT'] = df_devin_booker ['MATCHUP'].apply(lambda x: x.split()[2] if len(x.split()) > 2 else None)

# Convert 'WL' to binary win/loss indicator
df_devin_booker ['WL'] = df_devin_booker ['WL'].apply(lambda x: 1 if 'W' in x else 0)

# Use only the 'OPPONENT' column for features (make a copy)
X = df_devin_booker ['OPPONENT'].copy()

# Convert the categorical 'OPPONENT' column into numerical features using one-hot encoding
X = pd.get_dummies(X, drop_first=True)

# Use 'WL' as the target variable
y = df_devin_booker ['WL']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize and train the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on test data
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R-squared Score: {r2}")

# Analyze coefficients
coefficients = pd.DataFrame({'Feature': X.columns, 'Coefficient': model.coef_})
print(coefficients.sort_values(by='Coefficient', ascending=False))


Mean Squared Error: 0.2530191625012806
R-squared Score: -0.02280976376647903
   Feature  Coefficient
18     NYK     0.416667
3      CHI     0.400000
2      CHA     0.333333
5      DAL     0.313725
16     MIN     0.285714
22     POR     0.257576
12     LAL     0.222222
20     ORL     0.212121
14     MIA     0.200000
21     PHI     0.166667
25     TOR     0.166667
7      DET     0.166667
4      CLE     0.166667
17     NOP     0.146667
13     MEM     0.142857
19     OKC     0.142857
8      GSW     0.140351
6      DEN     0.095238
23     SAC     0.089744
11     LAC     0.087719
10     IND     0.083333
27     WAS     0.083333
24     SAS     0.066667
9      HOU     0.066667
26     UTA     0.057971
15     MIL     0.051282
0      BKN     0.030303
1      BOS    -0.083333


In [62]:
wins = sum(df_devin_booker['WL'])
total = len(df_devin_booker['WL'])

print(wins/total)

0.5095168374816984


In [None]:
# check matchups!