In [1]:
import pandas as pd
import time
import requests
from nba_api.stats.static import players
from nba_api.stats.endpoints import commonplayerinfo
import os

def safe_request(api_call, retries=5, wait=2.0):
    for attempt in range(retries):
        try:
            return api_call()
        except Exception as e:
            print(f"Retry {attempt + 1} failed: {e}")
            if attempt < retries - 1:
                time.sleep(wait * (2 ** attempt))
            else:
                return None

# Load existing progress if it exists
bio_file = "player_bios.csv"
if os.path.exists(bio_file):
    existing_bios = pd.read_csv(bio_file)
    processed_ids = set(existing_bios['id'].values)
    data = existing_bios.to_dict('records')
else:
    processed_ids = set()
    data = []

all_players = players.get_active_players()

for i, player in enumerate(all_players):
    player_id = player['id']
    full_name = player['full_name']

    if player_id in processed_ids:
        continue

    info = safe_request(lambda: commonplayerinfo.CommonPlayerInfo(player_id=player_id))
    if info is None:
        print(f"Skipping {full_name} due to repeated errors.")
        continue

    info_df = info.get_data_frames()[0]

    if info_df.empty:
        print(f"No bio data for {full_name}")
        continue

    player_data = {
        'id': player_id,
        'full_name': full_name,
        'team': info_df.at[0, 'TEAM_NAME'] if 'TEAM_NAME' in info_df.columns else None,
        'position': info_df.at[0, 'POSITION'] if 'POSITION' in info_df.columns else None,
        'height': info_df.at[0, 'HEIGHT'] if 'HEIGHT' in info_df.columns else None,
        'weight': info_df.at[0, 'WEIGHT'] if 'WEIGHT' in info_df.columns else None,
        'college': info_df.at[0, 'COLLEGE'] if 'COLLEGE' in info_df.columns else None,
        'birthdate': info_df.at[0, 'BIRTHDATE'] if 'BIRTHDATE' in info_df.columns else None
    }

    data.append(player_data)
    processed_ids.add(player_id)

    if len(data) % 10 == 0:
        print(f"{len(data)} players collected so far (not saved to disk)")

    print(f"{i+1}/{len(all_players)}: Saved bio for {full_name}")
    time.sleep(3)  # Slow and safe

# Final save
df = pd.DataFrame(data)
print("Collection complete.")


1/572: Saved bio for Precious Achiuwa
2/572: Saved bio for Steven Adams
3/572: Saved bio for Bam Adebayo
4/572: Saved bio for Ochai Agbaji
5/572: Saved bio for Santi Aldama
6/572: Saved bio for Trey Alexander
7/572: Saved bio for Nickeil Alexander-Walker
8/572: Saved bio for Grayson Allen
9/572: Saved bio for Jarrett Allen
10 players collected so far (not saved to disk)
10/572: Saved bio for Jose Alvarado
11/572: Saved bio for Kyle Anderson
12/572: Saved bio for Giannis Antetokounmpo
13/572: Saved bio for Cole Anthony
14/572: Saved bio for OG Anunoby
15/572: Saved bio for Taran Armstrong
16/572: Saved bio for Deni Avdija
17/572: Saved bio for Deandre Ayton
18/572: Saved bio for Marcus Bagley
19/572: Saved bio for Marvin Bagley III
20 players collected so far (not saved to disk)
20/572: Saved bio for Patrick Baldwin Jr.
21/572: Saved bio for LaMelo Ball
22/572: Saved bio for Lonzo Ball
23/572: Saved bio for Mo Bamba
24/572: Saved bio for Paolo Banchero
25/572: Saved bio for Desmond Bane

In [11]:
df

Unnamed: 0.1,Unnamed: 0,full_name,team,position,height,weight,age,region,average_points,average_assists,average_rebounds,average_steals,average_blocks,awards,conference,awards_count
0,0,Precious Achiuwa,Knicks,F,203.20,243.0,25,Nigeria,6.351852,0.925926,5.407407,0.759259,0.703704,,East,0
1,1,Steven Adams,Rockets,C,210.82,265.0,31,New Zealand,3.910714,1.142857,5.589286,0.392857,0.464286,All-Rookie Team,West,1
2,2,Bam Adebayo,Heat,F-C,205.74,255.0,27,USA,17.840000,4.333333,9.586667,1.240000,0.693333,All-Defensive Team; All-Defensive Team; All-De...,East,1
3,3,Ochai Agbaji,Raptors,G,195.58,215.0,24,USA,10.065574,1.557377,3.786885,0.803279,0.475410,,East,0
4,4,Santi Aldama,Grizzlies,F-C,213.36,215.0,24,Spain,12.754098,2.868852,6.573770,0.786885,0.475410,,West,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
567,567,Trae Young,Hawks,G,185.42,164.0,26,USA,24.027778,11.555556,3.138889,1.194444,0.152778,All-NBA; All-Rookie Team; NBA All-Star; NBA Al...,East,1
568,568,Ivica Zubac,Clippers,C,213.36,240.0,28,Croatia,16.426667,2.586667,12.546667,0.693333,1.173333,,West,0
569,569,Tristan da Silva,Magic,F,203.20,217.0,23,Germany,7.042254,1.464789,3.267606,0.422535,0.225352,,East,0
570,570,Vlatko Čančar,Nuggets,F,203.20,236.0,27,Slovenia,1.846154,0.692308,2.461538,0.230769,0.153846,NBA Champion,West,1


In [12]:
df

Unnamed: 0.1,Unnamed: 0,full_name,team,position,height,weight,age,region,average_points,average_assists,average_rebounds,average_steals,average_blocks,awards,conference,awards_count
0,0,Precious Achiuwa,Knicks,F,203.20,243.0,25,Nigeria,6.351852,0.925926,5.407407,0.759259,0.703704,,East,0
1,1,Steven Adams,Rockets,C,210.82,265.0,31,New Zealand,3.910714,1.142857,5.589286,0.392857,0.464286,All-Rookie Team,West,1
2,2,Bam Adebayo,Heat,F-C,205.74,255.0,27,USA,17.840000,4.333333,9.586667,1.240000,0.693333,All-Defensive Team; All-Defensive Team; All-De...,East,1
3,3,Ochai Agbaji,Raptors,G,195.58,215.0,24,USA,10.065574,1.557377,3.786885,0.803279,0.475410,,East,0
4,4,Santi Aldama,Grizzlies,F-C,213.36,215.0,24,Spain,12.754098,2.868852,6.573770,0.786885,0.475410,,West,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
567,567,Trae Young,Hawks,G,185.42,164.0,26,USA,24.027778,11.555556,3.138889,1.194444,0.152778,All-NBA; All-Rookie Team; NBA All-Star; NBA Al...,East,1
568,568,Ivica Zubac,Clippers,C,213.36,240.0,28,Croatia,16.426667,2.586667,12.546667,0.693333,1.173333,,West,0
569,569,Tristan da Silva,Magic,F,203.20,217.0,23,Germany,7.042254,1.464789,3.267606,0.422535,0.225352,,East,0
570,570,Vlatko Čančar,Nuggets,F,203.20,236.0,27,Slovenia,1.846154,0.692308,2.461538,0.230769,0.153846,NBA Champion,West,1


In [19]:
import pandas as pd
import time
import datetime
import os
from nba_api.stats.static import players
from nba_api.stats.endpoints import commonplayerinfo, playercareerstats, playerawards

# Add custom headers to avoid being blocked
custom_headers = {
    'User-Agent': 'Mozilla/5.0',
    'Referer': 'https://www.nba.com/',
    'Origin': 'https://www.nba.com'
}

def safe_request(api_call, retries=5, wait=2.0):
    for attempt in range(retries):
        try:
            return api_call()
        except Exception as e:
            print(f"Retry {attempt + 1} failed: {e}")
            if attempt < retries - 1:
                time.sleep(wait * (2 ** attempt))
            else:
                return None

def calculate_age(birthdate_str):
    if pd.isna(birthdate_str):
        return None
    try:
        # Handle format like '1999-09-19T00:00:00'
        birthdate = datetime.datetime.strptime(birthdate_str.split("T")[0], "%Y-%m-%d")
        today = datetime.datetime.today()
        return today.year - birthdate.year - ((today.month, today.day) < (birthdate.month, birthdate.day))
    except Exception as e:
        print(f"Failed to parse birthdate: {birthdate_str} — {e}")
        return None


# Load existing progress if it exists
bio_file = "player_bios.csv"
if os.path.exists(bio_file):
    existing_bios = pd.read_csv(bio_file)
    processed_ids = set(existing_bios['id'].values)
    data = existing_bios.to_dict('records')
else:
    processed_ids = set()
    data = []

all_players = players.get_active_players()

for i, player in enumerate(all_players):
    player_id = player['id']
    full_name = player['full_name']

    if player_id in processed_ids:
        continue

    print(f"Processing {full_name}...")

    # Get basic info
    info = safe_request(lambda: commonplayerinfo.CommonPlayerInfo(player_id=player_id, headers=custom_headers))
    if info is None:
        print(f"Skipping {full_name} due to repeated errors.")
        continue

    info_df = info.get_data_frames()[0]
    if info_df.empty:
        print(f"No bio data for {full_name}")
        continue

    birthdate = info_df.at[0, 'BIRTHDATE']
    age = calculate_age(birthdate)

    # Get latest season stats
        # Get latest season stats
    career = safe_request(lambda: playercareerstats.PlayerCareerStats(player_id=player_id, headers=custom_headers))
    pts = ast = reb = stl = blk = None
    if career:
        stats_df = career.get_data_frames()[0]
        nba_stats = stats_df[stats_df['LEAGUE_ID'] == '00'].sort_values(by='SEASON_ID', ascending=False)
        if not nba_stats.empty:
            latest = nba_stats.iloc[0]
            games_played = latest['GP']
            if games_played and games_played > 0:
                pts = latest['PTS'] / games_played
                ast = latest['AST'] / games_played
                reb = latest['REB'] / games_played
                stl = latest['STL'] / games_played
                blk = latest['BLK'] / games_played


    # Get awards
    awards = safe_request(lambda: playerawards.PlayerAwards(player_id=player_id, headers=custom_headers))
    if awards:
        awards_df = awards.get_data_frames()[0]
        award_list = "; ".join(awards_df['DESCRIPTION'].tolist()) if not awards_df.empty else None
    else:
        award_list = None

    player_data = {
        'id': player_id,
        'full_name': full_name,
        'team': info_df.at[0, 'TEAM_NAME'] if 'TEAM_NAME' in info_df.columns else None,
        'position': info_df.at[0, 'POSITION'] if 'POSITION' in info_df.columns else None,
        'height': info_df.at[0, 'HEIGHT'] if 'HEIGHT' in info_df.columns else None,
        'weight': info_df.at[0, 'WEIGHT'] if 'WEIGHT' in info_df.columns else None,
        'college': info_df.at[0, 'COLLEGE'] if 'COLLEGE' in info_df.columns else None,
        'birthdate': birthdate,
        'age': age,
        'region': info_df.at[0, 'COUNTRY'] if 'COUNTRY' in info_df.columns else None,
        'average_points': pts,
        'average_assists': ast,
        'average_rebounds': reb,
        'average_steals': stl,
        'average_blocks': blk,
        'awards': award_list
    }

    data.append(player_data)
    processed_ids.add(player_id)

    print(f"{i+1}/{len(all_players)}: Saved bio for {full_name}")

    if len(data) % 10 == 0:
        print(f"{len(data)} players collected so far...")

    time.sleep(3)  # Respectful request spacing

# Final save
df2 = pd.DataFrame(data)
print("Collection complete. Data saved to player_bios.csv.")


Processing Precious Achiuwa...
1/572: Saved bio for Precious Achiuwa
Processing Steven Adams...
2/572: Saved bio for Steven Adams
Processing Bam Adebayo...
3/572: Saved bio for Bam Adebayo
Processing Ochai Agbaji...
4/572: Saved bio for Ochai Agbaji
Processing Santi Aldama...
5/572: Saved bio for Santi Aldama
Processing Trey Alexander...
6/572: Saved bio for Trey Alexander
Processing Nickeil Alexander-Walker...
7/572: Saved bio for Nickeil Alexander-Walker
Processing Grayson Allen...
8/572: Saved bio for Grayson Allen
Processing Jarrett Allen...
9/572: Saved bio for Jarrett Allen
Processing Jose Alvarado...
10/572: Saved bio for Jose Alvarado
10 players collected so far...
Processing Kyle Anderson...
11/572: Saved bio for Kyle Anderson
Processing Giannis Antetokounmpo...
12/572: Saved bio for Giannis Antetokounmpo
Processing Cole Anthony...
13/572: Saved bio for Cole Anthony
Processing OG Anunoby...
14/572: Saved bio for OG Anunoby
Processing Taran Armstrong...
15/572: Saved bio for Ta

In [16]:

df2.to_csv("/Users/michaelluo/Desktop/DSC-Wizzrad/playerdata.csv")
#df2 = df2.drop(["id", "college", "birthdate"], axis = 1)
def height_to_cm(height):
    feet, inches = height.split('-')  # Split the height string into feet and inches
    feet = int(feet)  # Convert feet to integer
    inches = int(inches)  # Convert inches to integer
    cm = feet * 30.48 + inches * 2.54  # Convert to cm
    return cm

# Apply the function to the 'height' column and create a new 'height_cm' column
df2['height'] = df2['height'].apply(height_to_cm)
df2["height"]


NameError: name 'df2' is not defined

In [17]:
df2 =pd.read_csv("/Users/michaelluo/Desktop/DSC-Wizzrad/playerdata.csv")
df2.dtypes

Unnamed: 0            int64
full_name            object
team                 object
position             object
height              float64
weight              float64
age                   int64
region               object
average_points      float64
average_assists     float64
average_rebounds    float64
average_steals      float64
average_blocks      float64
awards               object
conference           object
dtype: object

In [41]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier, _tree

# === Step 1: Load and Prepare Your Data ===

# Load your dataset
df = pd.read_csv("/Users/michaelluo/Desktop/DSC-Wizzrad/playerdata.csv")

# Fix the 'awards' column: turn it into a numeric count
df['awards'] = df['awards'].fillna('')
df['awards_count'] = df['awards'].apply(lambda x: len(x.split(',')) if x else 0)

# Select numeric features
numeric_features = [
    'age', 'height', 'weight',
    'average_points', 'average_assists',
    'average_rebounds', 'average_steals', 'average_blocks',
    'awards_count'
]

# One-hot encode categorical features
categorical_features = pd.get_dummies(df[['position', 'team', 'region']])

# Combine all features
X = pd.concat([df[numeric_features], categorical_features], axis=1)

# Target: player name
y = df['full_name']

# === Step 2: Train Decision Tree Classifier ===

clf = DecisionTreeClassifier(max_depth=10)
clf.fit(X, y)

# === Step 3: Interactive Game ===

def ask_question(question: str):
    while True:
        answer = input(f"{question} (y/n): ").strip().lower()
        if answer in ['y', 'yes']:
            return True
        elif answer in ['n', 'no']:
            return False
        else:
            print("Please answer with 'y' or 'n'.")

def traverse_tree(clf, feature_names):
    tree = clf.tree_
    feature_name = [
        feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!"
        for i in tree.feature
    ]

    def clean_question(name, threshold):
        # Handle binary one-hot columns
        if '_Cavaliers' in name or '_Lakers' in name or '_Warriors' in name or '_team_' in name:
            team = name.split('_')[-1]
            return f"Is the player on the {team}?"
        if name.startswith("position_"):
            pos = name.split('_')[1]
            return f"Is the player's position {pos}?"
        if name.startswith("region_"):
            region = name.split('_')[1]
            return f"Is the player from the {region} region?"
        if threshold == 0.5:
            return f"Does the player have '{name.replace('_', ' ')}'?"
        return f"Is the player's '{name.replace('_', ' ')}' > {round(threshold, 2)}?"

    def recurse(node):
        if tree.feature[node] != _tree.TREE_UNDEFINED:
            name = feature_name[node]
            threshold = tree.threshold[node]
            question = clean_question(name, threshold)
            answer = ask_question(question)

            if answer:
                return recurse(tree.children_right[node])
            else:
                return recurse(tree.children_left[node])
        else:
            values = tree.value[node][0]
            best_guess_index = values.argmax()
            guess = clf.classes_[best_guess_index]
            return guess

    return recurse(0)


# === Step 4: Play the Game ===

print("🔮 Think of an active NBA player in your mind...")
input("Press Enter when you're ready!\n")

guess = traverse_tree(clf, X.columns)
print(f"\n🏀 My guess is... **{guess}**!")



🔮 Think of an active NBA player in your mind...

🏀 My guess is... **A.J. Lawson**!


In [33]:
# Define which teams are in which conference
east_teams = [
    'Celtics', 'Nets', 'Knicks', '76ers', 'Raptors',
    'Bulls', 'Cavaliers', 'Pistons', 'Pacers', 'Bucks',
    'Heat', 'Hawks', 'Hornets', 'Magic', 'Wizards'
]

# Create new column 'conference' based on the team
df2['conference'] = df2['team'].apply(lambda team: 'East' if team in east_teams else 'West')
df2.to_csv("/Users/michaelluo/Desktop/DSC-Wizzrad/playerdata.csv")
df2["position"].unique()

array(['Forward', 'Center', 'Center-Forward', 'Guard', 'Forward-Center',
       'Forward-Guard', 'Guard-Forward'], dtype=object)

In [57]:
import pandas as pd
import numpy as np

# === Helper Functions ===
def ask_question(question: str):
    while True:
        ans = input(f"{question} (y/n): ").strip().lower()
        if ans in ['y', 'yes']:
            return True
        elif ans in ['n', 'no']:
            return False
        else:
            print("Please answer with 'y' or 'n'.")

def cm_to_inches(cm):
    inches = cm / 2.54
    feet = int(inches // 12)
    inches_remainder = round(inches % 12)
    return f"{feet}'{inches_remainder}\""

def clean_position(pos):
    pos = str(pos).lower()
    if 'guard' in pos and 'forward' in pos:
        return 'G-F'
    if ('forward' in pos and 'center' in pos) or ('center' in pos and 'forward' in pos):
        return 'F-C'
    if 'forward' in pos:
        return 'F'
    if 'center' in pos:
        return 'C'
    if 'guard' in pos:
        return 'G'
    return 'Unknown'

# === Load and Clean the Dataset ===
print("🔮 Think of an active NBA player from the dataset...")
print("Let's narrow down who your player is!")

df = pd.read_csv("/Users/michaelluo/Desktop/DSC-Wizzrad/playerdata.csv")

# Fix awards
df['awards'] = df['awards'].fillna('')
df['awards_count'] = df['awards'].apply(lambda x: len(x.split(',')) if x else 0)

# Standardize positions
df['position'] = df['position'].apply(clean_position)

# === Ask if player is in the East Conference first ===
is_east_conference = ask_question("Is your player in the Eastern Conference?")
chosen_conference = "East" if is_east_conference else "West"

# Filter the data by selected conference
players = df[df['conference'] == chosen_conference].copy()

# Binary search algorithm
def find_player(players_df):
    current_players = players_df.copy()
    
    # Track the number of questions asked
    questions_asked = 0
    
    # Continue asking questions until we narrow down to one player
    while len(current_players) > 1:
        questions_asked += 1
        
        # Display how many players left (for debugging)
        print(f"[Debug] {len(current_players)} players remaining.")
        
        # If we're down to a few players, directly confirm with names
        if len(current_players) <= 5:
            # Ask directly about specific players
            for _, player_row in current_players.iterrows():
                player_name = player_row['full_name']
                is_player = ask_question(f"Is your player {player_name}?")
                if is_player:
                    return player_name
            
            # If we get here, something went wrong
            return "Player not found. There might be an error in the dataset."
        
        # Find the column that most evenly divides the remaining players
        best_column = None
        best_threshold = None
        best_balance = float('inf')  # Lower is better (we want 50/50 split)
        
        # First try team-based questions (very effective)
        teams = current_players['team'].unique()
        if len(teams) > 1:
            for team in teams:
                team_count = (current_players['team'] == team).sum()
                non_team_count = len(current_players) - team_count
                balance = abs(team_count - non_team_count)
                
                if balance < best_balance:
                    best_balance = balance
                    best_column = 'team'
                    best_threshold = team
        
        # Handle position questions differently to be more specific
        position_groups = {
            'pure_guard': current_players['position'] == 'G',
            'pure_forward': current_players['position'] == 'F',
            'pure_center': current_players['position'] == 'C',
            'guard_forward': current_players['position'] == 'G-F',
            'forward_center': current_players['position'] == 'F-C'
        }
        
        for pos_type, pos_filter in position_groups.items():
            pos_count = pos_filter.sum()
            non_pos_count = len(current_players) - pos_count
            balance = abs(pos_count - non_pos_count)
            
            if balance < best_balance and pos_count > 0:
                best_balance = balance
                best_column = 'position_specific'
                best_threshold = pos_type
        
        # Try awards
        has_awards = current_players['awards_count'] > 0
        award_count = has_awards.sum()
        no_award_count = len(current_players) - award_count
        award_balance = abs(award_count - no_award_count)
        
        if award_balance < best_balance:
            best_balance = award_balance
            best_column = 'awards'
            best_threshold = None
        
        # Try numeric columns
        numeric_cols = ['age', 'height', 'weight', 'average_points', 'average_assists', 
                       'average_rebounds', 'average_steals', 'average_blocks']
        
        for col in numeric_cols:
            values = sorted(current_players[col].unique())
            if len(values) > 1:
                # Try different threshold values for better split
                for i in range(len(values) - 1):
                    threshold = (values[i] + values[i + 1]) / 2
                    
                    left_count = (current_players[col] <= threshold).sum()
                    right_count = len(current_players) - left_count
                    balance = abs(left_count - right_count)
                    
                    if balance < best_balance:
                        best_balance = balance
                        best_column = col
                        best_threshold = threshold
        
        # Ask the question based on the best dividing attribute
        if best_column == 'team':
            answer = ask_question(f"Is your player on the {best_threshold}?")
            if answer:
                current_players = current_players[current_players['team'] == best_threshold]
            else:
                current_players = current_players[current_players['team'] != best_threshold]
        
        elif best_column == 'position_specific':
            if best_threshold == 'pure_guard':
                answer = ask_question(f"Is your player strictly a Guard (G), not a Guard-Forward hybrid?")
                if answer:
                    current_players = current_players[current_players['position'] == 'G']
                else:
                    current_players = current_players[current_players['position'] != 'G']
            elif best_threshold == 'pure_forward':
                answer = ask_question(f"Is your player strictly a Forward (F), not a hybrid position?")
                if answer:
                    current_players = current_players[current_players['position'] == 'F']
                else:
                    current_players = current_players[current_players['position'] != 'F']
            elif best_threshold == 'pure_center':
                answer = ask_question(f"Is your player strictly a Center (C), not a Forward-Center hybrid?")
                if answer:
                    current_players = current_players[current_players['position'] == 'C']
                else:
                    current_players = current_players[current_players['position'] != 'C']
            elif best_threshold == 'guard_forward':
                answer = ask_question(f"Is your player a Guard-Forward (G-F) hybrid?")
                if answer:
                    current_players = current_players[current_players['position'] == 'G-F']
                else:
                    current_players = current_players[current_players['position'] != 'G-F']
            elif best_threshold == 'forward_center':
                answer = ask_question(f"Is your player a Forward-Center (F-C) hybrid?")
                if answer:
                    current_players = current_players[current_players['position'] == 'F-C']
                else:
                    current_players = current_players[current_players['position'] != 'F-C']
        
        elif best_column == 'awards':
            answer = ask_question(f"Has your player received any awards?")
            if answer:
                current_players = current_players[current_players['awards_count'] > 0]
            else:
                current_players = current_players[current_players['awards_count'] == 0]
        
        elif best_column == 'age':
            answer = ask_question(f"Is your player older than {int(best_threshold)} years?")
            if answer:
                current_players = current_players[current_players[best_column] > best_threshold]
            else:
                current_players = current_players[current_players[best_column] <= best_threshold]
        
        elif best_column == 'height':
            answer = ask_question(f"Is your player taller than {cm_to_inches(best_threshold)}?")
            if answer:
                current_players = current_players[current_players[best_column] > best_threshold]
            else:
                current_players = current_players[current_players[best_column] <= best_threshold]
        
        elif best_column == 'weight':
            answer = ask_question(f"Is your player heavier than {int(best_threshold)} lbs?")
            if answer:
                current_players = current_players[current_players[best_column] > best_threshold]
            else:
                current_players = current_players[current_players[best_column] <= best_threshold]
        
        elif best_column in ['average_points', 'average_assists', 'average_rebounds', 'average_steals', 'average_blocks']:
            stat_name = best_column.replace('average_', '')
            answer = ask_question(f"Does your player average more than {best_threshold:.1f} {stat_name}?")
            if answer:
                current_players = current_players[current_players[best_column] > best_threshold]
            else:
                current_players = current_players[current_players[best_column] <= best_threshold]
        
        # Fallback if no good question found
        else:
            # Just ask directly about subset of players
            half_index = len(current_players) // 2
            first_half = current_players.iloc[:half_index]
            player_names = ', '.join(first_half['full_name'].values)
            
            answer = ask_question(f"Is your player one of these: {player_names}?")
            if answer:
                current_players = first_half
            else:
                current_players = current_players.iloc[half_index:]
    
    # Return the only player left
    if len(current_players) == 1:
        player_name = current_players['full_name'].iloc[0]
        # Final confirmation
        confirmation = ask_question(f"Just to confirm: Is your player {player_name}?")
        if confirmation:
            return player_name
        else:
            print("Hmm, something seems off. Let me try a different approach.")
            # If user says no, then something went wrong - let's restart with original dataset
            # But only include players from the chosen conference
            return find_player_with_direct_questions(players_df)
    else:
        # This should never happen, but just in case
        return "Could not determine the player. Something went wrong."

def find_player_with_direct_questions(players_df):
    """Fallback function that just asks directly about each player."""
    print("\n⚠️ Using backup approach to find your player...")
    
    for team in sorted(players_df['team'].unique()):
        team_players = players_df[players_df['team'] == team]
        if ask_question(f"Is your player on the {team}?"):
            print(f"Great! Your player is on the {team}.")
            for _, player in team_players.iterrows():
                player_name = player['full_name']
                if ask_question(f"Is your player {player_name}?"):
                    return player_name
            
            # If we get here, the player wasn't found
            return "Player not found in the dataset for the selected team."
    
    return "Player not found in the dataset."

# Run the game
guess = find_player(players)
print(f"\n🏀 My guess is... **{guess}**!")
print("Thanks for playing!")

🔮 Think of an active NBA player from the dataset...
Let's narrow down who your player is!
[Debug] 268 players remaining.
[Debug] 132 players remaining.
[Debug] 66 players remaining.
[Debug] 33 players remaining.
[Debug] 17 players remaining.
[Debug] 9 players remaining.
[Debug] 5 players remaining.

🏀 My guess is... **Saddiq Bey**!
Thanks for playing!


In [10]:
df2.dtypes

NameError: name 'df2' is not defined