## Import Data

In [24]:
import pandas as pd

# Replace 'path_to_your_csv_file' with the actual path where your CSV file is located
file_path = '/Users/Jayden.Chen/vscode/csp/modeldata/lebron_career.csv'

# Load the CSV file into a pandas DataFrame
lebron_career_df = pd.read_csv(file_path)

# Display the first few rows of the DataFrame to verify the data was loaded correctly
print(lebron_career_df.head())

   game        date     age team  opp   result     mp  fg  fga    fgp  ...  \
0     1  2003-10-29  18-303  CLE  SAC  L (-14)  42:00  12   20  0.600  ...   
1     2  2003-10-30  18-304  CLE  PHO   L (-9)  41:00   8   17  0.471  ...   
2     3  2003-11-01  18-306  CLE  POR  L (-19)  39:00   3   12  0.250  ...   
3     4  2003-11-05  18-310  CLE  DEN   L (-4)  41:00   3   11  0.273  ...   
4     5  2003-11-07  18-312  CLE  IND   L (-1)  44:00   8   18  0.444  ...   

   orb  drb  trb  ast  stl  blk  tov  pts  game_score  minus_plus  
0    2    4    6    9    4    0    2   25        24.7          -9  
1    2   10   12    8    1    0    7   21        14.7          -3  
2    0    4    4    6    2    0    2    8         5.0         -21  
3    2    9   11    7    2    3    2    7        11.2          -3  
4    0    5    5    3    0    0    7   23         9.0          -7  

[5 rows x 26 columns]


## Clean Data

In [27]:
# Selecting only required columns
selected_columns = ['opp', 'mp', 'pts', 'ast', 'drb', 'stl', 'blk', 'tov']
lebron_career_cleaned = lebron_career_df[selected_columns].copy()

# You may need to handle missing values if any
lebron_career_cleaned.dropna(inplace=True)

# You might need to convert minutes_played to a numeric format if it's not already
# lebron_career_cleaned['minutes_played'] = pd.to_numeric(lebron_career_cleaned['minutes_played'], errors='coerce')

# Optionally, you can perform additional preprocessing if required
# For example, encoding categorical variables

print(lebron_career_cleaned.columns)
display(lebron_career_cleaned)


Index(['opp', 'mp', 'pts', 'ast', 'drb', 'stl', 'blk', 'tov'], dtype='object')


Unnamed: 0,opp,mp,pts,ast,drb,stl,blk,tov
0,SAC,42:00,25,9,4,4,0,2
1,PHO,41:00,21,8,10,1,0,7
2,POR,39:00,8,6,4,2,0,2
3,DEN,41:00,7,7,9,2,3,2
4,IND,44:00,23,3,5,0,0,7
...,...,...,...,...,...,...,...,...
1127,PHO,33:13,28,11,12,3,2,5
1128,POR,41:05,35,6,14,2,3,6
1129,CHI,39:53,33,12,12,1,2,6
1130,MIL,40:01,40,10,7,2,1,6


## Machine Learning

In [3]:
import pandas as pd
from sklearn.linear_model import LinearRegression

# Load LeBron career data
lebron_career_df = pd.read_csv('/Users/Jayden.Chen/vscode/csp/modeldata/lebron_career.csv')

# Select relevant columns
selected_columns = ['opp', 'mp', 'pts', 'ast', 'drb', 'stl', 'blk', 'tov']

# Filter data for a specific opponent
def filter_opponent(data, opponent):
    return data[data['opp'] == opponent]

# Get LeBron's last 10 matchups against a specific opponent
def last_10_matchups(data):
    return data.tail(10)

# Ask for the opponent team
opponent = input("Enter the opponent team: ")

# Filter data for the specified opponent
opponent_data = filter_opponent(lebron_career_df, opponent)

# Get LeBron's last 10 matchups against the opponent
last_10_games = last_10_matchups(opponent_data)

# Convert 'mp' to minutes
last_10_games['mp'] = last_10_games['mp'].apply(lambda x: int(x.split(':')[0])*60 + int(x.split(':')[1]))

# Prepare features and target variables
X = last_10_games[['mp', 'pts', 'ast', 'drb', 'stl', 'blk', 'tov']]
y = last_10_games['pts']  # Predicting points scored

# Train a linear regression model
model = LinearRegression()
model.fit(X, y)

# Predict LeBron's stats for the upcoming game
upcoming_game_stats = model.predict(X)

# Round predicted stats to the nearest tenth value
upcoming_game_stats_rounded = [round(stat, 1) for stat in upcoming_game_stats]

# Print predicted stats
print("Predicted stats for the upcoming game against", opponent, ":")
print("Minutes played:", upcoming_game_stats_rounded[0])
print("Points:", upcoming_game_stats_rounded[1])
print("Assists:", upcoming_game_stats_rounded[2])
print("Defensive rebounds:", upcoming_game_stats_rounded[3])
print("Steals:", upcoming_game_stats_rounded[4])
print("Blocks:", upcoming_game_stats_rounded[5])
print("Turnovers:", upcoming_game_stats_rounded[6])


Predicted stats for the upcoming game against LAC :
Minutes played: 30.0
Points: 18.0
Assists: 31.0
Defensive rebounds: 32.0
Steals: 23.0
Blocks: 22.0
Turnovers: 27.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  last_10_games['mp'] = last_10_games['mp'].apply(lambda x: int(x.split(':')[0])*60 + int(x.split(':')[1]))
