# Predicting the Shooting Percentage of Basketball Stars

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import zipfile
import os
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Pin every random number generator used in this notebook to one seed so that
# a fresh "Restart & Run All" reproduces the same weights and the same split.
seed = 42

# Note: PYTHONHASHSEED is read at interpreter start-up, so setting it here only
# influences Python processes launched after this point.
os.environ["PYTHONHASHSEED"] = str(seed)

for seed_fn in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed,
    torch.cuda.manual_seed_all,
):
    seed_fn(seed)

# Ask cuDNN for deterministic kernels and disable its auto-tuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

## Load data

In [2]:
# Read the training data; the path is relative to the current working directory.
df = pd.read_csv("data_train.csv")
# Bare expression so the notebook renders the frame as this cell's output.
df

Unnamed: 0,loc_x,loc_y,minutes_remaining,shot_distance,shot_made_flag,shot_id
0,0,0,7,0,1,1
1,-3,130,9,13,0,2
2,82,8,7,8,0,3
3,100,101,10,14,1,4
4,0,0,9,0,1,5
...,...,...,...,...,...,...
24995,97,43,1,10,0,24996
24996,18,77,2,7,0,24997
24997,234,4,0,23,0,24998
24998,75,20,2,7,0,24999


In [3]:
# Per-column missing-value check: True means the column has at least one NaN.
df.isna().any()

loc_x                False
loc_y                False
minutes_remaining    False
shot_distance        False
shot_made_flag       False
shot_id              False
dtype: bool

In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
df.describe()

Unnamed: 0,loc_x,loc_y,minutes_remaining,shot_distance,shot_made_flag,shot_id
count,25000.0,25000.0,25000.0,25000.0,25000.0,25000.0
mean,7.1254,91.28788,4.88512,13.45852,0.44744,12500.5
std,110.029921,88.289321,3.452593,9.397722,0.49724,7217.022701
min,-250.0,-44.0,0.0,0.0,0.0,1.0
25%,-67.25,4.0,2.0,5.0,0.0,6250.75
50%,0.0,72.0,5.0,15.0,0.0,12500.5
75%,94.0,160.0,8.0,21.0,1.0,18750.25
max,248.0,791.0,11.0,79.0,1.0,25000.0


In [5]:
# Columns fed to the model as features.
input_cols = ["loc_x", "loc_y"]  # Specified in the task statement
# Column used as the prediction target.
output_col = "shot_made_flag"

In [6]:
# Separate the feature columns from the target column.
X = df[input_cols]
y = df[output_col]
# Displayed as a sanity check on the number of rows and feature columns.
X.shape, y.shape

((25000, 2), (25000,))

In [7]:
# Convert features and labels to float32 tensors. Both go through
# ``to_numpy()`` so each conversion is one bulk copy of the underlying array
# and does not depend on how the pandas index is labelled.
X_tensor = torch.tensor(X.to_numpy(), dtype=torch.float32)
y_tensor = torch.tensor(y.to_numpy(), dtype=torch.float32)

In [8]:
# Hold out part of the data for evaluation (scikit-learn's default split size).
# The shuffle is driven by the notebook-wide `seed` defined in the first cell,
# so changing that one constant changes every source of randomness together.
X_train, X_test, y_train, y_test = train_test_split(
    X_tensor,
    y_tensor,
    random_state=seed,
)
# Displayed as a sanity check on the size of the training portion.
X_train.shape, y_train.shape

(torch.Size([18750, 2]), torch.Size([18750]))

## Define model and train

In [9]:
class MyModel(nn.Module):
    """Small fully connected network mapping 2 input features to 1 raw score.

    Layout: Linear(2, 8) -> Tanh -> Linear(8, 8) -> Tanh -> Linear(8, 1).
    No sigmoid is applied inside the model.

    The attribute names ``fc1``/``fc2``/``fc3`` become the keys of the saved
    ``state_dict``, and this class is repeated in the submission source string
    written later in the notebook, so both definitions must be kept in sync.
    """

    def __init__(self):
        super().__init__()
        n_inputs, n_hidden, n_outputs = 2, 8, 1

        self.act = nn.Tanh()
        # Layers are created in this fixed order so that a given seed always
        # produces the same initial weights.
        self.fc1 = nn.Linear(n_inputs, n_hidden)
        self.fc2 = nn.Linear(n_hidden, n_hidden)
        self.fc3 = nn.Linear(n_hidden, n_outputs)

    def forward(self, x):
        """Return one raw score per input row (last dimension of size 1)."""
        first_hidden = self.act(self.fc1(x))
        second_hidden = self.act(self.fc2(first_hidden))
        return self.fc3(second_hidden)

In [10]:
def train(model, optimizer, criterion, X, y, num_epochs):
    """Fit ``model`` with full-batch gradient steps.

    Each epoch is a single optimizer step computed on all of ``X``/``y`` at
    once; there is no mini-batching.

    Args:
        model: Module whose output has a trailing dimension of size 1
            (one score per sample).
        optimizer: Optimizer already bound to ``model``'s parameters.
        criterion: Loss callable invoked as ``criterion(outputs, y)``.
        X: Inputs, passed to ``model`` unchanged.
        y: Targets, one per row of ``X``.
        num_epochs: Number of optimizer steps to take.

    Returns:
        list[float]: The loss of every epoch, measured before that epoch's
        parameter update. Empty when ``num_epochs`` is 0.
    """
    train_losses = []
    model.train()

    for epoch in range(num_epochs):
        # Drop only the trailing size-1 dimension. A bare ``squeeze()`` would
        # also remove the batch dimension when there is a single sample,
        # producing a 0-d output whose shape no longer matches ``y``.
        outputs = model(X).squeeze(-1)
        loss = criterion(outputs, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for both the history and the log.
        loss_value = loss.item()
        train_losses.append(loss_value)
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {loss_value}")

    return train_losses

In [11]:
@torch.no_grad()
def test(model, X, y):
    model.eval()

    probs = torch.sigmoid(model(X).squeeze())
    preds = (probs >= 0.5).to(torch.uint8)

    return accuracy_score(preds, y)

In [12]:
# Build the network first so the optimizer can bind to its parameters.
model = MyModel()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# BCEWithLogitsLoss applies the sigmoid itself, which is why the model
# returns raw scores rather than probabilities.
criterion = nn.BCEWithLogitsLoss()

In [13]:
# Fifteen full-batch steps; the trailing `;` hides the returned loss list.
train(model, optimizer, criterion, X_train, y_train, num_epochs=15);

Epoch [1/15], Loss: 0.699416995048523
Epoch [2/15], Loss: 0.698686420917511
Epoch [3/15], Loss: 0.6979755163192749
Epoch [4/15], Loss: 0.6972848773002625
Epoch [5/15], Loss: 0.6966170072555542
Epoch [6/15], Loss: 0.6959756016731262
Epoch [7/15], Loss: 0.6953611373901367
Epoch [8/15], Loss: 0.6947712302207947
Epoch [9/15], Loss: 0.6942040920257568
Epoch [10/15], Loss: 0.6936602592468262
Epoch [11/15], Loss: 0.6931406855583191
Epoch [12/15], Loss: 0.6926440000534058
Epoch [13/15], Loss: 0.692168653011322
Epoch [14/15], Loss: 0.6917139291763306
Epoch [15/15], Loss: 0.691280722618103


In [14]:
# Accuracy on the held-out split.
# NOTE(review): `df.describe()` reports a mean `shot_made_flag` of 0.44744, so
# always predicting 0 would score about 0.55 on the full file, which is higher
# than the 0.52288 recorded for this cell. Likely contributors are the unscaled
# inputs (loc_x/loc_y span hundreds of units before a Tanh) and the very short
# training run — TODO confirm.
test(model, X_test, y_test)

0.52288

## Save for submission

In [15]:
# Output artefacts, written to the current working directory.
py_filename = "submission_model.py"  # Source code of the model class
pth_filename = "submission_dic.pth"  # Saved state_dict of the trained model
zip_filename = "submission.zip"  # Will submit this zip to the grader

In [16]:
# Persist only the learned parameters (the state_dict), not the module object.
torch.save(model.state_dict(), pth_filename)

In [17]:
# Source of the model class as it will be shipped in the submission.
# This text duplicates the `MyModel` class defined earlier in the notebook;
# the layer names and sizes must match so the saved state_dict can be loaded.
# `.lstrip()` removes the whitespace and newline that follow the opening quotes.
model_code = """  
import torch
import torch.nn as nn


class MyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.act = nn.Tanh()
        self.fc1 = nn.Linear(2, 8)
        self.fc2 = nn.Linear(8, 8)
        self.fc3 = nn.Linear(8, 1)
        
    def forward(self, x):
        x = self.act(self.fc1(x))
        x = self.act(self.fc2(x))
        x = self.fc3(x)
        return x
""".lstrip()

# Write the source text to disk, overwriting any existing file of that name.
with open(py_filename, "w") as f:
    f.write(model_code)

In [18]:
# Bundle the model source and the saved weights into the submission archive.
# Each file is stored under its bare file name, i.e. at the archive root.
with zipfile.ZipFile(zip_filename, mode="w") as archive:
    for path in (py_filename, pth_filename):
        archive.write(path, arcname=os.path.basename(path))

## Score

Leaderboard A accuracy: 0.5604