# Train Models for Different Opponents
We fit a Logistic Regression model to the shot data recorded against each opponent, and save the coefficients to a file.  
By applying the same process to opponents instead of players, we may find the characteristics or weaknesses of a team.  

### 1. Read the data

In [1]:
import os

Specify data path

In [2]:
# Directory (relative to the notebook's working directory) that is scanned for
# the per-opponent data files.
data_path = './opponent_data'

Get the file names of all opponents

In [3]:
# os.listdir() returns entries in arbitrary order. Sort them so the file order
# (and therefore which file is processed first by the training cell, where the
# scaler is fit) is the same on every machine and every run.
fname_list = sorted(os.listdir(data_path))
fpath_list = []
teamid_list = []
for fname in fname_list:
    fpath = os.path.join(data_path, fname)
    stem = os.path.splitext(fname)[0]
    # Keep only regular files named "<team id>.<ext>". This skips
    # sub-directories and stray files (e.g. ".DS_Store") whose stem cannot be
    # converted to an integer team id.
    if os.path.isfile(fpath) and stem.isdigit():
        fpath_list.append(fpath)
        teamid_list.append(int(stem))

In [4]:
# Show the team ids parsed from the file names as a quick sanity check.
print(teamid_list)

[1610612737, 1610612738, 1610612739, 1610612740, 1610612741, 1610612742, 1610612743, 1610612744, 1610612745, 1610612746, 1610612747, 1610612748, 1610612749, 1610612750, 1610612751, 1610612752, 1610612753, 1610612754, 1610612755, 1610612756, 1610612757, 1610612758, 1610612759, 1610612760, 1610612761, 1610612762, 1610612763, 1610612764, 1610612765, 1610612766]


### 2. The functions to process the data

In [5]:
import csv
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

The features used to train the models  
From the 2015-2016 season, the NBA shot data does not contain any record whose "combined shot type" is "Tip Shot". So we exclude the "COMBINED_SHOT_TYPE_Tip Shot" feature.

In [6]:
# One-hot column names of the combined shot types that are kept as features.
combinedshottype_list = [
    'COMBINED_SHOT_TYPE_Jump Shot',
    'COMBINED_SHOT_TYPE_Layup',
    'COMBINED_SHOT_TYPE_Dunk',
    'COMBINED_SHOT_TYPE_Hook Shot',
    'COMBINED_SHOT_TYPE_Bank Shot',
]

# Complete, ordered feature set: four numeric columns followed by the
# shot-type indicator columns above.
feature_list = ['PERIOD', 'REMAIN_TIME', 'DIST', 'ANGLE', *combinedshottype_list]

Data Cleaning

In [7]:
def clean_data(df_old, feature_list):
    """Turn raw shot records into the feature table used for training.

    Parameters
    ----------
    df_old : pandas.DataFrame
        Raw shot records. The columns read here are ACTION_TYPE,
        MINUTES_REMAINING, SECONDS_REMAINING, LOC_X, LOC_Y and
        SHOT_MADE_FLAG. The frame is copied and never modified.
    feature_list : sequence of str
        Names of the feature columns to return, in order. Any requested
        feature that does not exist after the derivations below (for example
        a shot type that never occurs in this data) is added as an all-zero
        column.

    Returns
    -------
    pandas.DataFrame
        The columns of ``feature_list`` followed by ``SHOT_MADE_FLAG``,
        restricted to rows whose computed ``DIST`` is below 320.0.

    Raises
    ------
    KeyError
        If ACTION_TYPE contains a value that is missing from the mapping
        table below.
    """
    df = df_old.copy()

    # Change the action type into combined shot type
    # NOTE(review): 'Jump Hook Shot' and 'Jump Bank Hook Shot' map to
    # 'Jump Shot', and 'Driving Bank Hook Shot' maps to 'Bank Shot', while the
    # other hook variants map to 'Hook Shot' -- confirm this is intended.
    action = {
        'Jump Shot': 'Jump Shot', 'Driving Dunk Shot': 'Dunk', 'Layup Shot': 'Layup',
        'Running Jump Shot': 'Jump Shot', 'Reverse Dunk Shot': 'Dunk', 'Slam Dunk Shot': 'Dunk',
        'Driving Layup Shot': 'Layup', 'Turnaround Jump Shot': 'Jump Shot', 'Reverse Layup Shot': 'Layup',
        'Tip Shot': 'Tip Shot', 'Running Hook Shot': 'Hook Shot', 'Alley Oop Dunk Shot': 'Dunk',
        'Dunk Shot': 'Dunk', 'Alley Oop Layup shot': 'Layup', 'Running Dunk Shot': 'Dunk',
        'Driving Finger Roll Shot': 'Layup', 'Running Layup Shot': 'Layup',
        'Finger Roll Shot': 'Layup', 'Fadeaway Jump Shot': 'Jump Shot', 'Follow Up Dunk Shot': 'Dunk',
        'Hook Shot': 'Hook Shot', 'Turnaround Hook Shot': 'Hook Shot', 'Jump Hook Shot': 'Jump Shot',
        'Running Finger Roll Shot': 'Layup', 'Jump Bank Shot': 'Jump Shot',
        'Turnaround Finger Roll Shot': 'Layup', 'Hook Bank Shot': 'Bank Shot',
        'Driving Hook Shot': 'Hook Shot', 'Running Tip Shot': 'Tip Shot',
        'Running Reverse Layup Shot': 'Layup', 'Driving Finger Roll Layup Shot': 'Layup',
        'Fadeaway Bank shot': 'Bank Shot', 'Pullup Jump shot': 'Jump Shot', 'Finger Roll Layup Shot': 'Layup',
        'Turnaround Fadeaway shot': 'Jump Shot', 'Driving Reverse Layup Shot': 'Layup',
        'Driving Slam Dunk Shot': 'Dunk', 'Step Back Jump shot': 'Jump Shot',
        'Turnaround Bank shot': 'Bank Shot', 'Reverse Slam Dunk Shot': 'Dunk',
        'Floating Jump shot': 'Jump Shot', 'Putback Slam Dunk Shot': 'Dunk',
        'Running Bank shot': 'Bank Shot', 'Driving Bank shot': 'Bank Shot', 'Driving Jump shot': 'Jump Shot',
        'Putback Layup Shot': 'Layup', 'Putback Dunk Shot': 'Dunk',
        'Running Finger Roll Layup Shot': 'Layup', 'Pullup Bank shot': 'Bank Shot',
        'Running Slam Dunk Shot': 'Dunk', 'Cutting Layup Shot': 'Layup',
        'Driving Floating Jump Shot': 'Jump Shot', 'Running Pull-Up Jump Shot': 'Jump Shot',
        'Tip Layup Shot': 'Layup', 'Driving Floating Bank Jump Shot': 'Jump Shot',
        'Cutting Finger Roll Layup Shot': 'Layup', 'Turnaround Fadeaway Bank Jump Shot': 'Jump Shot',
        'Step Back Bank Jump Shot': 'Jump Shot', 'Cutting Dunk Shot': 'Dunk', 'Running Bank Hook Shot': 'Hook Shot',
        'Turnaround Bank Hook Shot': 'Hook Shot', 'Tip Dunk Shot': 'Dunk', 'Driving Reverse Dunk Shot': 'Dunk',
        'Jump Bank Hook Shot': 'Jump Shot', 'Driving Bank Hook Shot': 'Bank Shot', 'No Shot': 'Jump Shot',
        'Running Alley Oop Layup Shot': 'Layup', 'Running Alley Oop Dunk Shot': 'Dunk',
        'Running Reverse Dunk Shot': 'Dunk', 'Putback Reverse Dunk Shot': 'Dunk',
    }

    # A plain dict lookup is used on purpose: an unknown action type raises
    # KeyError instead of silently producing a missing value.
    df['COMBINED_SHOT_TYPE'] = df['ACTION_TYPE'].apply(lambda x: action[x])
    df = df.drop(columns='ACTION_TYPE')

    # Combine the minutes_remaining and seconds_remaining into remain_time
    # (expressed in seconds).
    df['REMAIN_TIME'] = df['MINUTES_REMAINING'] * 60 + df['SECONDS_REMAINING']
    df = df.drop(columns=['MINUTES_REMAINING', 'SECONDS_REMAINING'])

    # Compute the shot distance and shot angle from loc_x and loc_y.
    # The angle is in degrees, measured from the positive LOC_Y axis and
    # positive towards positive LOC_X (note the argument order of arctan2).
    df['DIST'] = (df['LOC_X'] ** 2 + df['LOC_Y'] ** 2) ** 0.5
    df['ANGLE'] = np.arctan2(df['LOC_X'], df['LOC_Y']) / np.pi * 180.0

    # Use the one-hot coding for combined_shot_type.
    # `axis` / `columns` are passed by keyword: pandas 2.x no longer accepts
    # them positionally. The dtype is pinned so the indicator columns are
    # numeric 0/1 regardless of the pandas version's default.
    features_onehot = ['COMBINED_SHOT_TYPE']
    for col in features_onehot:
        dummies = pd.get_dummies(df[col], prefix=col, dtype='uint8')
        df = pd.concat([df, dummies], axis=1)
        df = df.drop(columns=col)

    # If the column of required feature is empty, fill the column with 0
    for feature in feature_list:
        if feature not in df.columns:
            df[feature] = 0

    # Exclude the shot from back court (distance cut-off is in the same units
    # as LOC_X / LOC_Y).
    df = df[df['DIST'] < 320.0]

    # Extract required features from data frame; list() lets callers pass any
    # sequence (e.g. a tuple) of feature names.
    df = df[list(feature_list) + ['SHOT_MADE_FLAG']]

    return df

Function to train the model

In [8]:
def TrainModel(df, scaler=None):
    """Fit a logistic regression on one cleaned shot table.

    Parameters
    ----------
    df : pandas.DataFrame
        Feature columns plus the target column ``SHOT_MADE_FLAG``.
    scaler : object with a ``transform`` method, optional
        Already-fitted scaler applied to the features before training.
        When ``None`` the features are used as they are.

    Returns
    -------
    (list of float, float)
        The model coefficients, in the column order of the features, and
        the intercept.
    """
    X = df.drop('SHOT_MADE_FLAG', axis=1)
    y = df['SHOT_MADE_FLAG']

    # Use the given scaler to transform features into same magnitude.
    # Identity test rather than `!= None`, so the check never depends on how
    # the scaler object implements comparison.
    if scaler is not None:
        X = scaler.transform(X)

    clf = LogisticRegression(C=1.0, max_iter=300, solver='lbfgs')
    clf.fit(X, y)

    # Change the data type of coefficients into plain Python floats so they
    # can be concatenated with other list items by the caller.
    # Only the first row of coef_ / first entry of intercept_ is used --
    # assumes SHOT_MADE_FLAG is a binary target; TODO confirm.
    coef = [float(c) for c in clf.coef_[0]]
    intercept = float(clf.intercept_[0])

    return coef, intercept

Begin training for all opponents  
Save the coefficients into a csv file

In [9]:
# Train one model per opponent file and write one CSV row per opponent:
# team id, team name, one coefficient per feature, then the intercept.
with open('Opponents_Model.csv', 'w', newline='') as f_rslt:
    writer = csv.writer(f_rslt)
    writer.writerow(['OPPONENT_TEAM_ID', 'OPPONENT_TEAM_NAME'] + feature_list + ['INTERCEPT'])
    for i, fpath in enumerate(fpath_list):
        df = pd.read_csv(fpath)
        # Take the id and name from the first row by position, so the lookup
        # does not rely on the frame's index containing the label 0.
        opponent_id = int(df['OPPONENT_TEAM_ID'].iloc[0])
        opponent_name = str(df['OPPONENT_TEAM_NAME'].iloc[0])
        df = clean_data(df, feature_list)
        if i == 0:
            # The scaler is fit once, on the first file's features, and then
            # reused for every opponent so all models see the same scaling.
            # NOTE(review): the result therefore depends on which file comes
            # first; fitting on all opponents' shots may be preferable.
            X = df.drop('SHOT_MADE_FLAG', axis=1)
            scaler = StandardScaler().fit(X)
        coef, intercept = TrainModel(df, scaler)
        writer.writerow([opponent_id, opponent_name] + coef + [intercept])