## DS&S Technical Assessment

In [1]:
#importing packages
import pandas as pd 
import numpy as np

In [2]:
#access the csv file given 
df = pd.read_csv("shots_data.csv")

## df exploration 

In [3]:
#let's take a look at the data
df.head()

Unnamed: 0,team,x,y,fgmade
0,Team A,-23.1,3.5,0
1,Team A,0.0,25.1,1
2,Team A,0.5,1.0,1
3,Team A,-5.6,5.2,0
4,Team A,4.0,14.5,1


In [4]:
#checking the data types of each column 
df.dtypes

team       object
x         float64
y         float64
fgmade      int64
dtype: object

In [5]:
#understanding the data I am given
df.describe()

Unnamed: 0,x,y,fgmade
count,560.0,560.0,560.0
mean,1.049821,11.156071,0.392857
std,11.074739,9.487093,0.488822
min,-23.6,-1.5,0.0
25%,-4.25,2.0,0.0
50%,0.6,8.5,0.0
75%,7.475,20.725,1.0
max,23.8,29.7,1.0


## df analysis functions

In [6]:
#determining shot zones 
df["Shot Zone"] = "NA"

conditions = [
    (df["y"] <= 7.8) & (df["x"] > 22.0), #C3 RS
    (df["y"] <= 7.8) & (df["x"] < -22.0), #C3 LS
    (df["y"] <= 7.8) & (df["x"].between(-22, 22, inclusive = False)), #2PT where y <= 7.8
    (df["y"].between(7.8, 23.75, inclusive = False)) & (df["x"].between(0, 23.75, inclusive = False)), #2PT RS of arc
    (df["y"].between(7.8, 23.75, inclusive = False)) & (df["x"].between(-23.75, 0, inclusive = False)), #2PT LS of arc
    (df["Shot Zone"] == "NA"), #NC3: the rest 
]

values = ["C3","C3","2PT","2PT","2PT","NC3"]

df["Shot Zone"] = np.select(conditions, values)

In [None]:
#LOGIC: find the shots for corner 3 first because they are easiest.
#Next, find the 2-point shots. This is more difficult becuase of the arc,
#so first I found the 2-point shots that were y <= 7.8, then I found the
#2-point shots that were inside the right side of the arc and the left side of the arc.
#Now, since I have 2/3 categories, the rest of the shots are NC3s. 

In [7]:
#split df for teamA and teamB
teamA = df[df.team.str.contains("Team A")]
teamB = df[df.team.str.contains("Team B")]

In [23]:
#calculating shot distribution for each zone
def analysis(df):

    #total attempts of the team
    fga = len(df)
    
    #total attempts at 2PT
    two_pt = df[df["Shot Zone"] == "2PT"] #data for all 2PT
    twopt_attempts = len(two_pt) 
    
    #total attempts at NC3
    nc3 = df[df["Shot Zone"] == "NC3"] #data for all nc3
    nc3_attempts = len(nc3) 
    
    #total attempts at C3
    c3 = df[df["Shot Zone"] == "C3"] #data for all c3
    c3_attempts = len(c3) 
    
    #shot distribution
    twopt_percentage = round(twopt_attempts/fga*100,2)
    
    nc3_percentage = round(nc3_attempts/fga*100,2)
    
    c3_percentage = round(c3_attempts/fga*100,2)
       
    print("Shot Distribution:") #display shot distribution of the team
    print("2PT: " + str(twopt_percentage) + "%")
    print("NC3: " + str(nc3_percentage) + "%")
    print("C3: " + str(c3_percentage) + "%")
    
    #FGM
    fgm = len(df[df["fgmade"] == 1])
    
    #NC3PM
    nc3pm = len(nc3[nc3["fgmade"] == 1])
    
    #C3PM
    c3pm = len(c3[c3["fgmade"] == 1])
     
    #eFG of 2PT
    eFG_2PT = round((fgm/fga)*100,2)
    
    #eFG of NC3PM
    eFG_NC3PT = round(((fgm + (0.5*nc3pm))/fga)*100,2)
    
    #eFG of C3PM
    eFG_C3PT = round(((fgm + (0.5*c3pm))/fga)*100,2)
    
    
    print("eFG%:")
    print("2PT: " + str(eFG_2PT) + "%")
    print("NC3: " + str(eFG_NC3PT) + "%")
    print("C3: " + str(eFG_C3PT) + "%")

## deliverables

In [24]:
print("Team A Results")
analysis(teamA)

Team A Results
Shot Distribution:
2PT: 77.86%
NC3: 15.0%
C3: 7.14%
eFG%:
2PT: 37.5%
NC3: 40.18%
C3: 38.93%


In [25]:
print("Team B Results")
analysis(teamB)

Team B Results
Shot Distribution:
2PT: 78.93%
NC3: 13.57%
C3: 7.5%
eFG%:
2PT: 41.07%
NC3: 43.39%
C3: 41.96%
