# Deep Neural Network with NumPy

In [191]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd 

## Parameter Initialization

In [193]:
def initialize_parameters_deep(layer_dims):
    """
    Arguments:
    layer_dims -- python array (list) containing the dimensions of each layer in our network
    
    Returns:
    parameters -- python dictionary containing your parameters "W1", "b1", ..., "WL", "bL":
                    Wl -- weight matrix of shape (layer_dims[l], layer_dims[l-1])
                    bl -- bias vector of shape (layer_dims[l], 1)
    """
    
    np.random.seed(3)
    parameters = {}
    L = len(layer_dims) 

    for l in range(1, L):
        parameters['W' + str(l)] = np.random.randn(layer_dims[l],layer_dims[l-1]) * 0.01
        parameters['b' + str(l)] = np.zeros((layer_dims[l],1))

        assert(parameters['W' + str(l)].shape == (layer_dims[l], layer_dims[l-1]))
        assert(parameters['b' + str(l)].shape == (layer_dims[l], 1))

    return parameters

In [194]:
# Example 
parameters = initialize_parameters_deep([5,4,3])
print("W1 = " + str(parameters["W1"]))
print("b1 = " + str(parameters["b1"]))
print("W2 = " + str(parameters["W2"]))
print("b2 = " + str(parameters["b2"]))

W1 = [[ 0.01788628  0.0043651   0.00096497 -0.01863493 -0.00277388]
 [-0.00354759 -0.00082741 -0.00627001 -0.00043818 -0.00477218]
 [-0.01313865  0.00884622  0.00881318  0.01709573  0.00050034]
 [-0.00404677 -0.0054536  -0.01546477  0.00982367 -0.01101068]]
b1 = [[0.]
 [0.]
 [0.]
 [0.]]
W2 = [[-0.01185047 -0.0020565   0.01486148  0.00236716]
 [-0.01023785 -0.00712993  0.00625245 -0.00160513]
 [-0.00768836 -0.00230031  0.00745056  0.01976111]]
b2 = [[0.]
 [0.]
 [0.]]


## Forward propagation

In [196]:
def linear_forward(A, W, b):
    """
    Implement the linear part of a layer's forward propagation.

    Arguments:
    A -- activations from previous layer (or input data): (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)

    Returns:
    Z -- the input of the activation function, also called pre-activation parameter 
    cache -- a python tuple containing "A", "W" and "b" ; stored for computing the backward pass efficiently
    """
    
    Z = np.dot(W,A) + b
    
    assert(Z.shape == (W.shape[0], A.shape[1]))
    cache = (A, W, b)
    
    return Z, cache

In [197]:
def sigmoid(Z):
    """
    Implements the sigmoid activation function.
    
    Arguments:
    Z -- numpy array of any shape
    
    Returns:
    A -- output of sigmoid(z), same shape as Z
    cache -- returns Z as well, useful for backprop
    """
    A = 1 / (1 + np.exp(-Z))
    cache = Z
    return A, cache

In [198]:
def relu(Z):
    """
    Implements the ReLU activation function.
    
    Arguments:
    Z -- numpy array of any shape
    
    Returns:
    A -- output of relu(z), same shape as Z
    cache -- returns Z as well, useful for backprop
    """
    A = np.maximum(0, Z)
    cache = Z
    return A, cache

In [199]:
def linear_activation_forward(A_prev, W, b, activation):
    """
    Implement the forward propagation for the LINEAR->ACTIVATION layer

    Arguments:
    A_prev -- activations from previous layer (or input data): (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

    Returns:
    A -- the output of the activation function, also called the post-activation value 
    cache -- a python tuple containing "linear_cache" and "activation_cache";
             stored for computing the backward pass efficiently
    """
    
    if activation == "sigmoid":
        Z, linear_cache = linear_forward(A_prev, W, b) # This "linear_cache" contains (A_prev, W, b)
        A, activation_cache = sigmoid(Z) 
    
    elif activation == "relu":
        Z, linear_cache = linear_forward(A_prev, W, b) # This "linear_cache" contains (A_prev, W, b)
        A, activation_cache = relu(Z) # This "activation_cache" contains "Z"
    
    assert (A.shape == (W.shape[0], A_prev.shape[1]))
    cache = (linear_cache, activation_cache)

    return A, cache

In [200]:
def L_model_forward(X, parameters):
    """
    Implement forward propagation for the [LINEAR->RELU]*(L-1)->LINEAR->SIGMOID computation
    
    Arguments:
    X -- data, numpy array of shape (input size, number of examples)
    parameters -- output of initialize_parameters_deep()
    
    Returns:
    AL -- last post-activation value
    caches -- list of caches containing:
                every cache of linear_activation_forward() (there are L-1 of them, indexed from 0 to L-1)
    """

    caches = []
    A = X
    L = len(parameters) // 2 # number of layers            
    
    # Implement [LINEAR -> RELU]*(L-1)
    for l in range(1, L):
        A_prev = A 
        A, cache = linear_activation_forward(A_prev, parameters['W' + str(l)], parameters['b' + str(l)], "relu")
        caches.append(cache)
    
    # Implement LINEAR -> SIGMOID.
    AL, cache = linear_activation_forward(A, parameters['W' + str(L)], parameters['b' + str(L)], "sigmoid")
    caches.append(cache)
    
    assert(AL.shape == (1,X.shape[1]))
            
    return AL, caches

## Cost Function

In [202]:
def compute_cost(AL, Y):
    """
    Implement the cost function defined by equation (7).

    Arguments:
    AL -- probability vector corresponding to your label predictions, shape (1, number of examples)
    Y -- true "label" vector (for example: containing 0 if non-cat, 1 if cat), shape (1, number of examples)

    Returns:
    cost -- cross-entropy cost
    """
    
    m = Y.shape[1]
    cost = (-1/m) * (np.dot(Y, np.log(AL).T) + np.dot((1-Y), np.log(1-AL).T))
    
    cost = np.squeeze(cost) # To make sure your cost's shape is what we expect (e.g. this turns [[17]] into 17).
    assert(cost.shape == ())
    
    return cost

## Backward Propagation

In [204]:
def sigmoid_backward(dA, cache):
    """
    Implements the backward propagation for a single SIGMOID unit.

    Arguments:
    dA -- Gradient of the loss with respect to the activation output (A)
    cache -- 'Z' stored from the forward pass (sigmoid)

    Returns:
    dZ -- Gradient of the loss with respect to Z
    """
    Z = cache
    s = 1 / (1 + np.exp(-Z))
    dZ = dA * s * (1 - s)
    return dZ

In [205]:
def relu_backward(dA, cache):
    """
    Implements the backward propagation for a single RELU unit.

    Arguments:
    dA -- Gradient of the loss with respect to the activation output (A)
    cache -- 'Z' stored from the forward pass (ReLU)

    Returns:
    dZ -- Gradient of the loss with respect to Z
    """
    Z = cache
    dZ = np.array(dA, copy=True)  # just converting dA to a correct shape

    # When Z <= 0, set dZ to 0
    dZ[Z <= 0] = 0
    return dZ

In [206]:
def linear_backward(dZ, cache):
    # Here cache is "linear_cache" containing (A_prev, W, b) coming from the forward propagation in the current layer
    """
    Implement the linear portion of backward propagation for a single layer (layer l)

    Arguments:
    dZ -- Gradient of the cost with respect to the linear output (of current layer l)
    cache -- tuple of values (A_prev, W, b) coming from the forward propagation in the current layer

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    A_prev, W, b = cache
    m = A_prev.shape[1]

    ### START CODE HERE ### (≈ 3 lines of code)
    dW = (1/m) * np.dot(dZ, A_prev.T)
    db = (1/m) * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T,dZ)
    ### END CODE HERE ###
    
    assert (dA_prev.shape == A_prev.shape)
    assert (dW.shape == W.shape)
    assert (db.shape == b.shape)
    
    return dA_prev, dW, db

In [207]:
def linear_activation_backward(dA, cache, activation):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.
    
    Arguments:
    dA -- post-activation gradient for current layer l 
    cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"
    
    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    linear_cache, activation_cache = cache
    
    if activation == "relu":
        ### START CODE HERE ### (≈ 1 line of code)
        dZ = relu_backward(dA, activation_cache)
        ### END CODE HERE ###
        
    elif activation == "sigmoid":
        ### START CODE HERE ### (≈ 1 line of code)
        dZ = sigmoid_backward(dA, activation_cache)
        ### END CODE HERE ###
    
    ### START CODE HERE ### (≈ 1 line of code)
    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    ### END CODE HERE ###
    
    return dA_prev, dW, db

In [208]:
def L_model_backward(AL, Y, caches):
    """
    Implement the backward propagation for the [LINEAR->RELU] * (L-1) -> LINEAR -> SIGMOID group
    
    Arguments:
    AL -- probability vector, output of the forward propagation (L_model_forward())
    Y -- true "label" vector (containing 0 if non-cat, 1 if cat)
    caches -- list of caches containing:
                every cache of linear_activation_forward() with "relu" (it's caches[l], for l in range(L-1) i.e l = 0...L-2)
                the cache of linear_activation_forward() with "sigmoid" (it's caches[L-1])
    
    Returns:
    grads -- A dictionary with the gradients
             grads["dA" + str(l)] = ... 
             grads["dW" + str(l)] = ...
             grads["db" + str(l)] = ... 
    """
    grads = {}
    L = len(caches) # the number of layers
    m = AL.shape[1]
    Y = Y.reshape(AL.shape) # after this line, Y is the same shape as AL
    
    # Initializing the backpropagation
    ### START CODE HERE ### (1 line of code)
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
    ### END CODE HERE ###
    
    # Lth layer (SIGMOID -> LINEAR) gradients. Inputs: "dAL, current_cache". Outputs: "grads["dAL-1"], grads["dWL"], grads["dbL"]
    ### START CODE HERE ### (approx. 2 lines)
    current_cache = caches[L-1] # Last Layer
    grads["dA" + str(L-1)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(dAL, current_cache, "sigmoid")
    ### END CODE HERE ###
    
    # Loop from l=L-2 to l=0
    for l in reversed(range(L-1)):
        # lth layer: (RELU -> LINEAR) gradients.
        # Inputs: "grads["dA" + str(l + 1)], current_cache". Outputs: "grads["dA" + str(l)] , grads["dW" + str(l + 1)] , grads["db" + str(l + 1)] 
        ### START CODE HERE ### (approx. 5 lines)
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA" + str(l + 1)], current_cache, activation = "relu")
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp
        ### END CODE HERE ###

    return grads

In [209]:
def update_parameters(parameters, grads, learning_rate):
    """
    Update parameters using gradient descent
    
    Arguments:
    parameters -- python dictionary containing your parameters 
    grads -- python dictionary containing your gradients, output of L_model_backward
    
    Returns:
    parameters -- python dictionary containing your updated parameters 
                  parameters["W" + str(l)] = ... 
                  parameters["b" + str(l)] = ...
    """
    
    L = len(parameters) // 2 # number of layers in the neural network

    # Update rule for each parameter. Use a for loop.
    ### START CODE HERE ### (≈ 3 lines of code)
    for l in range(L):
        parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - learning_rate * grads["dW" + str(l+1)]
        parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - learning_rate * grads["db" + str(l+1)]
    ### END CODE HERE ###
    return parameters

## Data 

In [211]:
import kagglehub
path = kagglehub.dataset_download("yagizfiratt/nba-players-database")
print("Path to dataset files:", path)

Path to dataset files: /Users/yuvan/.cache/kagglehub/datasets/yagizfiratt/nba-players-database/versions/1


In [212]:
df = pd.read_csv("/Users/yuvan/Desktop/DeepLearning/PlayerIndex_nba_stats.csv")
df.head()

Unnamed: 0,PERSON_ID,PLAYER_LAST_NAME,PLAYER_FIRST_NAME,PLAYER_SLUG,TEAM_ID,TEAM_SLUG,IS_DEFUNCT,TEAM_CITY,TEAM_NAME,TEAM_ABBREVIATION,...,DRAFT_YEAR,DRAFT_ROUND,DRAFT_NUMBER,ROSTER_STATUS,PTS,REB,AST,STATS_TIMEFRAME,FROM_YEAR,TO_YEAR
0,76001,Abdelnaby,Alaa,alaa-abdelnaby,1610612757,blazers,0,Portland,Trail Blazers,POR,...,1990.0,1.0,25.0,,5.7,3.3,0.3,Career,1990,1994
1,76002,Abdul-Aziz,Zaid,zaid-abdul-aziz,1610612745,rockets,0,Houston,Rockets,HOU,...,1968.0,1.0,5.0,,9.0,8.0,1.2,Career,1968,1977
2,76003,Abdul-Jabbar,Kareem,kareem-abdul-jabbar,1610612747,lakers,0,Los Angeles,Lakers,LAL,...,1969.0,1.0,1.0,,24.6,11.2,3.6,Career,1969,1988
3,51,Abdul-Rauf,Mahmoud,mahmoud-abdul-rauf,1610612743,nuggets,0,Denver,Nuggets,DEN,...,1990.0,1.0,3.0,,14.6,1.9,3.5,Career,1990,2000
4,1505,Abdul-Wahad,Tariq,tariq-abdul-wahad,1610612758,kings,0,Sacramento,Kings,SAC,...,1997.0,1.0,11.0,,7.8,3.3,1.1,Career,1997,2003


In [213]:
df['CAREER_SPAN'] = df['TO_YEAR'] - df['FROM_YEAR']
df.head()

Unnamed: 0,PERSON_ID,PLAYER_LAST_NAME,PLAYER_FIRST_NAME,PLAYER_SLUG,TEAM_ID,TEAM_SLUG,IS_DEFUNCT,TEAM_CITY,TEAM_NAME,TEAM_ABBREVIATION,...,DRAFT_ROUND,DRAFT_NUMBER,ROSTER_STATUS,PTS,REB,AST,STATS_TIMEFRAME,FROM_YEAR,TO_YEAR,CAREER_SPAN
0,76001,Abdelnaby,Alaa,alaa-abdelnaby,1610612757,blazers,0,Portland,Trail Blazers,POR,...,1.0,25.0,,5.7,3.3,0.3,Career,1990,1994,4
1,76002,Abdul-Aziz,Zaid,zaid-abdul-aziz,1610612745,rockets,0,Houston,Rockets,HOU,...,1.0,5.0,,9.0,8.0,1.2,Career,1968,1977,9
2,76003,Abdul-Jabbar,Kareem,kareem-abdul-jabbar,1610612747,lakers,0,Los Angeles,Lakers,LAL,...,1.0,1.0,,24.6,11.2,3.6,Career,1969,1988,19
3,51,Abdul-Rauf,Mahmoud,mahmoud-abdul-rauf,1610612743,nuggets,0,Denver,Nuggets,DEN,...,1.0,3.0,,14.6,1.9,3.5,Career,1990,2000,10
4,1505,Abdul-Wahad,Tariq,tariq-abdul-wahad,1610612758,kings,0,Sacramento,Kings,SAC,...,1.0,11.0,,7.8,3.3,1.1,Career,1997,2003,6


In [214]:
df = df.loc[:,['POSITION','HEIGHT','DRAFT_YEAR','WEIGHT','PTS','REB','AST','CAREER_SPAN']]
df.head()

Unnamed: 0,POSITION,HEIGHT,DRAFT_YEAR,WEIGHT,PTS,REB,AST,CAREER_SPAN
0,F,6-10,1990.0,240.0,5.7,3.3,0.3,4
1,C,6-9,1968.0,235.0,9.0,8.0,1.2,9
2,C,7-2,1969.0,225.0,24.6,11.2,3.6,19
3,G,6-1,1990.0,162.0,14.6,1.9,3.5,10
4,F-G,6-6,1997.0,235.0,7.8,3.3,1.1,6


In [215]:
df.shape

(5025, 8)

In [216]:
df.isna().sum()

POSITION         48
HEIGHT           47
DRAFT_YEAR     1325
WEIGHT           53
PTS              24
REB             316
AST              24
CAREER_SPAN       0
dtype: int64

In [217]:
print(df['DRAFT_YEAR'].dtype)
df['DRAFT_YEAR'] = df['DRAFT_YEAR'].fillna(0).astype(int)
print(df['DRAFT_YEAR'].dtype)

float64
int64


In [218]:
ast_mean = np.round(df['AST'].mean(skipna=True),decimals=1)
df['AST']= df['AST'].fillna(ast_mean)
ast_mean

1.4

In [219]:
reb_mean = np.round(df['REB'].mean(skipna=True),decimals=1)
df['REB']= df['REB'].fillna(reb_mean)
reb_mean

2.9

In [220]:
pts_mean = np.round(df['PTS'].mean(skipna=True),decimals=1)
df['PTS']= df['PTS'].fillna(pts_mean)
pts_mean

6.3

In [221]:
weight_mean = np.round(df['WEIGHT'].mean(skipna=True),decimals=1)
df['WEIGHT']= df['WEIGHT'].fillna(weight_mean)
weight_mean

211.4

In [222]:
df['POSITION'].fillna(df['POSITION'].mode()[0], inplace=True)

In [223]:
df['HEIGHT'].fillna(df['HEIGHT'].mode()[0], inplace=True)

In [224]:
print(df['POSITION'].value_counts())

POSITION
G      1996
F      1822
C       676
G-F     196
F-C     140
C-F     114
F-G      81
Name: count, dtype: int64


In [225]:
df['HEIGHT'].dtype

dtype('O')

In [226]:
df.isna().sum()

POSITION       0
HEIGHT         0
DRAFT_YEAR     0
WEIGHT         0
PTS            0
REB            0
AST            0
CAREER_SPAN    0
dtype: int64

In [227]:
average_career = np.round(df['CAREER_SPAN'].mean(),decimals=2)
average_career

4.33

In [228]:
df["ABOVE_AVERAGE"] = df["CAREER_SPAN"] >= average_career
df.head()

Unnamed: 0,POSITION,HEIGHT,DRAFT_YEAR,WEIGHT,PTS,REB,AST,CAREER_SPAN,ABOVE_AVERAGE
0,F,6-10,1990,240.0,5.7,3.3,0.3,4,False
1,C,6-9,1968,235.0,9.0,8.0,1.2,9,True
2,C,7-2,1969,225.0,24.6,11.2,3.6,19,True
3,G,6-1,1990,162.0,14.6,1.9,3.5,10,True
4,F-G,6-6,1997,235.0,7.8,3.3,1.1,6,True


In [234]:
y = df["ABOVE_AVERAGE"]
X = df.drop(['CAREER_SPAN','ABOVE_AVERAGE'],axis=1)

In [236]:
X.head()

Unnamed: 0,POSITION,HEIGHT,DRAFT_YEAR,WEIGHT,PTS,REB,AST
0,F,6-10,1990,240.0,5.7,3.3,0.3
1,C,6-9,1968,235.0,9.0,8.0,1.2
2,C,7-2,1969,225.0,24.6,11.2,3.6
3,G,6-1,1990,162.0,14.6,1.9,3.5
4,F-G,6-6,1997,235.0,7.8,3.3,1.1


In [244]:
y[:5]

0    False
1     True
2     True
3     True
4     True
Name: ABOVE_AVERAGE, dtype: bool

In [240]:
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
X["HEIGHT"] = encoder.fit_transform(X["HEIGHT"])
X.head()

Unnamed: 0,POSITION,HEIGHT,DRAFT_YEAR,WEIGHT,PTS,REB,AST
0,F,10,1990,240.0,5.7,3.3,0.3
1,C,19,1968,235.0,9.0,8.0,1.2
2,C,22,1969,225.0,24.6,11.2,3.6
3,G,9,1990,162.0,14.6,1.9,3.5
4,F-G,16,1997,235.0,7.8,3.3,1.1


In [258]:
y = np.array(encoder.fit_transform(y))
y[:5]

array([0, 1, 1, 1, 1])

In [256]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

ct = ColumnTransformer(transformers=[("encoding",OneHotEncoder(),[0])],remainder="passthrough")
# the method below does not return a np.array but we need X to be a np.array
X = np.array(ct.fit_transform(X))
X

array([[ 0. ,  1. ,  0. , ...,  5.7,  3.3,  0.3],
       [ 1. ,  0. ,  1. , ...,  9. ,  8. ,  1.2],
       [ 1. ,  0. ,  1. , ..., 24.6, 11.2,  3.6],
       ...,
       [ 0. ,  1. ,  0. , ...,  7.2,  3.3,  1.5],
       [ 0. ,  1. ,  0. , ...,  1.8,  2.5,  0.7],
       [ 0. ,  1. ,  0. , ...,  3.5,  3.1,  1.4]])