## Jump to the next section for the updates

In [3]:
import cv2
import requests
import numpy as np
import os
import time
from nba_api.stats.endpoints import leaguegamefinder

In [85]:
def get_season_games(season_year='2019-20'):
    gamefinder = leaguegamefinder.LeagueGameFinder(season_nullable=season_year)
    games_df = gamefinder.get_data_frames()[0]
    return games_df['GAME_ID'].unique()

game_ids = get_season_games('2019-20')
game_ids

array(['0041900406', '0041900405', '0041900404', ..., '0011900004',
       '0011900002', '0011900001'], dtype=object)

In [86]:
subsetGMs=game_ids[350:360:2]
print(subsetGMs)

['2021900519' '0021900878' '0021900882' '0021900881' '0021900880']


In [89]:
headers = {
    'Host': 'stats.nba.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/72.0',
    'Accept': 'application/json, text/plain, */*',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate, br',
    'x-nba-stats-origin': 'stats',
    'x-nba-stats-token': 'true',
    'Connection': 'keep-alive',
    'Referer': 'https://stats.nba.com/',
    'Pragma': 'no-cache',
    'Cache-Control': 'no-cache'
}

In [91]:
successful_video_events=[]
for gid in subsetGMs:
    for i in range(200):
        time.sleep(.2)
        if (i+1)%50 == 0:
            print(str((i+1)/2)+'% of', gid ,'complete')
        video_count=0
        event_id = i
        game_id = gid
        url = 'https://stats.nba.com/stats/videoeventsasset?GameEventID={}&GameID={}'.format(
            event_id, game_id)
        try:
            r = requests.get(url, headers=headers)
            json = r.json()
            video_urls = json['resultSets']['Meta'].get('videoUrls', [])
            playlist = json['resultSets'].get('playlist', [])
            
            if video_urls and playlist:  # Ensure there are items before accessing
                video_event = {'video': video_urls[0]['lurl'], 'desc': playlist[0]['dsc']}
                successful_video_events.append(video_event)
    
        except KeyError as e:
            print(f"KeyError encountered for event ID {event_id}: {e}")
        except Exception as e:
            print(f"An error occurred for event ID {event_id}: {e}")

25.0% of 2021900519 complete
50.0% of 2021900519 complete
75.0% of 2021900519 complete
100.0% of 2021900519 complete
25.0% of 0021900878 complete
50.0% of 0021900878 complete
75.0% of 0021900878 complete
100.0% of 0021900878 complete
25.0% of 0021900882 complete
50.0% of 0021900882 complete
75.0% of 0021900882 complete
100.0% of 0021900882 complete
25.0% of 0021900881 complete
50.0% of 0021900881 complete
75.0% of 0021900881 complete
100.0% of 0021900881 complete
25.0% of 0021900880 complete
50.0% of 0021900880 complete
75.0% of 0021900880 complete
100.0% of 0021900880 complete


In [94]:
sve=successful_video_events.copy()

In [96]:
sve_backup=sve.copy()
print(len(sve),len(sve_backup))

720 720


In [98]:
print('before pruning:',len(sve))

make=[]
miss=[]

for i in sve:
    # if not i['video']:
    #     sve.remove(i)
    # else:
    if 'MISS' in i['desc']:
        miss.append(i)
    elif 'Shot' in i['desc']:
        make.append(i)
        # else:
        #     sve.remove(i)
# print('after pruning:',len(sve),'\n makes:',len(make),'\n miss:',len(miss))
print('makes:',len(make),'\nmiss:',len(miss))

before pruning: 720
makes: 95 
miss: 172


## This is where new edits and code are added since v2.

In [100]:
count=0
for m in make:
    if 'Shot' in m['desc']:
        count+=1
print(count)

95


In [141]:
urls=[]
#now that we have our urls for our training data we can retrieve the videos
for u in make+miss:
    urls.append(i['video'])    

In [143]:
len(urls)

267

In [145]:
def download_video(url, filename):
    response = requests.get(url)
    if response.status_code == 200:
        # Specify subfolder
        folder_name = "clips_test"
        os.makedirs(folder_name, exist_ok=True)  # Create folder if it doesn't exist
        full_path = os.path.join(os.getcwd(),folder_name, filename)
        with open(full_path, 'wb') as f:
            f.write(response.content)
        return full_path
    else:
        print(f"Failed to download video: {response.status_code}")
        return None

In [147]:
for idx, url in enumerate(urls):
    filename= f"test_nba_clip{idx + 1}.mp4"  # Create a filename like test_nba_clip1.mp4
    download_video(url, filename)

In [7]:
def preprocess_video(video_path, frame_rate=1, resize_dim=(224, 224)):
    # Create a list to hold the processed frames
    frames = []

    # Open the video file
    cap = cv2.VideoCapture(video_path)
    
    if not cap.isOpened():
        print(f"Error opening video file: {video_path}")
        return []

    frame_count = 0
    while True:
        # Read a frame from the video
        ret, frame = cap.read()
        if not ret:
            break
        
        # Extract frames at the specified frame rate
        if frame_count % frame_rate == 0:
            # Resize the frame
            frame = cv2.resize(frame, resize_dim)
            
            # Normalize the pixel values to [0, 1]
            frame = frame / 255.0
            
            # Append the processed frame to the list
            frames.append(frame)

        frame_count += 1

    # Release the video capture object
    cap.release()
    
    # Convert list of frames to a NumPy array
    return np.array(frames)


In [9]:
folder_name = "clips_test"
video_files = [f for f in os.listdir(folder_name) if f.endswith('.mp4')]

all_processed_frames = []  # List to hold frames from all videos

for video_file in video_files:
    video_path = os.path.join(folder_name, video_file)
    processed_frames = preprocess_video(video_path)
    
    # print(f"Processed {video_file}: Number of frames extracted and processed: {len(processed_frames)}")
    
    # Append the processed frames to the all_processed_frames list
    all_processed_frames.append(processed_frames)
print(len(all_processed_frames))

267


In [11]:
make_train=[]
miss_train=[]
make_test=[]
miss_test=[]
data_dict={1: [],0:[]}

for i,pf in enumerate(all_processed_frames):
    if i < 95: #len(make) if my computer force a restart
        data_dict[1]+=[pf]
    else:
        data_dict[0]+=[pf]

In [13]:
len(data_dict[0])

172

In [15]:
make_train=data_dict[1][:76]
miss_train=data_dict[0][:139]
make_test=data_dict[1][76:]
miss_test=data_dict[0][139:]

In [17]:
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.preprocessing import image

In [25]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

## This is where things start to go bad. My kernel dies trying to create a dataframe from this data. Yaarghh

In [29]:
import pandas as pd

In [1]:
# df_train=pd.DataFrame(make_train, miss_train)

NameError: name 'pd' is not defined

In [33]:
print(len(make_train + miss_train))  # Check how many samples you have


215


### On an another attempt, my kernel ends up needing to restart again. Sad face.

In [None]:
X_train = np.array(make_train + miss_train).reshape(-1, 1)  # Reshape to (num_samples, 1)
y_train = np.array([1]*76 + [0]*139)  # Labels for classification

In [None]:
#My kernel died again... I need to reconsider my data. Right now the nested lists/arrays are too big to handle especially when the clips are 12 seconds long and the files take time and space to preprocess.
#This is what I would do

In [None]:
model = Sequential()
model.add(Dense(128, activation='relu', input_shape=(1,)))  # One feature per sample
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dense(2, activation='softmax'))  # Two output classes (made or missed)


In [None]:
# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)


### I should try downsampling my data.

### I can try preprocessing in a different way using something like this...

In [21]:
# Load the pre-trained model
model = MobileNetV2(weights='imagenet', include_top=False, pooling='avg')

def extract_features_from_frames(frame_paths):
    features = []
    
    for frame_path in frame_paths:
        img = image.load_img(frame_path, target_size=(224, 224))  # Resize to fit the model
        img_array = image.img_to_array(img)
        img_array = np.expand_dims(img_array, axis=0)
        img_array = preprocess_input(img_array)  # Preprocess for MobileNetV2
        
        # Extract features
        feature = model.predict(img_array)
        features.append(feature.flatten())
    
    return np.array(features)

# Example usage
frame_folder = "key_frames"  # Folder containing extracted frames
frame_paths = [os.path.join(frame_folder, f) for f in os.listdir(frame_folder) if f.endswith('.jpg')]

# Extract features from key frames
features = extract_features_from_frames(frame_paths)

  model = MobileNetV2(weights='imagenet', include_top=False, pooling='avg')


In [31]:
# Define the model
model = Sequential()
model.add(Dense(128, activation='relu', input_shape=(features.shape[1],)))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dense(2, activation='softmax'))  # num_classes = number of event types

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit([1]*76+[0]*139, make_train+miss_train, epochs=10, batch_size=32, validation_data=([1]*19+[0]*37, make_test+miss_test))


IndexError: tuple index out of range

## Or I can follow another method like this: https://github.com/hkair/Basketball-Action-Recognition