In [18]:
import pandas as pd 
import json
import csv
import nba_api
import aiohttp
import asyncio
import time
import requests
from nba_api.stats.endpoints import leaguegamefinder
from aiohttp import ClientSession, TCPConnector
import random
import math
from contextlib import asynccontextmanager
import numpy as np
import itertools
import backoff_utils
from backoff_utils import backoff
from backoff_utils import apply_backoff
from backoff_utils import strategies

In [19]:
headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:100.0) Gecko/20100101 Firefox/100.0',
    'Accept': 'application/json, text/plain, */*',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate, br',
    'x-nba-stats-origin': 'stats',
    'x-nba-stats-token': 'true',
    'Origin': 'https://www.nba.com',
    'DNT': '1',
    'Connection': 'keep-alive',
    'Referer': 'https://www.nba.com/',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-site',
    'Pragma': 'no-cache',
    'Cache-Control': 'no-cache',
}

In [20]:

#max no of tokens is the rate limit
class RateLimiter:
    def __init__(self,rate_limit: int,concurrency_limit: int) -> None:
        #rate limit = no of request per second, rps
        self.rate_limit = rate_limit
        self.tokens_queue = asyncio.Queue(rate_limit)
        #consume tokens from queue at constant rate
        self.tokens_consumer_task = asyncio.create_task(self.consume_tokens())
        self.semaphore = asyncio.Semaphore(concurrency_limit)
    #
    async def add_token(self) -> None:
        await self.tokens_queue.put(1)
        return None

    async def consume_tokens(self):
        try:
            consumption_rate = 1 / self.rate_limit
            last_consumption_time = 0

            while True:
                if self.tokens_queue.empty():
                    await asyncio.sleep(consumption_rate)
                    continue

                current_consumption_time = time.monotonic()
                total_tokens = self.tokens_queue.qsize()
                tokens_to_consume = self.get_tokens_amount_to_consume(
                    consumption_rate,
                    current_consumption_time,
                    last_consumption_time,
                    total_tokens
                )

                for i in range(0, tokens_to_consume):
                    self.tokens_queue.get_nowait()

                last_consumption_time = time.monotonic()

                await asyncio.sleep(consumption_rate)
        except asyncio.CancelledError:
            # you can ignore the error here and deal with closing this task later but this is not advised
            raise
        except Exception as e:
            # do something with the error and re-raise
            raise

    @staticmethod
    def get_tokens_amount_to_consume(consumption_rate, current_consumption_time, last_consumption_time, total_tokens):
        #time btwn iteration
        time_from_last_consumption = current_consumption_time - last_consumption_time
        
        calculated_tokens_to_consume = math.floor(time_from_last_consumption / consumption_rate)
        tokens_to_consume = min(total_tokens, calculated_tokens_to_consume)
        return tokens_to_consume

    @asynccontextmanager
    async def throttle(self):
        await self.semaphore.acquire()
        await self.add_token()
        try:
            yield
        finally:
            self.semaphore.release()

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        if exc_type:
            # log error here and safely close the class
            pass

        await self.close()

    async def close(self) -> None:
        if self.tokens_consumer_task and not self.tokens_consumer_task.cancelled():
            try:
                self.tokens_consumer_task.cancel()
                await self.tokens_consumer_task
            except asyncio.CancelledError:
                # we ignore this exception but it is good to log and signal the task was cancelled
                pass
            except Exception as e:
                # log here and deal with the exception
                raise

In [21]:
async def send_request(game_id, client_session: aiohttp.ClientSession , rate_limiter: RateLimiter):
    start1= time.time()
    async with rate_limiter.throttle():
        url = f'https://stats.nba.com/stats/boxscorefourfactorsv2?EndPeriod=1&EndRange=0&GameID={game_id}&RangeType=0&StartPeriod=1&StartRange=0'
        print(f'sending url: {url}')
        end1= time.time()-start1
        print(end1)
        start = time.time()
        """ try:
            timeout = aiohttp.ClientTimeout(total=None,connect=None,sock_connect=None, sock_read=3)
            response = await client_session.get(url, headers = headers, timeout=timeout)
        except TimeoutError: """
        timeout = aiohttp.ClientTimeout(total=None,connect=None,sock_connect=None, sock_read=None)
        response = backoff(response = await client_session.get(url, headers = headers, timeout=timeout),
                 max_tries = 3,
                 max_delay = 30,
                 strategy = strategies.Exponential)

        
        print(f'releasing throttler')

    # Why are the following lines not included in the rate limiter context?
    # because all we want to control is the rate of io operations
    # and since the following lines instruct reading the response stream into memory,
    # it shouldn't block the next requests from sending
    # (unless you have limited memory or large files to ingest.
    # In that case you should add it to the context 
    # but also make sure you free memory for the next requests)!
    # so we should now release the semaphore and let the
    # stream reading begin async while letting the rest of the requests go on sending
    
    print(f'reading stream of response from {url}')
    roundtrip = time.time()-start
    print(roundtrip)
    status = response.status
    print(status)
    #retries the 504 code using expnl backoff algo
    if status == 504:
                response = backoff(response = await client_session.get(url, headers = headers, timeout=timeout),
                 max_tries = 3,
                 max_delay = 30,
                 strategy = strategies.Exponential)
    
    else:
        pass
    
    data = await response.read()
    hashrate = json.loads(data)
    #raise Exception('No data')
    #hashrate =  await asyncio.wait_for( await response.read(), 5)
    #resp = await response.json(content_type=None)
    response.release()

    return hashrate


async def send_multiple_requests(data):
    async with RateLimiter(rate_limit=3, concurrency_limit=2) as rate_limiter:
        async with aiohttp.ClientSession() as session:
            tasks = [asyncio.ensure_future(send_request(game_id,client_session=session,rate_limiter=rate_limiter)) for game_id in itertools.chain.from_iterable(data)]
            return await asyncio.gather(*tasks)


async def main():
    return await send_multiple_requests(data)


In [22]:
boxscorefourfactorsv2_21_14 = await main()

sending url: https://stats.nba.com/stats/boxscorefourfactorsv2?EndPeriod=1&EndRange=0&GameID=0022001079&RangeType=0&StartPeriod=1&StartRange=0
0.00010752677917480469
sending url: https://stats.nba.com/stats/boxscorefourfactorsv2?EndPeriod=1&EndRange=0&GameID=0022001071&RangeType=0&StartPeriod=1&StartRange=0
5.364418029785156e-05
releasing throttler
reading stream of response from https://stats.nba.com/stats/boxscorefourfactorsv2?EndPeriod=1&EndRange=0&GameID=0022001071&RangeType=0&StartPeriod=1&StartRange=0
1.0858192443847656
200
sending url: https://stats.nba.com/stats/boxscorefourfactorsv2?EndPeriod=1&EndRange=0&GameID=0022001074&RangeType=0&StartPeriod=1&StartRange=0
1.0863032341003418
releasing throttler
reading stream of response from https://stats.nba.com/stats/boxscorefourfactorsv2?EndPeriod=1&EndRange=0&GameID=0022001074&RangeType=0&StartPeriod=1&StartRange=0
0.3051607608795166
200
sending url: https://stats.nba.com/stats/boxscorefourfactorsv2?EndPeriod=1&EndRange=0&GameID=0022

ServerTimeoutError: Timeout on reading data from socket

sending url: https://stats.nba.com/stats/boxscorefourfactorsv2?EndPeriod=1&EndRange=0&GameID=0022000895&RangeType=0&StartPeriod=1&StartRange=0
74.30062675476074
sending url: https://stats.nba.com/stats/boxscorefourfactorsv2?EndPeriod=1&EndRange=0&GameID=0022000892&RangeType=0&StartPeriod=1&StartRange=0
74.30073809623718


In [18]:
with open("C:/Users/DK/nbadata/offensive/boxscorefourfactorsv2_21_14.json", 'w') as f:
    # indent=2 is not needed but makes the file human-readable
    json.dump(boxscorefourfactorsv2_21_14, f, indent=2) 

#with open("file.json", 'r') as f:
    #score = json.load(f)
#print(score)

NameError: name 'boxscorefourfactorsv2_21_14' is not defined

In [5]:
seasons = [
    '2020-21',
    '2019-20',
    '2018-19',
    '2017-18',
    '2016-17',
    '2015-16',
    '2014-15',
    '2013-14',
    '2012-13',
    '2011-12',
    '2010-11',
    '2009-10',
    '2008-09',
    '2007-08',
    '2006-07'

]

In [5]:
season1 =[
    '2020-21',
    '2019-20',
    '2018-19',
    '2017-18',
    '2016-17',
    '2015-16',
    '2014-15',
    '2013-14'
]

In [6]:
from nba_api.stats.endpoints import leaguegamefinder

In [7]:
def get_games_data(season_nullable):
    gamefinder = leaguegamefinder.LeagueGameFinder(season_nullable=season_nullable, league_id_nullable='00', season_type_nullable='Regular Season',headers = headers , timeout = 100)
    games = gamefinder.get_data_frames()[0]
    return games


In [8]:
#function to get games 2006-21 
games_list = []
for season_nullable in season1:
    game_data = get_games_data(season_nullable)
    games_list.append(game_data)

seasons1df = pd.concat(games_list, ignore_index=True)

In [9]:
game_ids=seasons1df['GAME_ID'].unique().tolist()

In [10]:
len(game_ids)

9519

In [11]:
def split_list(a, n):
    k, m = divmod(len(a), n)
    return (a[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n))

In [12]:
data = list(split_list(game_ids,40))