## Find [gameid] of games by a specific league in FIBA LiveStats

**baseurl**: https://www.fibalivestats.com/u/[league]/[gameid]/sc.html

[league] is the name of the league (e.g. UAAP)
[gameid] is the gameid for the specific game (e.g. 936275)

For example, the shotchart of the game between NU and UST during UAAP Season 81 (gameid:936275) is stored in FIBA LiveStats at: https://www.fibalivestats.com/u/UAAP/936275/sc.html

In [None]:
import requests

import sys

# Scan a range of game ids and print the URL of every game page that exists
# for the chosen league.
league = 'UAAP'    # the league you want to scrape data from
clue = '(Seniors Division)'    # a piece of text that appears on the webpage if the game/webpage exists, should be changed accordingly
baseurl = 'https://www.fibalivestats.com/u/{}'.format(league)
start_id = 5000
end_id = 10000
timeout = 10    # seconds to wait for the server before giving up on one game id

# To search from end_id down to start_id instead, use
# range(end_id, start_id - 1, -1) here.
game_ids = range(start_id, end_id + 1)

# One Session reuses the underlying connection across the thousands of
# requests instead of opening a new one for every game id.
with requests.Session() as session:
    for g_id in game_ids:
        url = "{}/{}/".format(baseurl, g_id)
        try:
            # Without a timeout a single stalled connection would hang the scan.
            resp = session.get(url, timeout=timeout)
        except requests.RequestException as err:
            # Report the failure on stderr (so stdout stays a clean list of
            # URLs) and carry on with the next id rather than aborting.
            print("{}: {}".format(g_id, err), file=sys.stderr)
            continue
        # Keep the URL only if the page exists and contains the clue text.
        if resp.status_code == 200 and clue in resp.text:
            print(url)


Once you have a list of URLs, you can:
1. use the URLs directly in the next step
2. download the whole HTML file
3. download only the part of the HTML inside **div id="shotchart_data"** (this contains the shot chart information)
    - you can add another div with **id="gameInfo"** and give it a class attribute holding the game information: class="date team opponent venue" (the extraction code below reads these four space-separated values in this order, so none of them may contain a space)
    - see [sample shotchart data](/sample-shotchart-html/936275.txt)

## Extract information from LiveStats shot chart HTML

In [None]:
from bs4 import BeautifulSoup
import pprint
import csv
import pandas as pd

### Extract the data for one game/file

In [None]:
def court_cell(bottom_pct, left_pct):
    """Convert a shot marker position to a 10 cm grid cell.

    bottom_pct and left_pct are the first two percentages of the marker's
    inline style. Returns a tuple (x, y) of ints: the cell indices with the
    basket as origin.
    """
    # Markers in the right half of the chart are mirrored through the centre
    # so that every shot ends up in the same half.
    if left_pct > 50:
        bottom_pct = 100 - bottom_pct
        left_pct = 100 - left_pct

    # in meters (100% maps to 15 m on one axis and 28 m on the other)
    x_m = (100 - bottom_pct) * 0.15
    y_m = left_pct * 0.28

    # if basket is origin
    x_m = x_m - 7.5
    y_m = y_m - 1.43

    # if 10cm cell
    # NOTE(review): int() truncates toward zero, so cell 0 covers 20 cm
    # (-0.1 m to 0.1 m) on each axis; confirm whether math.floor is wanted.
    return int(x_m / 0.1), int(y_m / 0.1)


def parse_shot(classes, style, title, white, black):
    """Build the record for one shot from the attributes of its <span>.

    classes: the span's class list; the second entry is '<side>_<outcome>',
        where side 'white' selects the white team (anything else the black
        team) and outcome 'made' counts as made (anything else as missed).
    style: inline style whose first two declarations are the percentages
        passed to court_cell.
    title: text of the form '<num>, <player>, <points>pt <shot type>'.
    white, black: names of the two teams.
    Returns a dict with the keys team, opponent, made, x, y, num, player,
    points and shot_type.
    """
    side, outcome = classes[1].split('_')
    if side == 'white':
        team, opponent = white, black
    else:
        team, opponent = black, white

    bottom_pct, left_pct = [float(p.split(': ')[1].strip('%')) for p in style.split(';')[:2]]
    x, y = court_cell(bottom_pct, left_pct)

    num, player, pt_type = title.split(', ')
    # Split only on the first 'pt ' so a shot type that itself contains
    # 'pt ' (e.g. "attempt ...") does not break the unpacking.
    points, shot_type = pt_type.split('pt ', 1)
    return {
        'team': team,
        'opponent': opponent,
        'made': 1 if outcome == 'made' else 0,
        'x': x,
        'y': y,
        'num': num,
        'player': player,
        'points': int(points),
        'shot_type': shot_type,
    }


# `gameid` must be set beforehand; it is used as the path of the downloaded
# shot chart file.
shotchartfile = gameid
fg_list = []
# Open in binary mode so BeautifulSoup detects the file's encoding itself
# instead of relying on the platform's default text encoding.
with open(shotchartfile, 'rb') as f:
    soup = BeautifulSoup(f, 'html.parser')
    game_info = soup.find_all(id='gameInfo')
    if not game_info:
        raise ValueError("{}: no element with id='gameInfo' found".format(shotchartfile))
    # The class attribute is split on whitespace into exactly four values.
    date, white, black, venue = game_info[0].attrs['class']
    shots = soup.find_all('span')
    for shot in shots:
        classes = shot.get('class', [])
        loc = shot.get('style')
        sh_info = shot.get('title')
        # A full page contains spans that are not shot markers; skip any span
        # lacking the attributes a shot marker carries.
        if len(classes) < 2 or loc is None or sh_info is None:
            continue
        info = parse_shot(classes, loc, sh_info, white, black)
        info['date'] = date
        info['venue'] = venue
        fg_list.append(info)

### Extract the data from a list of shotchart files

In [None]:
def court_cell(bottom_pct, left_pct):
    """Convert a shot marker position to a 10 cm grid cell.

    bottom_pct and left_pct are the first two percentages of the marker's
    inline style. Returns a tuple (x, y) of ints: the cell indices with the
    basket as origin.
    """
    # Markers in the right half of the chart are mirrored through the centre
    # so that every shot ends up in the same half.
    if left_pct > 50:
        bottom_pct = 100 - bottom_pct
        left_pct = 100 - left_pct

    # in meters (100% maps to 15 m on one axis and 28 m on the other)
    x_m = (100 - bottom_pct) * 0.15
    y_m = left_pct * 0.28

    # if basket is origin
    x_m = x_m - 7.5
    y_m = y_m - 1.43

    # if 10cm cell
    # NOTE(review): int() truncates toward zero, so cell 0 covers 20 cm
    # (-0.1 m to 0.1 m) on each axis; confirm whether math.floor is wanted.
    return int(x_m / 0.1), int(y_m / 0.1)


def parse_shot(classes, style, title, white, black):
    """Build the record for one shot from the attributes of its <span>.

    classes: the span's class list; the second entry is '<side>_<outcome>',
        where side 'white' selects the white team (anything else the black
        team) and outcome 'made' counts as made (anything else as missed).
    style: inline style whose first two declarations are the percentages
        passed to court_cell.
    title: text of the form '<num>, <player>, <points>pt <shot type>'.
    white, black: names of the two teams.
    Returns a dict with the keys team, opponent, made, x, y, num, player,
    points and shot_type.
    """
    side, outcome = classes[1].split('_')
    if side == 'white':
        team, opponent = white, black
    else:
        team, opponent = black, white

    bottom_pct, left_pct = [float(p.split(': ')[1].strip('%')) for p in style.split(';')[:2]]
    x, y = court_cell(bottom_pct, left_pct)

    num, player, pt_type = title.split(', ')
    # Split only on the first 'pt ' so a shot type that itself contains
    # 'pt ' (e.g. "attempt ...") does not break the unpacking.
    points, shot_type = pt_type.split('pt ', 1)
    return {
        'team': team,
        'opponent': opponent,
        'made': 1 if outcome == 'made' else 0,
        'x': x,
        'y': y,
        'num': num,
        'player': player,
        'points': int(points),
        'shot_type': shot_type,
    }


games = []   # add location of files here

fg_list = []
for game in games:
    # Open in binary mode so BeautifulSoup detects the file's encoding itself
    # instead of relying on the platform's default text encoding.
    with open(str(game), 'rb') as f:
        soup = BeautifulSoup(f, 'html.parser')
        game_info = soup.find_all(id='gameInfo')
        if not game_info:
            raise ValueError("{}: no element with id='gameInfo' found".format(game))
        # The class attribute is split on whitespace into exactly four values.
        date, white, black, venue = game_info[0].attrs['class']
        shots = soup.find_all('span')
        for shot in shots:
            classes = shot.get('class', [])
            loc = shot.get('style')
            sh_info = shot.get('title')
            # A full page contains spans that are not shot markers; skip any
            # span lacking the attributes a shot marker carries.
            if len(classes) < 2 or loc is None or sh_info is None:
                continue
            info = parse_shot(classes, loc, sh_info, white, black)
            info['date'] = date
            info['venue'] = venue
            fg_list.append(info)

### Save to CSV

In [None]:
# Write every shot record in fg_list to fg.csv, one row per shot.
# The columns are listed explicitly (they are the keys of each record) so the
# export still produces a header-only file when fg_list is empty, instead of
# failing on fg_list[0] after fg.csv has already been truncated.
fg_fields = [
    'team',
    'opponent',
    'made',
    'x',
    'y',
    'num',
    'player',
    'points',
    'shot_type',
    'date',
    'venue',
]
# newline='' lets the csv module control line endings itself.
with open('fg.csv', 'w', encoding='utf8', newline='') as output_file:
    fc = csv.DictWriter(output_file, fieldnames=fg_fields)
    fc.writeheader()
    fc.writerows(fg_list)