In [1]:
import os
from bs4 import BeautifulSoup
import urllib.request
import datetime
import pandas as pd
import altair as alt
from Functions import *
from MatchClass import *

In [2]:
def convert_date_season(date):
    '''
    This function rewrites a season-table date as a "YYYY/MM/DD" string
    - date: date in format "weekday, month day, year" with abbreviated names,
      e.g. "Wed, Dec 23, 2020" (string)
    Returns the same day in format "YYYY/MM/DD" (string)
    '''
    parsed = datetime.datetime.strptime(date, "%a, %b %d, %Y")
    # Zero-padded year/month/day, so the resulting strings sort in calendar order.
    return format(parsed, "%Y/%m/%d")


def treat_drought(drought):
    '''
    This function converts a drought duration into minutes
    - drought: duration made of three ":"-separated fields, read as
      "hours:minutes:seconds" (string)
    Returns the minutes plus the seconds expressed as a fraction of a minute (float)

    NOTE(review): the first field (hours) is parsed but never added to the result,
    so a duration of one hour or more would be under-reported -- confirm that
    droughts can never reach an hour before relying on this.
    '''
    _, minute_part, second_part = drought.split(":")
    whole_minutes = int(minute_part)
    fractional_minutes = int(second_part) / 60
    return whole_minutes + fractional_minutes


def get_value(game, statistic, team, category, player):
    '''
    This function extracts one statistic of one team from a match
    - game: match to be queried (Match)
    - statistic: one of "streak", "partial", "drought" or "box score" (string)
    - team: position of the team in the per-team results returned by the match.
      Callers in this notebook pass 0/False for the home team and True for the away team
    - category: box score category; only used when statistic is "box score"
    - player: box score player; only used when statistic is "box score"
    Returns the requested value. A drought is returned in minutes. None is returned
    when the box score filters find nothing or when the statistic is not recognised
    '''
    if statistic == "box score":
        teamBoxScore = game.box_scores()[team]
        byCategory = game.filter_by_categories(teamBoxScore, [category])
        if byCategory is None:
            return None
        byPlayer = game.filter_by_players(byCategory, [player])
        # After both filters the value of interest is the top-left cell.
        return None if byPlayer is None else byPlayer.iloc[0,0]

    if statistic == "drought":
        return treat_drought(game.longest_drought()[team])

    if statistic == "partial":
        return game.greatest_partial()[team]

    if statistic == "streak":
        return game.greatest_streak()[team]

    return None

In [3]:
# Manual check of get_value on a single match: the "2PtM" box score entry of "J. Murray"
# for team position 0 (presumably Denver, since treat_match builds Match(home, away, date)).
# NOTE(review): the output saved with this cell is "TypeError: unhashable type: 'list'",
# so the cell did not run cleanly when the notebook was saved -- re-run on a fresh kernel.
game = Match("Denver", "Dallas", "2021/01/07")
val = get_value(game, "box score", 0, "2PtM", "J. Murray")

TypeError: unhashable type: 'list'

In [17]:
def treat_match(row, team, currentDate, table, statistic, category, player):
    '''
    This function treats one row of the season table and, when it describes a match
    played before the current date, adds the desired statistic of that match to the table
    - row: row in the season table (bs4.element.Tag)
    - team: name of the team. It can be the city, the club name or a combination (string)
    - currentDate: date at the time of the query, in format "YYYY/MM/DD" (string)
    - table: results gathered so far, with columns "Date", "Opponent" and "Value" (pandas.DataFrame)
    - statistic: statistic to compute, forwarded to get_value (string)
    - category: box score category, forwarded to get_value
    - player: box score player, forwarded to get_value
    Returns a new table with one extra row, or the received table itself when the row is skipped
    '''
    cols = row.find_all('td')
    # Only rows with exactly 14 cells are read as matches; anything else is skipped.
    if len(cols) != 14:
        return table

    date = convert_date_season(cols[0].text)
    opTeam = cols[5].text
    # An "@" in the location cell means the analysed team played away.
    isAway = (cols[4].text == "@")
    home, away = (opTeam, team) if isAway else (team, opTeam)

    # Both dates are zero-padded "YYYY/MM/DD" strings, so string order is calendar order.
    # Matches dated today or later are skipped.
    if date >= currentDate:
        return table

    game = Match(home, away, date)
    # isAway doubles as the team position handed to get_value (False -> 0, True -> 1).
    value = get_value(game, statistic, isAway, category, player)

    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported replacement.
    # The new row is built with object dtype so it matches the empty starting table.
    newRow = pd.DataFrame(
        [[date, opTeam, value]],
        columns=["Date", "Opponent", "Value"],
        dtype=object,
    )
    return pd.concat([table, newRow], ignore_index=True)


def plot_line(table, statistic):
    '''
    This function draws the value of every match as a line with points, plus a
    horizontal rule at the mean value
    - table: one row per match, with columns "Date", "Opponent" and "Value" (pandas.DataFrame)
    - statistic: title of the y axis (string)
    Returns the layered chart (line chart + mean rule)
    '''
    # reset_index exposes the row position as an "index" column (the match number);
    # dropna discards rows with a missing entry, e.g. a match whose value is None.
    # NOTE(review): add_selection / selection_single are the Altair 4 spellings; newer
    # releases rename them (add_params / selection_point) -- confirm for the installed version.
    chart = alt.Chart(
        table.reset_index().dropna()
    ).mark_line(
        point=True
    ).encode(
        # The match number is a plain count, so it is encoded as quantitative;
        # encoded as temporal it would be read as a timestamp.
        x = alt.X('index:Q', title = "match"),
        y = alt.Y('Value:Q', title = statistic),
        tooltip = ['Date:T', 'Opponent:N']
    ).add_selection(
        alt.selection_single()
    )

    # Horizontal line at the mean of the "Value" column.
    rule = alt.Chart(table).mark_rule(color='darkblue').encode(
        y = alt.Y('mean(Value):Q')
    )
    return chart + rule

In [18]:
def main(team, season, statistic, category=None, player=None):
    '''
    This function explores all the matches of a team during a season and plots
    the desired statistic match by match
    - team: name of the team. It can be the city, the club name or a combination (string)
    - season: season that is going to be analyzed, e.g. "2020-2021". Only the part after
      the last "-" is used, so the final year alone ("2021") is also accepted (string)
    - statistic: one of "streak", "partial", "drought" or "box score" (string)
    - category: box score category; only used when statistic is "box score"
    - player: box score player; only used when statistic is "box score".
      When it is not given, "TOTAL" is used
    Returns the chart built by plot_line
    '''
    currentDate = datetime.date.today().strftime("%Y/%m/%d")
    shortTeam = get_team(team)
    # [-1] instead of [1]: same result for "2020-2021", and no IndexError for "2021".
    season = season.split("-")[-1]
    webpage = f"https://www.basketball-reference.com/teams/{shortTeam}/{season}_games.html"
    if statistic == "box score" and player is None:
        player = "TOTAL"

    # The context manager closes the HTTP response once the page has been read.
    with urllib.request.urlopen(webpage) as response:
        htmlDoc = response.read()
    soup = BeautifulSoup(htmlDoc, 'html.parser')
    res = soup.find_all('tr')

    table = pd.DataFrame(columns=["Date", "Opponent", "Value"])

    # treat_match returns the table untouched for rows that are not played matches.
    for line in res:
        table = treat_match(line, team, currentDate, table, statistic, category, player)

    # For box scores the y axis title names the category and the player; the "TOTAL"
    # placeholder is replaced by the short team name.
    if statistic == "box score":
        if player == "TOTAL":
            player = shortTeam
        statistic = f"{category} by {player}"
    return plot_line(table, statistic)

In [19]:
# Match-by-match "2PtM" box score value of "B. Clarke" for Memphis in the 2020-2021 season.
graf = main("Memphis", "2020-2021", "box score", "2PtM", "B. Clarke")
# Last expression of the cell: the chart is displayed with its width set to 750.
graf.properties(width=750)

In [14]:
# Same box score query without a player, so main falls back to "TOTAL".
# NOTE(review): category is passed here as a list, while get_value wraps category in a
# list again before calling filter_by_categories; the other cells pass a plain string.
# Confirm which form is intended.
graf = main("Memphis", "2020-2021", "box score", ["2PtM"])
graf.properties(width=750)

In [6]:
# Match-by-match "streak" statistic (Match.greatest_streak) for Memphis in the 2020-2021 season.
graf = main("Memphis", "2020-2021", "streak")
graf.properties(width=750)

          Date                Opponent Value
0   2020/12/23       San Antonio Spurs    13
1   2020/12/26           Atlanta Hawks    13
2   2020/12/28           Brooklyn Nets    11
3   2020/12/30          Boston Celtics    13
4   2021/01/01       Charlotte Hornets     9
..         ...                     ...   ...
61  2021/04/30           Orlando Magic    12
62  2021/05/01           Orlando Magic    22
63  2021/05/03         New York Knicks     9
64  2021/05/05  Minnesota Timberwolves    12
65  2021/05/06         Detroit Pistons     9

[66 rows x 3 columns]


In [14]:
# Match-by-match "partial" statistic (Match.greatest_partial) for Memphis in the 2020-2021 season.
graf = main("Memphis", "2020-2021", "partial")
graf.properties(width=750)

In [15]:
# Match-by-match "drought" statistic (Match.longest_drought) for Memphis in the 2020-2021
# season; treat_drought converts each value to minutes before it is plotted.
graf = main("Memphis", "2020-2021", "drought")
graf.properties(width=750)