In [None]:
import pandas as pd
import re
import matplotlib.pyplot as plt
from matplotlib import animation
from matplotlib.animation import FuncAnimation
import numpy as np
plt.style.use('ggplot')

# This program displays an animated bar graph to compare each season's overall
# points (with and without VAR penalty calls).

# Abbreviated Team Names
# Full club name -> short label. The plotting script below swaps a team name
# for its short label when one is listed here, so the y-axis tick labels stay
# compact; names that are not listed are shown unchanged.
abbv = {
    "Liverpool": "L'pool",
    "Tottenham": "Spurs",
    "West Ham": "W. Ham",
    "Leicester City": "LCFC",
    "Newcastle Utd": "NUFC",
    "Crystal Palace": "CPFC",
    "Brentford": "Bees",
    "Aston Villa": "AVFC",
    "Southampton": "Soton",
    "Leeds Utd": "Leeds",
    "Norwich City": "NW City",
    "Sheffield Utd": "SUFC",
    "AFC Bournemouth": "AFCB",
    "W.B. Albion": "WBA",
}

# This function returns the average of a list
def getAverage(numList):
  """Return the arithmetic mean of the numbers in ``numList`` as a float.

  Raises:
    ZeroDivisionError: if ``numList`` is empty.
  """
  how_many = len(numList)
  return float(sum(numList)) / float(how_many)

# These functions will help set the bar positions at each frame.
def listAt(aList, index):
  """Return, for every value in ``aList``, sample number ``index`` of its ramp.

  Each value is expanded with ``intervalList`` (evenly spaced steps from 0 up
  to the value) and the element at position ``index`` is picked, so the result
  has one entry per input value.

  Raises:
    IndexError: if ``index`` is outside the range produced by ``intervalList``.
  """
  return [intervalList(target)[index] for target in aList]

def intervalList(pts):
  """Return 50 evenly spaced floats running from 0 to ``pts`` inclusive."""
  # 50 is np.linspace's default sample count; spelled out here because the
  # animation's frame count depends on it.
  samples = np.linspace(0, pts, num=50)
  return samples.tolist()

def seasons_str(csv):
  """Build the bullet list of selectable seasons shown in the input prompt.

  Args:
    csv: object exposing ``.columns`` (the DataFrame read from the CSV file).

  Returns:
    A string with one ``'\\n- <season>'`` entry for every column whose name
    starts like ``20YY/YY``, except '2017/18' and '2018/19', in column order.
    Empty string when no column qualifies.
  """
  skipped = ('2017/18', '2018/19')
  offered = [
      name for name in list(csv.columns)
      if re.match(r"20\d\d/\d\d", name) and name not in skipped
  ]
  return "".join(f'\n- {name}' for name in offered)

# Script: ask for a season, print the average points with/without VAR, then
# animate a horizontal bar chart comparing both point totals for every team.
try:
    # Read and parse data from CSV file using URL
    data = pd.read_csv("https://raw.githubusercontent.com/SebastianAshcallay/CMSC206/main/GroupProject_PL_Table[version2].csv")

    # Ask user to indicate which season to display
    season = input(f"\nWhich season do you want to graph? {seasons_str(data)}\n>>> ").strip()

    # Fail early with a readable message (instead of a bare KeyError repr) when
    # the answer does not name a season that has every column used below.
    needed_cols = [season, f'Pts ({season})', f'Pts (No VAR)({season})', f'Shift({season})']
    absent_cols = [col for col in needed_cols if col not in data.columns]
    if absent_cols:
      raise ValueError(f"Season '{season}' cannot be graphed; missing column(s): {', '.join(absent_cols)}")

    # Team labels for the chosen season, shortened where an abbreviation exists
    teams = [abbv.get(name, name) for name in data[season].tolist()]

    # Sets VAR vs. no VAR overall points for each team in the indicated season
    varPts = data[f'Pts ({season})'].tolist()
    noVarPts = data[f'Pts (No VAR)({season})'].tolist()

    # Position shift per team; read once here because it is the same on every frame
    shifts = data[f'Shift({season})'].tolist()

    # Add Average Points (VAR v. No VAR) as one extra bar pair
    avgVarPts = getAverage(varPts)
    avgNoVarPts = getAverage(noVarPts)
    roundedVar = int(round(avgVarPts))
    roundedNoVar = int(round(avgNoVarPts))
    varPts.append(roundedVar)
    noVarPts.append(roundedNoVar)
    teams.append('Avg Pts')

    # Print Results
    print(f"\nIn the Premier League's {season} season, the participating teams averaged"
            + f"\na total of {roundedVar} pts. with VAR, and a total of {roundedNoVar} pts. without VAR.")
    if roundedNoVar > roundedVar:
      print('\n>>> Teams Averaged more Points without VAR <<<')
    elif roundedVar > roundedNoVar:
      print('\n>>> Teams Averaged more Points with VAR <<<')
    else:
      print('\n>>> Teams Averaged same number of Points with/without VAR <<<')

    # Describe Red/Green X-axis Labels
    print('\n\nNote: X-axis label colors describe which teams were benefitted/disadvantaged'
            + '\nby the implementation of VAR:')
    print('* Red: Disadvantaged (dropped positions in final standings)')
    print('* Green: Benefitted (climbed positions in final standings)')
    print('* Black: Same position with/without VAR penalty calls\n\n')

    # Create Graph Background
    df = pd.DataFrame(dict(graph=teams, n=varPts, m=noVarPts))
    ind = np.arange(len(df))
    width = 0.4

    fig, ax = plt.subplots()

    # intervalList samples each bar at 50 points (np.linspace's default), so the
    # bars grow over 50 steps: step 0 is all zeros, step 49 is the final total.
    n_steps = 50

    def animate(i):
        # Drive the step from the frame number FuncAnimation passes in rather
        # than from a module-level counter: an extra redraw (or another cell
        # rebinding the counter) can then never index past the last sample.
        step = min(i, n_steps - 1)
        a1 = listAt(varPts, step) # Sets points per frame
        a2 = listAt(noVarPts, step) # Sets points (no VAR) per frame

        ax.cla() # Clears graph, sets space for new graph
        ax.barh(ind, a1, width, color='blue', label='VAR (Penalty Calls)') # First bar
        ax.barh(ind + width, a2, width, color='green', label='No VAR') # Second bar
        ax.set(yticks=ind + width, yticklabels=df.graph, ylim=[2*width - 1, len(df)], xlim=[0,110])
        ax.invert_yaxis()
        ax.set_xlabel(f'Pts (Season: {season})')

        # Show which teams had advantages/disadvantages as a result of VAR.
        # The extra 'Avg Pts' label has no shift entry and keeps its default color.
        for tick_label, shift in zip(ax.get_yticklabels(), shifts):
          if shift < 0:
            tick_label.set_color("red")
          elif shift > 0:
            tick_label.set_color("green")

        ax.legend()
        ax.set_title("Premier League Table (VAR vs. No VAR)")

    # Keep a reference to the animation so it is not garbage-collected before it runs
    ani = FuncAnimation(fig, animate, frames=n_steps, interval=100, repeat=False)
    plt.show()

except Exception as ex:
    print(f'Error: [{str(ex)}]')

In [None]:
from pandas import *
import re
import matplotlib.pyplot as plt
plt.style.use('ggplot')
from matplotlib.animation import FuncAnimation


def printList(aList):
  """Print each item of ``aList`` on its own line as a ``'* item'`` bullet."""
  bullet = '* {}'.format
  for entry in aList:
    print(bullet(entry))

def lowest10(n):
  """Return the largest multiple of 10 that is strictly below ``n``.

  Used to pick the lower y-axis limit, e.g. 40 -> 30, 45 -> 40, 41 -> 40.
  Computed arithmetically so it is O(1) and also terminates for non-integral
  input (the former step-by-2 loop never reached a multiple of 10 for a value
  such as 40.5).
  """
  # -(-n // 10) is ceil(n / 10) written with floor division only; going one
  # decade below that ceiling makes the result strictly smaller than n.
  return -(-n // 10) * 10 - 10

def highest10(n):
    """Return the smallest multiple of 10 that is strictly above ``n``.

    Used to pick the upper y-axis limit, e.g. 40 -> 50, 45 -> 50, 49 -> 50.
    Computed arithmetically so it is O(1) and also terminates for non-integral
    input (the former step-by-2 loop never reached a multiple of 10 for a
    value such as 40.5).
    """
    # n // 10 * 10 is the multiple of 10 at or below n; one decade above it is
    # therefore always strictly greater than n.
    return (n // 10 + 1) * 10

def returnElements(elements, idxs):
    """Return a new list with ``elements[0]`` through ``elements[idxs]`` inclusive.

    A negative ``idxs`` yields an empty list.

    Raises:
        IndexError: if ``idxs`` is beyond the last valid index of ``elements``.
    """
    return [elements[pos] for pos in range(idxs + 1)]

def returnElementsRange(elements, start, end):
    """Return a new list with ``elements[start]`` through ``elements[end]`` inclusive.

    An empty list is returned when ``end < start``.

    Raises:
        IndexError: if the range reaches beyond the last valid index.
    """
    # The previous loop iterated `start` over range(end + 1) but indexed with an
    # undefined `i`, so it raised NameError (or used a stray global) and ignored
    # the requested starting position.
    return [elements[pos] for pos in range(start, end + 1)]

# Script: ask for one team, collect its points for every season column found in
# the CSV, and animate a line plot of those points; a second line with the
# "No VAR" points is added once the animation has advanced far enough.
try:
    # Teams offered to the user; the answer is validated against this list below.
    teamList = ['Man City', 'Man Utd', 'Tottenham', 'Liverpool', 'Chelsea', 'Arsenal', 'Burnley', 'Everton', 'Leicester City', 'Newcastle Utd',
                  'Crystal Palace', 'West Ham', 'Brighton', 'Southampton']

    data = read_csv('https://raw.githubusercontent.com/SebastianAshcallay/CMSC206/main/GroupProject_PL_Table[version2].csv')
    print('Which Premier League team would you like to see points for?')
    printList(teamList)
    team = input('\n--> ')

    # Rejected input is reported through the generic handler at the bottom of
    # this block, which prints the message as 'Error: [...]'.
    if team not in teamList:
      raise Exception('Invalid team! Please try entering a team from the list')

    pts = []             # actual points of the team, one entry per season
    pts_title = []       # season labels matching `pts`
    ptsNoVar = []        # "No VAR" points, collected from the third season on
    pts_titleNoVar = []  # season labels matching `ptsNoVar`
    pts_MIN = 0
    pts_MAX = 0

    # Experiment: Seeing how many seasons are recorded in CSV file. Automatically updates with CSV file
    # A column counts as a season when its name starts like '20YY/YY'.
    column_names = list(data.columns)
    season_cnt = 0
    for cols in column_names:
      if re.match(r"20\d\d/\d\d", cols):
        season_cnt += 1
    # print(season_cnt)

    
    # NOTE(review): column names are rebuilt as '20{17+i}/{18+i}', i.e. this
    # assumes the season columns run consecutively starting at 2017/18 -
    # confirm against the CSV whenever seasons are added.
    for i in range(season_cnt):
      ptsList = data[f'Pts (20{i + 17}/{i + 18})'].tolist() # Get points for every season
      teamsList = data[f'20{i + 17}/{i + 18}'].tolist() # Get teams for every season
      # list.index raises ValueError if the team is not present in that season.
      pts.append(ptsList[teamsList.index(team)])
      pts_title.append(f'20{i + 17}/{i + 18}')

      # Find min and max pts of team between these past seasons
      if i == 0:
        pts_MIN = ptsList[teamsList.index(team)]
        pts_MAX = ptsList[teamsList.index(team)]
      if i != 0:
        if pts_MIN > ptsList[teamsList.index(team)]:
          pts_MIN = ptsList[teamsList.index(team)]
        if pts_MAX < ptsList[teamsList.index(team)]:
          pts_MAX = ptsList[teamsList.index(team)]
      # "No VAR" columns are only read from the third season onwards
      # (2019/20 under the naming scheme above).
      if i >= 2:
        ptsNoVarList = data[f'Pts (No VAR)(20{i + 17}/{i + 18})'].tolist()
        ptsNoVar.append(ptsNoVarList[teamsList.index(team)])
        pts_titleNoVar.append (f'20{i + 17}/{i + 18}')

    # y-axis limits: nearest multiples of 10 strictly outside the observed range
    pts_LIMITS = (lowest10(pts_MIN), highest10(pts_MAX)) 


    # NOTE(review): `animate` ignores its frame argument `i` and advances this
    # module-level counter instead, so every call (including any extra call
    # made by matplotlib or by other code rebinding `count`) moves the plot one
    # season forward; returnElements raises IndexError once `count` passes the
    # last season. Presumably the frame argument could be used instead - verify.
    count = 0
    def animate(i):
      global count
      x = returnElements(pts_title, count) # Sets extent of seasons (x-axis)
      y = returnElements(pts, count) # Sets trajectory of line plot (y-axis)
      
      plt.cla() # clear axis after plotting individual lines
      plt.plot(x, y, label = 'Actual Pts (VAR since 2019/20)') # selecting the x and y variables to plot

      # VAR introduction in 2019/20 season: Introduce new line
      # From the fourth call on, draw the "No VAR" series; the second season's
      # actual value (pts[1]) is prepended so the new line starts on the
      # existing one.
      if count >= 3:
        x_1 = returnElements(pts_titleNoVar, count - 2)
        x_1.insert(0, pts_title[1])
        y_1 = returnElements(ptsNoVar, count - 2)
        y_1.insert(0, pts[1])
        plt.plot(x_1, y_1, label = 'Pts (Excluding VAR since 2019/20)')
        
      plt.xlabel('Seasons') # label x axis
      plt.ylim((pts_LIMITS[0], pts_LIMITS[1])) # Set limits according to point interval between seasons
      plt.ylabel('Pts') # label y axis
      plt.legend()
      plt.title(f'Premier League Pts (2017-22): {team}')

      count += 1

    # frames is one less than the number of seasons - presumably because
    # FuncAnimation also calls `animate` once for the initial draw (TODO confirm).
    ani = FuncAnimation(plt.figure(), animate, interval = 500, frames = (season_cnt - 1), repeat = False)
    plt.show()
    
# Any failure above (network, missing column, invalid team) is reported as a
# one-line message rather than a traceback.
except Exception as ex:
    print(f'Error: [{str(ex)}]')

In [None]:
import requests
import matplotlib.pyplot as plt
 
def printGraph(teamSet, scoreSet, xlabel, ylabel, title):
  """Draw a vertical bar chart on the current axes and show it.

  Args:
    teamSet: category labels, one per bar (x-axis).
    scoreSet: bar heights, in the same order as ``teamSet``.
    xlabel: x-axis label.
    ylabel: y-axis label.
    title: chart title.
  """
  axes = plt.gca()

  # Start from a clean slate so an earlier chart on the same axes is not kept
  axes.cla()
  axes.bar(teamSet, scoreSet)

  axes.set_xlabel(xlabel)
  axes.set_ylabel(ylabel)
  axes.set_title(title)

  # Rotate the tick labels so they stay readable
  plt.xticks(rotation=90)

  plt.show()

def sign(stringNum):
  """Convert a signed number string such as '+3', '-2' or '0' to an int.

  Args:
    stringNum: text holding an integer with an optional leading '+' or '-';
      surrounding whitespace is ignored.

  Returns:
    The integer value, negative for a leading '-'.

  Raises:
    ValueError: if the text is not an integer.
  """
  # int() already understands an explicit '+' or '-' prefix. The previous
  # "contains the character 0" shortcut turned values like '+10' or '+20' into 0.
  return int(stringNum.strip())

def getSeason(html):
  """Cut a season label such as '2021-22' out of the page text.

  The label starts at the first '20' found after the '<title>' tag and ends
  two characters after the next '-'. No check is made for a failed search, so
  a page without these markers yields an arbitrary (possibly empty) slice.
  """
  search_from = html.find('<title>') + 1
  first = html.find('20', search_from)
  last = html.find('-', first) + 3
  return html[first:last]

def getTeams(html):
  """Return the 20 team names listed after the 'VAR overturns' marker.

  Starting at the first '<h2>' that follows the first '<br>' after the
  marker, each name is the text between 'class="">' and the next '</a>'.
  Exactly 20 entries are read; failed searches are not detected.
  """
  marker = 'class="">'
  names = []
  section = html.find('VAR overturns')
  heading = html.find('<h2>', html.find('<br>', section))
  for _ in range(20):
    begin = html.find(marker, heading) + 9  # 9 == len(marker)
    link_close = html.find('</a>', begin)
    names.append(html[begin:link_close])
    heading = html.find('<h2>', link_close)

  return names

def getNetScores(html):
  """Return the 20 net scores listed after the 'VAR overturns' marker.

  For each '<h2>' entry the score is the text between the first space after
  the team link's '</a>' and the closing '</h2>', converted with ``sign``.
  Exactly 20 entries are read; failed searches are not detected.
  """
  marker = 'class="">'
  scores = []
  section = html.find('VAR overturns')
  heading = html.find('<h2>', html.find('<br>', section))
  for _ in range(20):
    link_close = html.find('</a>', html.find(marker, heading) + 9)
    gap = html.find(' ', link_close)
    heading_close = html.find('</h2>', link_close)
    scores.append(sign(html[gap + 1:heading_close]))
    heading = html.find('<h2>', link_close)

  return scores

# Script: ask for a season, download the matching ESPN article and chart each
# team's VAR net score.
try:
  print()
  link1 = "https://www.espn.com/soccer/english-premier-league/story/4452736/how-var-decisions-have-affected-every-premier-league-club-in-2021-22"
  link2 = 'https://www.espn.com/soccer/english-premier-league/story/4182135/how-var-decisions-affected-every-premier-league-club-in-2020-21'
  link3 = 'https://www.espn.com/soccer/english-premier-league/story/3929823/how-var-decisions-have-affected-every-premier-league-club'
  # Season label typed by the user -> article for that season
  season_links = {'2021/22': link1, '2020/21': link2, '2019/20': link3}

  answer = input('Which season do you want to graph?\n* 2019/20\n* 2020/21\n* 2021/22\n--> ').strip()
  link = season_links.get(answer)
  if link is None:
    # Stop here: previously execution continued and failed with a confusing
    # "name 'pageN' is not defined" error on top of this message.
    print('ERROR: [Season Out of Bounds]')
  else:
    # A timeout keeps the cell from hanging on a stalled connection, and an
    # HTTP error status is reported as such instead of being parsed as a page.
    pageN = requests.get(link, timeout=30)
    pageN.raise_for_status()

    htmlN = pageN.text
    print('HTML handled...')
    seasonN = getSeason(htmlN)
    print('Season handled...')
    teamsN = getTeams(htmlN)
    print('Teams handled...')
    netN = getNetScores(htmlN)
    print('Net scores handled...')
    printGraph(teamsN, netN, "Team", "Net Score", f"Premier League VAR overturns - Net Scores: Season {seasonN}")
    print()

except Exception as ex:
  print(f'Error: [{str(ex)}]')

In [None]:
# -*- coding: utf-8 -*-
"""ParserCode_PremierLeague.ipynb 

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1ciYiOM4VEGN12KHfrNE66t7N4oysfDLR
"""

import requests
import pandas as pd

# Short club name -> longer club name. getTeams_Placed looks a scraped name up
# among the values and, when found, substitutes the corresponding key.
team_legend = {
    "Bournemouth": "AFC Bournemouth",
    "Nottm Forest": "Nottingham Forest",
    "Newcastle": "Newcastle United",
    "Wolves": "Wolverhampton Wanderers",
    "West Ham": "West Ham United",
    "Leeds": "Leeds United",
    "Tottenham": "Tottenham Hotspur",
}

# Retrieve information for 2022/23 season via URL
def sign(stringNum):
  """Convert a signed number string such as '+3', '-2' or '0' to an int.

  Args:
    stringNum: text holding an integer with an optional leading '+' or '-';
      surrounding whitespace is ignored.

  Returns:
    The integer value, negative for a leading '-'.

  Raises:
    ValueError: if the text is not an integer.
  """
  # int() already understands an explicit '+' or '-' prefix. The previous
  # "contains the character 0" shortcut turned values like '+10' or '+20' into
  # 0, which silently zeroed goal differences that are multiples of ten.
  return int(stringNum.strip())

def getSeason(html):
  """Cut a season label such as '2022-23' out of the page text.

  The label starts at the first '20' found after the '<title>' tag and ends
  two characters after the next '-'. No check is made for a failed search, so
  a page without these markers yields an arbitrary (possibly empty) slice.
  """
  search_from = html.find('<title>') + 1
  first = html.find('20', search_from)
  last = html.find('-', first) + 3
  return html[first:last]

def getTeams(html):
  """Return the 20 team names listed after the 'net score' marker.

  Starting at the first '<h2>' that follows the first '<br>' after the
  marker, each name is the text between 'class="">' and the next '</a>'.
  Exactly 20 entries are read; failed searches are not detected.
  """
  marker = 'class="">'
  names = []
  section = html.find('net score')
  heading = html.find('<h2>', html.find('<br>', section))
  for _ in range(20):
    begin = html.find(marker, heading) + 9  # 9 == len(marker)
    link_close = html.find('</a>', begin)
    names.append(html[begin:link_close])
    heading = html.find('<h2>', link_close)

  return names

def getNetScores(html):
  """Return the 20 net scores listed after the 'net score' marker.

  For each '<h2>' entry the score is the text between the first space after
  the team link's '</a>' and the closing '</h2>', converted with ``sign``.
  Exactly 20 entries are read; failed searches are not detected.
  """
  marker = 'class="">'
  scores = []
  section = html.find('net score')
  heading = html.find('<h2>', html.find('<br>', section))
  for _ in range(20):
    link_close = html.find('</a>', html.find(marker, heading) + 9)
    gap = html.find(' ', link_close)
    heading_close = html.find('</h2>', link_close)
    scores.append(sign(html[gap + 1:heading_close]))
    heading = html.find('<h2>', link_close)

  return scores

def getDecisionsAgainst(html):
  """Return ``{team name: count}`` for the list after the first '- decisions' marker.

  Reads exactly 20 entries. A name is the text between 'class="">' and
  '</a>'; its count is the integer between the following space and '</h2>'.

  Raises:
    ValueError: if a count cannot be parsed as an integer.
  """
  marker = 'class="">'
  names = []
  counts = []
  section = html.find('- decisions')
  link_open = html.find(marker, html.find('<br>', section))
  for _ in range(20):
    link_close = html.find('</a>', link_open + 9)
    names.append(html[link_open + 9:link_close])
    gap = html.find(' ', link_close)
    heading_close = html.find('</h2>', link_close)
    counts.append(int(html[gap + 1:heading_close]))

    link_open = html.find(marker, heading_close)

  return dict(zip(names, counts))

def getDecisionsFor(html):
  """Return ``{team name: count}`` for the list after the second '- decisions' marker.

  The first '- decisions' occurrence is skipped by searching again from 30
  characters past it. From there exactly 20 entries are read the same way as
  in getDecisionsAgainst: name between 'class="">' and '</a>', count between
  the following space and '</h2>'.

  Raises:
    ValueError: if a count cannot be parsed as an integer.
  """
  marker = 'class="">'
  names = []
  counts = []
  first_section = html.find('- decisions')
  second_section = html.find('- decisions', first_section + 30)
  link_open = html.find(marker, html.find('<br>', second_section + 10))
  for _ in range(20):
    link_close = html.find('</a>', link_open + 9)
    names.append(html[link_open + 9:link_close])
    gap = html.find(' ', link_close)
    heading_close = html.find('</h2>', link_close)
    counts.append(int(html[gap + 1:heading_close]))

    link_open = html.find(marker, heading_close)

  return dict(zip(names, counts))

def getTeams_Placed(html):
  """Return 20 team names from the ``title`` attribute of successive '<abbr>' tags.

  Names are returned in page order. An '&amp;' entity is turned back into
  '&', and a name that appears among the values of ``team_legend`` is
  replaced by its key.
  """
  opener = '<abbr style="text-decoration:none" title="'
  placed = []
  tag = html.find(opener)
  for _ in range(20):
    # The opener is 42 characters long; the closing quote is searched from one
    # character into the name.
    closing_quote = html.find('"', tag + 43)
    label = html[tag + 42:closing_quote]
    if '&amp;' in label: # ampersand
      label = label[:label.find('&')] + '&' + label[label.find(';') + 1:]
    if label in team_legend.values():
      label = next(short for short, full in team_legend.items() if full == label)
    placed.append(label)
    tag = html.find(opener, closing_quote)

  return placed

def getStats(html, stat):
  """Parse 20 table rows of statistics and return one column of them.

  Every row is read as eight consecutive cells: games, wins, draws, losses,
  goals for, goals against, goal difference and points. All columns are
  parsed on every call; ``stat`` only selects which one is returned.

  Args:
    html: page text containing the table.
    stat: one of 'games', 'wins', 'losses', 'draws', 'goals_f', 'goals_a',
      'goal_diff', 'points' (case-insensitive).

  Returns:
    A list with 20 entries for the chosen column. 'games' entries are kept as
    strings, 'goal_diff' entries come from ``sign``, all others are ints.
    For an unknown ``stat`` an error line is printed and None is returned.

  Raises:
    ValueError: if a numeric cell cannot be converted.
  """
  plain_cell = '<td class="Table__TD"><span class="stat-cell">'
  colored_cell = '<span class="stat-cell clr-'

  def read_cell(cursor, opener=plain_cell, skip=0):
    # Text between the next `opener` at or after `cursor` (plus `skip` extra
    # characters) and the following '<'; also returns where that '<' is.
    begin = html.find(opener, cursor) + len(opener) + skip
    finish = html.find('<', begin + 1)
    return html[begin:finish].strip(), finish

  columns = {
      'games': [], 'wins': [], 'draws': [], 'losses': [],
      'goals_f': [], 'goals_a': [], 'goal_diff': [], 'points': [],
  }

  cursor = 0
  for _ in range(20):
    # games played (left as text)
    text, cursor = read_cell(cursor)
    columns['games'].append(text)

    # wins, draws, losses, goals in favor, goals against
    for key in ('wins', 'draws', 'losses', 'goals_f', 'goals_a'):
      text, cursor = read_cell(cursor)
      columns[key].append(int(text))

    # goal difference sits in a differently classed span; 10 further
    # characters are skipped after the class prefix before the value starts
    text, cursor = read_cell(cursor, colored_cell, 10)
    columns['goal_diff'].append(sign(text))

    # points
    text, cursor = read_cell(cursor)
    columns['points'].append(int(text))

  wanted = stat.lower()
  if wanted in columns:
    return columns[wanted]
  print('Error: [Option chosen is not compatible with list of options]')
# Script: scrape the VAR article and the league table, line the VAR figures up
# with the table order, and write everything to a CSV file.
try:
  print()
  linkN = 'https://www.espn.com/soccer/english-premier-league/story/4722849/how-var-decisions-have-affected-every-premier-league-club-in-2022-23'
  pageN = requests.get(linkN)
  htmlN = pageN.text

  # VAR figures from the article
  seasonN = getSeason(htmlN)
  teamsN = getTeams(htmlN)
  netN = getNetScores(htmlN)
  teams_against = getDecisionsAgainst(htmlN)
  teams_for = getDecisionsFor(htmlN)

  teamNet = {club: netN[idx] for idx, club in enumerate(teamsN)}

  # League table
  linkStd = 'https://www.espn.com/soccer/table/_/league/eng.1'
  pageStd = requests.get(linkStd)
  htmlStd = pageStd.text
  teamsStd = getTeams_Placed(htmlStd)
  pts_VAR = getStats(htmlStd, 'points')
  goalDifference = getStats(htmlStd, 'goal_diff')
  winStd = getStats(htmlStd, 'wins')
  drawStd = getStats(htmlStd, 'draws')
  lossStd = getStats(htmlStd, 'losses')

  # Put the VAR figures in table order; a team name missing from the article
  # data raises KeyError, which the handler below reports.
  teams_against_sorted = [teams_against[club] for club in teamsStd]
  teams_for_sorted = [teams_for[club] for club in teamsStd]
  netN_sorted = [teamNet[club] for club in teamsStd]

  #  Create csv
  position = list(range(1, 21))

  cols = {
      'Position': position,
      'Teams': teamsStd,
      'Pts': pts_VAR,
      'GD': goalDifference,
      'Wins': winStd,
      'Draws': drawStd,
      'Losses': lossStd,
      'Net (VAR)': netN_sorted,
      'Against (VAR)': teams_against_sorted,
      'For (VAR)': teams_for_sorted,
  }

  df = pd.DataFrame(cols)
  df.to_csv(f'SampleTable{seasonN}.csv')

except Exception as ex:
  print(f'Error: [{str(ex)}]')