<a href="https://colab.research.google.com/github/ashleykitzman/HelloWorldRepo/blob/main/Assignment2_Team1_CIS418.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
########################################################################################################
'''
Task 1: Import all the libraries (requests, beautiful soup, pandas, numpy, and matplotlib)
'''

# imports libraries
from bs4 import BeautifulSoup
import requests
import matplotlib.pyplot as plt
import numpy as np
import math
import pandas as pd
import seaborn as sns

########################################################################################################

### You are not allowed to change the signature of any of the methods for the following class.
### You also cannot add any extra attributes to the class.

class WorldCupDataAnalysis():
    def __init__(self, url='https://fbref.com/en/comps/1/possession/World-Cup-Stats#stats_possession::none'):
        self.data = None            # Stores the dataframe with the data scrapped from the website
        self.url = url              # Stores the url for the website
        self.ranked_col_names = []  # Stores the column names for which a rank is calculated (Check question 2)
        '''
        The following dictionary holds the color code for the plots for each team
        '''
        self.team_colors = {
        'ar':'#97233f',
        'au':'#a71930',
        'be':'#241773',
        'br':'#00338d',
        'cm':'#0085ca',
        'ca':'#0b162a',
        'cr':'#fb4f14',
        'hr':'#311d00',
        'dk':'#041e42',
        'ec':'#002244',
        'en':'#0076b6',
        'fr':'#203731',
        'de':'#03202f',
        'gh':'#002c5f',
        'ir':'#006778',
        'jp':'#e31837',
        'kr':'#002a5e',
        'mx':'#003594',
        'ma':'#008e97',
        'nl':'#4f2683',
        'pl':'#002244',
        'pt':'#d3bc8d',
        'qa':'#0b2265',
        'sa':'#125740',
        'sn':'#000000',
        'rs':'#004c54',
        'es':'#ffb612',
        'ch':'#aa0000',
        'tn':'#002244',
        'us':'#d50a0a',
        'uy':'#0c2340'
      }

    '''
    Task 2: Populate this map to with the continents for the teams. Check https://en.wikipedia.org/wiki/2022_FIFA_World_Cup_qualification
    '''
    # While looking at Wikipedia, Teams are divided into categories which include: AFC (Asia): 4 or 5, CAF (Africa): 5, CONCACAF (North, Central America and Caribbean): 3 or 4 ,
    # CONMEBOL (South America): 4 or 5, OFC (Oceania): 0 or 1, UEFA (Europe): 13, Hosts: 1
    # Code below populates the map with each team and their corresponding continent

    self.continent_map = {"ar": "South America",
                            "au": "Asia",
                            "be": "Europe",
                            "br": "South America",
                            "cm": "Africa",
                            "ca": "North America",
                            "cr": "Central America",
                            "hr": "Europe",
                            "dk": "Europe",
                            "ec": "South America",
                            "en": "Europe",
                            "fr": "Europe",
                            "de": "Europe",
                            "gh": "Africa",
                            "ir": "Asia",
                            "jp": "Asia",
                            "kr": "Asia",
                            "mx": "North America",
                            "ma": "Africa",
                            "nl": "Europe",
                            "pl": "Europe",
                            "pt": "Europe",
                            "qa": "Asia",
                            "sa": "Asia",
                            "sn": "Africa",
                            "rs": "Europe",
                            "es": "Europe",
                            "ch": "Europe",
                            "tn": "Africa",
                            "us": "North America",
                            "uy": "South America"
                            }

    def scrape(self, url='https://fbref.com/en/comps/1/possession/World-Cup-Stats#stats_possession::none'):
        """Task 3: Extract the table of countries and their possession stats.

        Downloads the page, takes the table whose id is
        ``stats_squads_possession_for``, reads the column names from the
        second header row and builds one DataFrame row per body row.  The
        row-header cell (the squad) is kept as text; every other cell is
        converted to ``float``.

        Input: url (string): page to scrape.  If a falsy value (``None`` or
            an empty string) is passed, the url from object initialization
            is used instead.
        Output: This method populates the ``data`` attribute of the object.
            Nothing is returned from this method.
        Raises: requests.HTTPError when the server answers with an error
            status; IndexError when the expected table is not in the page.
        """
        possession_page = requests.get(url or self.url)
        # Fail loudly on 4xx/5xx instead of parsing an error page.
        possession_page.raise_for_status()
        soup = BeautifulSoup(possession_page.text, 'lxml')

        # Look the table up once and reuse it for header and body.
        table = soup.find_all('table', id='stats_squads_possession_for')[0]

        # The second <tr> of the table carries the column names.
        header_row = table.find_all('tr')[1]
        column_names = [cell.text for cell in header_row.find_all('th')]

        # The data rows live between the <tbody> tags.
        body_rows = table.find_all('tbody')[0].find_all('tr')

        records = []
        for row in body_rows:
            row_headers = row.find_all('th')
            if not row_headers:
                # No row-header cell means no squad to attach values to.
                continue
            record = [row_headers[0].text]
            for cell in row.find_all('td'):
                text = cell.text.strip().replace(',', '')
                try:
                    record.append(float(text))
                except ValueError:
                    # Empty or non-numeric cell: keep the row, mark the value missing.
                    record.append(float('nan'))
            records.append(record)

        self.data = pd.DataFrame(records, columns=column_names)

    '''
    Task 4: Create new columns that indicate the percentile (rank) of the data in each column except for the 'Squad' column. For example, for the 'Succ' column, a new column is added called 'Succ_rank'.
    This method adds new columns to the data attribute of the object.
    This method also adds the columns (<col_name>) for which <col_name>_rank column is created to the list ranked_col_names.

    Input: None.
    Output:  Nothing is returned from this method.
    '''


    def rank(self):
      for column in self.data.columns: #Loops through all columns created in the scrape method
            if column != 'Squad': #Checks to see if the column name is squad
             rankcol = f'{column}_rank' #Places the newly created rank column in front of all previously created columns
             self.data[rankcol] = self.data[column].rank(pct=True) # booleen is set to true to add the percentile rank for the data
             self.ranked_col_names.append(column) #adds the column to the self.ranked_col_names


    '''
    Task 5: Splits the 'Squad' column value into two new columns 'Squad' and 'Code'. For example, a value 'ar Argentina' will have a code 'ar' and squad 'Argentina'.
    After adding these columns the old squad column is dropped from the dataframe.
    The method also converts the data type for all the columns (that should be numeric) to numeric.
    All these changes must be reflected on the data attribute of the object.

    Input: None.
    Output:  Nothing is returned from this method.
    '''

    def preprocess(self):
      self.data[['Squad','Code']] = self.data['Squad'].str.split(' ',Expand = True)  #Splits the Squad column into two new columns, Squad and code usng str.split() method
      self.data['Squad'] = self.data['Squad'].astype(int) #Converts squad column data to numeric value
      self.data['Code'] = self.data['Code'].astype(int) # Converts code column data
      self.data.drop(columns=['Squad'], inplace=True) # inplace set to true to acutally modify the data frame, drop the old column is dropped

    '''
    Task 6: Returns the best teams from each continent. The overall ranking for each team is calculated based on an aggregate function (e.g. sum or mean) of the Rank columns. Adds this new Rank column as overall_Rank to the DataFrame.

    Input: None.
    Output:  Returns a DataFrame with the best teams for each continent.
    '''

    def continentBest(self):
      self.data['overall_Rank'] = self.data[self.ranked_col_names].mean(axis=1)

      best_continent_teams = pd.DataFrame(columns=['Continent', 'Squad', 'overall_Rank'])

      for continent, teams in self.continent_map.items():
          continent_data = self.data[self.data['Code'].isin(teams)]
          best_team = continent_data.loc[continent_data['overall_Rank'].idxmin()]
          best_continent_teams = best_continent_teams.append({'Continent': continent, 'Squad': best_team['Squad'], 'overall_Rank': best_team['overall_Rank']}, ignore_index=True)

      return best_continent_teams


    '''
    Task 7: Returns the name of the team that has the most number of touches in the attacking 3rd and attacking penalty area.

    Input: None.
    Output:  Returns a string denoting the best team for attacking.
    '''

    def bestAttack(self):
      best_Attack = self.data.loc[self.data['Att 3rd'].idmax(), 'Squad'] # takes the index max of the attacking 3rd
      string_best_Attack = 'The best team for attacking is' + best_Attack # creates a string representation of the best team attacking
      return string_best_Attack # returns string

    '''
    Task 8: Returns the name of the team that has the most number of touches in the defensive 3rd and defensive penalty area.

    Input: None.
    Output:  Returns a string denoting the best team for defense.
    '''

    def bestDefense(self):
      best_Defense = self.data.loc[self.data['Def 3rd'].idmax(), 'Squad'] # takes the index max for the defensive 3rd
      string_best_Defense = 'The best team for defending is' + best_Defense # string that denotes best defense
      return string_best_Defense #returns string

    '''
    Task 9: Returns the name of the team that has the most number of touches in the midfield 3rd area.

    Input: None.
    Output:  Returns a string denoting the best team for midfield.
    '''

    def bestMidfield(self):
      best_midfield = self.data.loc[self.data['Mid 3rd'].idmax(), 'Squad'] # takes the max for the middle 3rd
      string_best_midfield = 'The best team for midfield is' + best_midfield # string for midfield best
      return string_best_midfield # returns string

    '''
    Task 10: Returns the rank data for the team passed as parameter. The rank data includes the ranks apart from the overall_Rank. This method also returns the color code for the team.

    Input: String specifying the name of the team (i.e. 'Squad').
    Output:  Returns a numpy array with the ranking data and string color code.
    '''

    def getRanksData(self, team):
      rankData = self.data.loc[self.data['Poss'],'Squad',self.team_colors] # locates squad and the specified team colors
      str_rankData = 'Team passed data: ' + rankData # string representation
      return str_rankData # returns ranking data and string color code

    '''
    Task 11: Draws a radar chart (on the axes passed as parameter) with the data, color code, and variable names passed as parameter.

    Input: A matplotlib axes object.
    An numpy array for the Data to plot.
    A list of var_names for which the function plots the data.
    A string specifying the color to use for the plot.
    Output:  Nothing is returned from this method.
    '''

    def __drawRadarChart(self, ax, data, var_names= None, color=None):

      # Number of variables based on length of var_names
      num_vars = len(var_names)

      # Calculates the angle for each axis in the radar chart
      angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist()
      angles += angles[:1]  # Closes the plot with the first angle at the end

      # Ploting the data
      ax.fill(angles, data, color=color, alpha=0.25)
      ax.set_yticklabels([])  # Hiding the y-axis labels
      ax.set_xticks(angles[:-1]) # sets the x-axis ticks based on the calculated angles
      ax.set_xticklabels(var_names) # assigns the lables to the ticks

      # Adding the title
      ax.set_title('Radar Chart', fontsize=12, color='gray')

      # Filling the area inside of the radar chart
      ax.fill(angles, data, color=color, alpha=0.25)

      # Add gridlines
      ax.grid(color='gray', linestyle='-', linewidth=0.5)

      # Set axis limits
      ax.set_ylim(0, 1)


    '''
    Task 12: Draws a radar chart (on the axes passed as parameter) for the team passed as parameter. If the var_names is None then uses the columns in the 'ranked_col_names' list for the radar chart.

    Input: A matplotlib axes object.
    A string specifying the team name (i.e. Squad).
    An optional list of variable names to be used in the radar chart and a string color code is also passed as input to this method.
    Output:  Nothing is returned from this method.
    '''

    def visualizeTeam(self, ax, team_name, var_names = None, color=None):

