In [1]:
#imports
import datetime
import time

import numpy as np
import pandas as pd
from pymongo import MongoClient
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Start the Chrome browser session that the scraping code below drives.
driver = webdriver.Chrome()

#player class
class Player:
    """Simple container for one player's identifying fields and statistics.

    The constructor stores every argument unchanged on the attribute of the
    same name; no validation or conversion is performed.
    """

    def __init__(self, id, name, team, goals, saves, offsides,
                 minutesPlayed, goalsConceded, played, yellowCards, redCards):
        # Who the player is.
        self.id, self.name, self.team = id, name, team
        # Attacking / goalkeeping figures.
        self.goals, self.saves, self.offsides = goals, saves, offsides
        # Playing time and goals let in.
        self.minutesPlayed, self.goalsConceded, self.played = (
            minutesPlayed, goalsConceded, played)
        # Disciplinary record.
        self.yellowCards, self.redCards = yellowCards, redCards

# go to the main page (the clubs listing)
clubs = "https://www.uefa.com/uefachampionsleague/clubs/"
driver.get(clubs)

# list of per-player dicts; one entry is appended for every scraped player
teamsData = []

# player counter (used as the running number in the printed progress lines)
count = 0

# save number of links to teams
# Wait until the team links are in the DOM before counting them: counting
# straight after driver.get() can see zero links, in which case nothing at all
# would be scraped.
teamSelector = "a.team-wrap"
try:
    WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, teamSelector)))
except TimeoutException:
    print("no teams")
# find_elements(By..., ...) is used instead of find_elements_by_css_selector,
# which recent Selenium 4 releases no longer provide.
nTeams = len(driver.find_elements(By.CSS_SELECTOR, teamSelector))

# Keys read from a player's parsed statistics, in the order of the numeric
# arguments of Player.__init__.
statKeys = ("GOALS", "SAVES", "OFFSIDES", "MINUTES PLAYED",
            "GOALS CONCEDED", "PLAYED", "YELLOW CARDS", "RED CARDS")


def _to_int(text):
    """Return text parsed as an int, or None when it is not a plain integer."""
    try:
        return int(text)
    except (TypeError, ValueError):
        return None


def _parse_statistics(pairs):
    """Build a statistics dict from (label, value) text pairs of a player page.

    - "CARDS" holds two whitespace-separated numbers: yellow cards, then red
      cards; they are stored under "YELLOW CARDS" and "RED CARDS".
    - "SAVES", "OFFSIDES", "MINUTES PLAYED" and "PLAYED" are stored as ints.
    - "GOALS" may occur twice: the first occurrence is stored as "GOALS", the
      second as "GOALS CONCEDED"; further occurrences are ignored.
    - Any other label is stored with its raw text.

    A number that cannot be parsed is stored as None so the caller can apply
    its own default instead of the whole scrape aborting.
    """
    stats = {}
    goalsSeen = 0
    for label, value in pairs:
        if label == "CARDS":
            cards = value.split()
            stats["YELLOW CARDS"] = _to_int(cards[0]) if len(cards) > 0 else None
            stats["RED CARDS"] = _to_int(cards[1]) if len(cards) > 1 else None
        elif label in ("SAVES", "OFFSIDES", "MINUTES PLAYED", "PLAYED"):
            stats[label] = _to_int(value)
        elif label == "GOALS":
            goalsSeen += 1
            if goalsSeen == 1:
                stats["GOALS"] = _to_int(value)
            elif goalsSeen == 2:
                stats["GOALS CONCEDED"] = _to_int(value)
        else:
            stats[label] = value
    return stats


def _visible_text(cssSelector, timeout=10):
    """Return the text of the first element matching cssSelector.

    Waits up to `timeout` seconds for the element to become visible. If it
    never does, the text of the first match (if any) is returned, otherwise
    an empty string.
    """
    try:
        element = WebDriverWait(driver, timeout).until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, cssSelector)))
    except TimeoutException:
        matches = driver.find_elements(By.CSS_SELECTOR, cssSelector)
        return matches[0].text if matches else ""
    return element.text


# Collect every squad URL up front. The driver is expected to be on the clubs
# listing page here; reading all hrefs once avoids reloading that page for
# every team and indexing into a freshly fetched (possibly shorter) list.
teamSelector = "a.team-wrap"
try:
    WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, teamSelector)))
except TimeoutException:
    print("no teams")
teamLinks = []
for anchor in driver.find_elements(By.CSS_SELECTOR, teamSelector):
    href = anchor.get_attribute('href')
    if href:
        teamLinks.append(href.rstrip("/") + "/squad")

#first loop: teams
for teamLink in teamLinks:
    # go to squad page
    driver.get(teamLink)
    time.sleep(2)

    #save team name ("" when the header never shows up)
    team = _visible_text("h1.team-name")

    #save number of players
    playerSelector = "a.player-name"
    nPlayers = len(driver.find_elements(By.CSS_SELECTOR, playerSelector))

    #second loop: players
    for currentPlayer in range(nPlayers):
        time.sleep(1)
        # The squad page is reloaded after every player, so the links must be
        # looked up again; the old element references are stale.
        playerLinks = driver.find_elements(By.CSS_SELECTOR, playerSelector)
        if currentPlayer >= len(playerLinks):
            break
        squadUrl = driver.current_url
        driver.execute_script("arguments[0].click();", playerLinks[currentPlayer])
        # The player id is read from the URL, so wait for the navigation to
        # actually happen; on timeout carry on with whatever is loaded.
        try:
            WebDriverWait(driver, 10).until(EC.url_changes(squadUrl))
        except TimeoutException:
            print("player page did not load in time")
        time.sleep(1)

        # save player's id and name
        # The id is the part before "--" of the 7th "/"-separated URL piece.
        urlParts = driver.current_url.split('/')
        player_id = urlParts[6].split('--')[0] if len(urlParts) > 6 else ""
        name = _visible_text("h1.player-header_name")

        # if there are statistics, save it
        total = {}
        statSelector = "div.player--statistics--list"
        if len(driver.find_elements(By.CSS_SELECTOR, statSelector)) == 1:
            fieldSelector = statSelector + " .field"
            fields = driver.find_elements(By.CSS_SELECTOR, fieldSelector)
            if not fields:
                # Wait with a locator-based condition and keep a *list* of
                # elements, so the loop below can always iterate the result.
                try:
                    fields = WebDriverWait(driver, 10).until(
                        EC.presence_of_all_elements_located(
                            (By.CSS_SELECTOR, fieldSelector)))
                except TimeoutException:
                    fields = []

            # get data from fields
            pairs = []
            for field in fields:
                try:
                    label = field.find_element(
                        By.CLASS_NAME, "statistics--list--label").text
                    data = field.find_element(
                        By.CLASS_NAME, "statistics--list--data").text
                except NoSuchElementException:
                    # a field without label or value carries no statistic
                    continue
                pairs.append((label, data))
            total = _parse_statistics(pairs)

        # save data in a player object
        # if names are empty, save unknown, if numbers are missing, save 0
        numbers = [0 if total.get(key) is None else total[key] for key in statKeys]
        player = Player(player_id or "unknown", name or "unknown",
                        team or "unknown", *numbers)

        # add data to a dict
        playerData = {
            "player id" : player.id,
            "name" : player.name,
            "team" : player.team,
            "goals" : player.goals,
            "saves" : player.saves,
            "offsides" : player.offsides,
            "minutes played" : player.minutesPlayed,
            "goals conceded" : player.goalsConceded,
            "played" : player.played,
            "yellow cards" : player.yellowCards,
            "red cards" : player.redCards
        }

        # print every player stats: running number, then the values in the
        # order they were put into playerData
        print(" - ".join(str(value) for value in [count, *playerData.values()]))
        count += 1

        # add player data to the list of all players
        teamsData.append(playerData)

        #finish player and go back to team page
        driver.get(teamLink)

# Store the scraped rows; the browser is shut down even if this step fails.
try:
    # save in a dataframe with a fixed column order
    teams_df = pd.DataFrame(teamsData, columns=["player id", "name", "team", "goals", "saves",
                                                "offsides",  "minutes played", "goals conceded",
                                                "played", "yellow cards", "red cards"])

    # reset_index turns the row number into an explicit "index" column, so
    # every stored document also carries its position in the scrape order
    teams_df.reset_index(inplace=True)
    data_dict = teams_df.to_dict("records")

    # mongo configuration
    myclient = MongoClient("mongodb://localhost:27017/")
    mydb = myclient["uefa"]
    mycol = mydb["pruebas-finales-2"]

    # Insert to collection
    # insert_many rejects an empty list, so skip it when nothing was scraped
    if data_dict:
        mycol.insert_many(data_dict)
    else:
        print("no player data to insert")
finally:
    # quit() ends the whole WebDriver session, whereas close() only closes
    # the current browser window
    driver.quit()

0 - 250019851 - Kenny Stamatopoulos - AIK - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0
1 - 103910 - Budimir Janošević - AIK - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0
2 - 250083420 - Oscar Linnér - AIK - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0
3 - 250102585 - Samuel Brolin - AIK - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0
4 - 250102587 - Daniel Lundquist - AIK - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0
5 - 250083192 - Daniel Granli - AIK - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0
6 - 68423 - Per Karlsson - AIK - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0
7 - 250110984 - Rasmus Lindkvist - AIK - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0
8 - 250013527 - Karol Mets - AIK - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0
9 - 250102589 - Adam Ben Lamin - AIK - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0
10 - 250121241 - Heradi Rashidi - AIK - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0
11 - 250014306 - Magnar Ødegård - AIK - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0
12 - 250102579 - Claus Royo - AIK - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0
13 - 250125907 - Robin Sundgren - AIK - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0
14 - 250121240 - Panajotis Dimitriadis 