## HTML parsing with Beautiful Soup: A Mini Project
#### scrape data from a public website
#### organize the data as a dataframe
#### export the data as an excel sheet

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [3]:
# Page to scrape: BBC Sport's Premier League top-scorers table.
url = "https://www.bbc.com/sport/football/premier-league/top-scorers"

In [4]:
# Download the top-scorers page, parse one record per table row, and build
# `df_players` with columns: player, team, matches, goals, assists, shots.

# CSS class strings as they appear in the page markup. The hashed-looking
# prefixes suggest they are auto-generated, so they are the first thing to
# re-check if the scrape stops finding rows.
ROW_CLASS = 'ssrcss-dhlz6k-TableRowBody e1icz100'
NAME_CLASS = "ssrcss-m6ah29-PlayerName e1n8xy5b1"
TEAM_CLASS = "ssrcss-qvpga1-TeamsSummary e1n8xy5b0"
GOALS_CLASS = 'ssrcss-8k20kk-CellWrapper ef9ipf0'
STAT_CLASS = 'ssrcss-150z8d-CellWrapper ef9ipf0'

# Positions of the wanted values among a row's STAT_CLASS cells.
ASSISTS_IDX, MATCHES_IDX, SHOTS_IDX = 0, 2, -3
MIN_STAT_CELLS = 3  # indices 0, 2 and -3 all need at least three cells

REQUEST_TIMEOUT = 10  # seconds; without a timeout requests.get can block forever
COLUMNS = ['player', 'team', 'matches', 'goals', 'assists', 'shots']


def _cell_text(row, css_class):
    """Return the stripped text of the first <div> in `row` with class `css_class`.

    Raises:
        ValueError: if `row` contains no such <div>.
    """
    cell = row.find('div', class_=css_class)
    if cell is None:
        raise ValueError(f"no <div class={css_class!r}> found in table row")
    return cell.get_text(strip=True)


def _parse_player_row(row):
    """Convert one table row into a dict keyed by the names in COLUMNS.

    Raises:
        ValueError: if an expected cell is missing or a numeric cell does not
            contain an integer.
    """
    stats = row.find_all('div', class_=STAT_CLASS)
    if len(stats) < MIN_STAT_CELLS:
        raise ValueError(
            f"expected at least {MIN_STAT_CELLS} stat cells in a row, found {len(stats)}"
        )
    return {
        'player': _cell_text(row, NAME_CLASS),
        'team': _cell_text(row, TEAM_CLASS),
        'matches': int(stats[MATCHES_IDX].get_text(strip=True)),
        'goals': int(_cell_text(row, GOALS_CLASS)),
        'assists': int(stats[ASSISTS_IDX].get_text(strip=True)),
        'shots': int(stats[SHOTS_IDX].get_text(strip=True)),
    }


# A failed download is deliberately left to raise: catching and printing it
# would leave `df_players` undefined (or stale from an earlier run) and let
# the later cells fail confusingly or export old data.
response = requests.get(url, timeout=REQUEST_TIMEOUT)
response.raise_for_status()

soup = BeautifulSoup(response.content, 'html.parser')
table_body = soup.find('tbody')
rows = table_body.find_all('tr', class_=ROW_CLASS) if table_body is not None else []
if not rows:
    # An empty result would otherwise flow silently into the Excel export.
    raise RuntimeError(
        f"No player rows found at {url}; the page markup or CSS class names may have changed."
    )

df_players = pd.DataFrame([_parse_player_row(row) for row in rows], columns=COLUMNS)
        


In [5]:
# Preview the scraped table.
df_players

Unnamed: 0,player,team,matches,goals,assists,shots
0,Mohamed Salah,Liverpool,30,27,17,107
1,E. Haaland,Man City,28,21,3,102
2,A. Isak,Newcastle,26,20,5,72
3,C. Wood,Nottm Forest,29,18,3,53
4,B. Mbeumo,Brentford,30,16,5,65
5,C. Palmer,Chelsea,29,14,7,104
6,Y. Wissa,Brentford,27,14,2,66
7,O. Watkins,Aston Villa,30,13,6,72
8,Matheus Cunha,Wolves,26,13,4,86
9,J. Kluivert,Bournemouth,28,12,6,56


In [6]:
# Summarise row count, column dtypes and non-null counts.
df_players.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41 entries, 0 to 40
Data columns (total 6 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   player   41 non-null     object
 1   team     41 non-null     object
 2   matches  41 non-null     int64 
 3   goals    41 non-null     int64 
 4   assists  41 non-null     int64 
 5   shots    41 non-null     int64 
dtypes: int64(4), object(2)
memory usage: 2.1+ KB


In [9]:
# List the column labels of the scraped frame.
df_players.columns

Index(['player', 'team', 'matches', 'goals', 'assists', 'shots'], dtype='object')

In [30]:
# Count missing values in each column.
df_players.isnull().sum()

player     0
team       0
matches    0
goals      0
assists    0
shots      0
dtype: int64

In [7]:
!pip install openpyxl
!pip install xlsxwriter



In [8]:
# Export the table to an Excel workbook via the xlsxwriter engine,
# leaving the integer row index out of the sheet.
df_players.to_excel(
    'EPL Top Scorers.xlsx',
    engine='xlsxwriter',
    index=False,
)