In [4]:
import pandas as pd

In [8]:
# Load the raw match-level and ball-by-ball CSV files into DataFrames.
# The paths are relative to the notebook's working directory.
matches_raw_dataset, deliveries_raw_dataset = (
    pd.read_csv(csv_path)
    for csv_path in ("../datasets/matches.csv", "../datasets/deliveries.csv")
)

In [9]:
# Preview the first five ball-by-ball rows to check that the columns parsed as expected.
deliveries_raw_dataset.head(n=5)

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batter,bowler,non_striker,batsman_runs,extra_runs,total_runs,extras_type,is_wicket,player_dismissed,dismissal_kind,fielder
0,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,1,SC Ganguly,P Kumar,BB McCullum,0,1,1,legbyes,0,,,
1,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,2,BB McCullum,P Kumar,SC Ganguly,0,0,0,,0,,,
2,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,3,BB McCullum,P Kumar,SC Ganguly,0,1,1,wides,0,,,
3,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,4,BB McCullum,P Kumar,SC Ganguly,0,0,0,,0,,,
4,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,5,BB McCullum,P Kumar,SC Ganguly,0,0,0,,0,,,


# Creating Players Dataset

In [20]:
# Every player name appears in at least one of these three delivery columns,
# so stack them into a single Series before de-duplicating.
all_players = pd.concat(
    [deliveries_raw_dataset[column] for column in ('batter', 'bowler', 'non_striker')]
)

# unique() keeps first-appearance order, which fixes the id each player receives.
unique_players = all_players.dropna().unique()

# One row per player with a 1-based surrogate id; the remaining attribute
# columns are created as None placeholders.
players_df = pd.DataFrame({
    'player_id': range(1, len(unique_players) + 1),
    'player_name': unique_players,
    **dict.fromkeys([
        'country',
        'played_international',
        'ipl_time_span',
        'no_of_ipl_matches',
        'bowling_style',
        'batting_style',
        'player_role',
    ]),
})

players_df.head()

Unnamed: 0,player_id,player_name,country,played_international,ipl_time_span,no_of_ipl_matches,bowling_style,batting_style,player_role
0,1,SC Ganguly,,,,,,,
1,2,BB McCullum,,,,,,,
2,3,RT Ponting,,,,,,,
3,4,DJ Hussey,,,,,,,
4,5,Mohammad Hafeez,,,,,,,


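# Scraping Player Data

The cell below collects the per-team "Player averages" links from the ESPNcricinfo IPL records page.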

In [21]:
import requests
from bs4 import BeautifulSoup

In [28]:
# Scrape the per-team "Player averages" links from the ESPNcricinfo IPL records
# page into `team_links_df` (columns: Team, Link).

# Define the URL
url = "https://www.espncricinfo.com/records/trophy/indian-premier-league-117"

# Start from an empty, correctly-shaped frame. If any step below fails, later
# cells then see an empty table instead of a NameError (fresh kernel) or a
# stale result left over from a previous run of this cell.
team_links_df = pd.DataFrame(columns=['Team', 'Link'])

# Send a GET request. requests has no default timeout, so without one a stalled
# connection would block the cell indefinitely; exceeding it raises
# requests.exceptions.Timeout.
response = requests.get(url, timeout=30)
if response.status_code == 200:
    # Parse the content with BeautifulSoup
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find the "Player averages" section by its title
    player_averages_section = soup.find('span', string='Player averages')
    if player_averages_section:
        # Locate the parent container (matched on the page's exact CSS class string)
        parent_div = player_averages_section.find_parent('div', class_="ds-w-full ds-bg-fill-content-prime ds-overflow-hidden ds-rounded-xl ds-border ds-border-line ds-mb-2")
        if parent_div:
            # Find all links within the section
            links = parent_div.find_all('a', href=True)

            # Prepare data for DataFrame
            team_links = []
            for link in links:
                # Look the <span> up once; the team name is its stripped text.
                name_span = link.find('span')
                team_name = name_span.text.strip() if name_span else None
                full_link = "https://www.espncricinfo.com" + link['href']
                if team_name:  # Avoid empty team names
                    team_links.append({'Team': team_name, 'Link': full_link})

            # Create a DataFrame; explicit columns keep the schema stable even
            # when no links were found.
            team_links_df = pd.DataFrame(team_links, columns=['Team', 'Link'])
        else:
            print("Could not locate the parent container for 'Player averages'.")
    else:
        print("'Player averages' section not found.")
else:
    print(f"Failed to fetch the webpage. Status code: {response.status_code}")



In [29]:
# Preview the first five scraped team/link pairs.
team_links_df.head(n=5)

Unnamed: 0,Team,Link
0,Chennai Super Kings,https://www.espncricinfo.com/records/trophy/av...
1,Deccan Chargers,https://www.espncricinfo.com/records/trophy/av...
2,Delhi Daredevils,https://www.espncricinfo.com/records/trophy/av...
3,Gujarat Lions,https://www.espncricinfo.com/records/trophy/av...
4,Gujarat Titans,https://www.espncricinfo.com/records/trophy/av...


In [None]:
# Write the players table to disk; index=False leaves the DataFrame index out of the file.
players_df.to_csv(
    path_or_buf='../datasets/players_dataset.csv',
    index=False,
)