In [6]:
import csv
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup

# URL of the PickBanHistory query page (MSI 2025, text-only output)
url = "https://lol.fandom.com/wiki/Special:RunQuery/PickBanHistory?PBH%5Bpage%5D=MSI+2025&PBH%5Btextonly%5D=Yes&_run="

# Send a GET request to the URL. The timeout makes the cell fail fast with a
# requests exception instead of hanging forever if the server never answers.
response = requests.get(url, timeout=30)

# Check if the request was successful
if response.status_code == 200:
    # Parse the HTML content
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find the table containing the pick/ban data (first table on the page)
    tables = soup.find_all('table')

    if tables:
        # Convert the HTML table to a pandas DataFrame. Passing literal HTML
        # to read_html is deprecated, so the markup is wrapped in StringIO.
        dfs = pd.read_html(StringIO(str(tables[0])))

        if dfs:
            df = dfs[0]
            print("Pick/Ban History Data:")
            print(df.head())

            # Display basic information about the dataframe
            print("\nDataframe Info:")
            print(f"Number of rows: {len(df)}")
            print(f"Columns: {list(df.columns)}")

            # Count the most banned champions.
            # The scraped header is two-level (table title, column name), so
            # a plain `'BB1' in df.columns` test never matches. Work on the
            # innermost level instead; get_level_values(-1) also works when
            # the columns are already a flat Index.
            column_names = [str(name) for name in df.columns.get_level_values(-1)]

            # Ban columns are named BB<n> (blue side) and RB<n> (red side).
            # A bare 'B' prefix would wrongly include Blue, BP* (picks) and
            # BR* (roles) while skipping every red-side ban.
            ban_positions = [
                position
                for position, name in enumerate(column_names)
                if name[:2] in ('BB', 'RB') and name[2:].isdigit()
            ]

            if ban_positions:
                all_bans = []
                # Select by position so the lookup is independent of whether
                # the column labels are tuples or plain strings.
                for position in ban_positions:
                    all_bans.extend(df.iloc[:, position].dropna().tolist())

                ban_counts = pd.Series(all_bans).value_counts()
                print("\nMost Banned Champions:")
                print(ban_counts.head())
        else:
            print("No data found in the table.")
    else:
        print("No tables found on the page.")
else:
    print(f"Failed to retrieve the page. Status code: {response.status_code}")


Pick/Ban History Data:
  Pick and Ban Order - MSI 2025                                             \
                          Phase   Blue    Red  Score Winner  Patch     BB1   
0                        Finals     T1  Gen.G  2 - 3      2  25.13  Maokai   
1                        Finals  Gen.G     T1  2 - 2      1  25.13   Neeko   
2                        Finals     T1  Gen.G  2 - 1      1  25.13  Viktor   
3                        Finals  Gen.G     T1  1 - 1      1  25.13   Neeko   
4                        Finals  Gen.G     T1  1 - 0      2  25.13   Neeko   

                               ...                                            \
        RB1     BB2       RB2  ...      BR3     BR4      BR5     RR1     RR2   
0  Pantheon   Neeko     Varus  ...  Support  Jungle      Mid  Jungle     Top   
1  Pantheon   Sylas     Varus  ...      Top     Bot  Support  Jungle     Bot   
2     Varus   Neeko  Pantheon  ...      Top     Bot  Support     Top  Jungle   
3  Pantheon  Rumble     Varus 

  dfs = pd.read_html(str(tables[0]))


In [7]:
# Summarize the scraped frame: column labels, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 80 entries, 0 to 79
Data columns (total 35 columns):
 #   Column                                   Non-Null Count  Dtype  
---  ------                                   --------------  -----  
 0   (Pick and Ban Order - MSI 2025, Phase)   80 non-null     object 
 1   (Pick and Ban Order - MSI 2025, Blue)    80 non-null     object 
 2   (Pick and Ban Order - MSI 2025, Red)     80 non-null     object 
 3   (Pick and Ban Order - MSI 2025, Score)   80 non-null     object 
 4   (Pick and Ban Order - MSI 2025, Winner)  80 non-null     int64  
 5   (Pick and Ban Order - MSI 2025, Patch)   80 non-null     float64
 6   (Pick and Ban Order - MSI 2025, BB1)     80 non-null     object 
 7   (Pick and Ban Order - MSI 2025, RB1)     80 non-null     object 
 8   (Pick and Ban Order - MSI 2025, BB2)     80 non-null     object 
 9   (Pick and Ban Order - MSI 2025, RB2)     80 non-null     object 
 10  (Pick and Ban Order - MSI 2025, BB3)     80 non-null

In [8]:
# Collapse a two-level header (table title, column name) down to the plain
# column name so columns can be addressed as df['Winner'], df['BB1'], ...
if isinstance(df.columns, pd.MultiIndex):
    flat_labels = []
    for label in df.columns:
        flat_labels.append(label[1] if isinstance(label, tuple) else label)
    df.columns = flat_labels

# Winner and Patch are the only numeric columns; coerce both to float,
# turning anything unparseable into NaN.
# NOTE(review): a float cannot tell patch 25.1 from 25.10 - confirm that
# storing Patch as a number is intended.
for numeric_name in ('Winner', 'Patch'):
    if numeric_name in df.columns:
        df[numeric_name] = pd.to_numeric(df[numeric_name], errors='coerce').astype(float)

# Every remaining column (Score included) is stored as a categorical.
categorical_columns = list(filter(lambda name: name not in ['Winner', 'Patch'], df.columns))
for name in categorical_columns:
    df[name] = df[name].astype('category')

# Show the resulting dtypes
print("\nUpdated DataFrame Data Types:")
print(df.dtypes)



Updated DataFrame Data Types:
Phase     category
Blue      category
Red       category
Score     category
Winner     float64
Patch      float64
BB1       category
RB1       category
BB2       category
RB2       category
BB3       category
RB3       category
BP1       category
RP1-2     category
BP2-3     category
RP3       category
RB4       category
BB4       category
RB5       category
BB5       category
RP4       category
BP4-5     category
RP5       category
BR1       category
BR2       category
BR3       category
BR4       category
BR5       category
RR1       category
RR2       category
RR3       category
RR4       category
RR5       category
SB        category
VOD       category
dtype: object
