In [None]:
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
import pandas as pd

# Keyword to search for; it drives both the search query and the row filter
keyword = 'computer science'

# Base URL of the paginated search; the page number is appended per request.
# The query term is derived from `keyword` (percent-encoded) so the URL and
# the filter below cannot drift apart when the keyword is changed.
base_url = f'http://www.wikicfp.com/cfp/call?conference={quote(keyword)}&page='

# Initialize a list to store the data (one dict per scraped conference)
data = []

# Loop through the first 10 pages, collecting one record per matching
# conference into `data`.
keyword_lower = keyword.lower()  # loop-invariant, so compute it only once
for page in range(1, 11):
    # Modify URL for each page
    url = base_url + str(page)

    # Send a GET request. The timeout keeps a stalled connection from
    # hanging the whole run, and catching RequestException lets the
    # remaining pages still be fetched when one of them fails.
    try:
        response = requests.get(url, timeout=10)
    except requests.RequestException as exc:
        print(f"Failed to retrieve page {page}: {exc}")
        continue

    # Check if request was successful
    if response.status_code != 200:
        print(f"Failed to retrieve page {page}.")
        continue

    # Parse HTML and find all table rows
    soup = BeautifulSoup(response.text, 'html.parser')
    rows = soup.find_all('tr')

    # Each entry is read from two consecutive rows: the first holds the
    # link and name, the next one the details. Pairing every row with its
    # successor naturally skips the last row, which has no details row.
    # NOTE(review): find_all() is recursive, so rows of nested tables are
    # included too -- confirm this cannot yield duplicate records.
    for name_row, details_row in zip(rows, rows[1:]):
        # Only rows that contain a link can be name rows
        if name_row.find('a') is None:
            continue

        # Keep only rows mentioning the keyword (case-insensitive)
        if keyword_lower not in name_row.text.lower():
            continue

        # Try to extract details
        name_td = name_row.find('td', {'align': 'left', 'colspan': '3'})
        date_tds = details_row.find_all('td', {'align': 'left'})

        # Skip the pair unless all expected cells are present
        if name_td is None or len(date_tds) != 3:
            continue

        name = name_td.text.strip()
        dates, place, deadline = (td.text.strip() for td in date_tds)

        # Skip if it matches the header format
        if dates == "Event" and place == "When" and deadline == "Where":
            continue

        # Append the data
        data.append({
            "Name": name,
            "Dates": dates,
            "Place": place,
            "Deadline": deadline,
        })

# Create a DataFrame from the data. Naming the columns explicitly keeps the
# frame well-formed when nothing was scraped; without it an empty `data`
# list yields a column-less frame and df['Place'] raises KeyError.
df = pd.DataFrame(data, columns=["Name", "Dates", "Place", "Deadline"])


def _country_from_place(place):
    """Return 'Online' for online/virtual events, else the text after the last comma."""
    lowered = place.lower()
    if 'online' in lowered or 'virtual' in lowered:
        return 'Online'
    # Split on the bare comma and strip, so both "City, Country" and
    # "City,Country" resolve to "Country".
    return place.split(',')[-1].strip()


# Add a new column 'Country'
df['Country'] = df['Place'].apply(_country_from_place)

# Remove missing values. Only real NaN/None entries are dropped; empty
# strings are not treated as missing by dropna().
df.dropna(inplace=True)

# Calculate the distribution of places
distribution = df['Country'].value_counts()

# A bare `df.head()` in the middle of a cell is evaluated and discarded,
# so print it explicitly to actually show the preview.
print(df.head())

# Print the top and bottom countries
print("Top Countries:")
print(distribution.head())
print("\nBottom Countries:")
print(distribution.tail())
