In [None]:
'''
  Title: Web Scraping Project - Hockey Teams
  Name: Rodney Roy Gitonga
  Cybershujaa id: CS-DA26025
  Date: 15 January 2026
  Description: This script scrapes hockey team data from scrapethissite.com,
  parses the HTML using BeautifulSoup, organizes the data into a Pandas DataFrame,
  and exports it to a CSV file.
'''

# Import libraries
from bs4 import BeautifulSoup
import requests
import pandas as pd

# Send HTTP Request
# Define the target URL and use requests to fetch the page content.
url = 'https://www.scrapethissite.com/pages/forms/'

# requests applies no timeout by default, so an unresponsive server would
# block this script forever. Bound the wait explicitly.
REQUEST_TIMEOUT_SECONDS = 10
response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)

# Check request was successful (Status Code 200)
# Abort on any other status instead of merely reporting it: the steps that
# follow parse response.text, and an error page is unlikely to contain the
# table they expect.
if response.status_code != 200:
    raise RuntimeError(f"Failed to connect. Status code: {response.status_code}")
print("Successfully connected to the website!")

# Parse HTML Content
# Initialize BeautifulSoup to parse the text of the response
soup = BeautifulSoup(response.text, 'html.parser')

# Locate Target Table
# We look for the first table with the class 'table'
hockey_table = soup.find('table', class_='table')

# soup.find returns None when nothing matches. Fail here with a clear message
# rather than with an AttributeError on the find_all call below.
if hockey_table is None:
    raise ValueError(
        "Could not find a <table> element with class 'table' in the page; "
        "the page layout may have changed or the wrong page was fetched."
    )

# Extract Column Headers
# Find all table header cells ('th') and strip whitespace
header_tags = hockey_table.find_all('th')
columns = [header.text.strip() for header in header_tags]
print(f"Columns found: {columns}")

# Extract Row Data
# Find all table rows ('tr'). Skip the first row [1:] because it contains headers.
rows = hockey_table.find_all('tr')
extracted_data = []

for row in rows[1:]:
    # Find all data cells ('td') in the current row
    cells = row.find_all('td')
    # A row without any 'td' cells (e.g. an extra header or spacer row) holds
    # no team data; appending it would add an empty record to the dataset.
    if not cells:
        continue
    # Clean the text for each cell
    row_data = [cell.text.strip() for cell in cells]
    # Append the clean row to our list
    extracted_data.append(row_data)

# Create DataFrame
# Convert the list of lists into a Pandas DataFrame, one column per header.
# Every value is kept as the text scraped from the page (no numeric casting).
df = pd.DataFrame(extracted_data, columns=columns)

# Data Inspection
# Show a heading followed by the first 5 rows so the scrape can be checked by eye.
print("\nFirst 5 rows of the dataset:", df.head(n=5), sep="\n")

# Export to CSV
# index=False leaves the pandas row index out of the written file.
csv_filename = 'Hockey_Data_Scraped.csv'
df.to_csv(path_or_buf=csv_filename, index=False)
print(f"\nData successfully saved to {csv_filename}")

Successfully connected to the website!
Columns found: ['Team Name', 'Year', 'Wins', 'Losses', 'OT Losses', 'Win %', 'Goals For (GF)', 'Goals Against (GA)', '+ / -']

First 5 rows of the dataset:
            Team Name  Year Wins Losses OT Losses  Win % Goals For (GF)  \
0       Boston Bruins  1990   44     24             0.55            299   
1      Buffalo Sabres  1990   31     30            0.388            292   
2      Calgary Flames  1990   46     26            0.575            344   
3  Chicago Blackhawks  1990   49     23            0.613            284   
4   Detroit Red Wings  1990   34     38            0.425            273   

  Goals Against (GA) + / -  
0                264    35  
1                278    14  
2                263    81  
3                211    73  
4                298   -25  

Data successfully saved to Hockey_Data_Scraped.csv
