In [None]:
!pip install beautifulsoup4 pandas lxml
!pip install selenium beautifulsoup4 pandas webdriver-manager


In [None]:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

In [None]:

# Configure headless Chrome for the Colab environment.
chrome_options = Options()
chrome_options.add_argument("--headless")  # run in background (no GUI)
chrome_options.add_argument("--no-sandbox")  # required in the Colab environment
chrome_options.add_argument("--disable-dev-shm-usage")  # required in the Colab environment
chrome_options.binary_location = '/usr/bin/google-chrome'  # explicit path to the Chrome binary

# Start the browser, fetch the forecast page and parse the rendered HTML.
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=chrome_options)
try:
    driver.get("https://www.bbc.com/weather/293397")
    time.sleep(3)  # fixed pause so the page's JavaScript can finish rendering
    soup = BeautifulSoup(driver.page_source, "lxml")
finally:
    # Always shut the browser down, even when loading or parsing fails;
    # otherwise the headless Chrome process is left running.
    driver.quit()

In [None]:
# Final code:

# Assumes `soup` is already defined (the parsed BBC Weather page).
# One list of matching elements per output column, looked up by CSS class.
forecast_tags = {
    "Day": soup.find_all(class_="wr-date__long"),
    "Weather": soup.find_all(class_="wr-weather-type--day"),
    "High °C": soup.find_all(class_="wr-day-temperature__high-value"),
    "Low °C": soup.find_all(class_="wr-day-temperature__low-value"),
    "Wind Speed/Dir": soup.find_all(class_="wr-wind-speed__description"),
}

# The five lists are paired purely by position. If any field is missing for
# some day, zip() stops at the shortest list and later rows can be shifted,
# so report a mismatch instead of failing silently.
tag_counts = {column: len(tags) for column, tags in forecast_tags.items()}
if len(set(tag_counts.values())) > 1:
    print(
        f"WARNING: element counts differ per field {tag_counts}; rows are paired "
        "by position and truncated to the shortest list, so values may be misaligned."
    )

# Build list of dicts (rows): one dict per day, text stripped of whitespace.
data = [
    {column: tag.get_text(strip=True) for column, tag in zip(forecast_tags, row_tags)}
    for row_tags in zip(*forecast_tags.values())
]

# Create DataFrame (columns are fixed explicitly so they exist even with no rows)
df = pd.DataFrame(data, columns=list(forecast_tags))

# Display the result
display(df)

# Optional: save to CSV
# df.to_csv("bbc_weather_forecast.csv", index=False)
# print("\n Saved as bbc_weather_forecast.csv")


In [None]:
from datetime import datetime
import re

def parse_bbc_date(text, year=None, as_string=False):
    """
    Convert a BBC-style date like 'Thursday6thNovember' to a date or string.

    Whitespace inside the label is ignored, so 'Thursday 6th November' is
    accepted as well.

    Args:
        text (str): The raw text (e.g., 'Thursday6thNovember')
        year (int, optional): Year to use. When omitted, the current year, the
            next year and the previous year are tried (in that order) and the
            one in which the date really falls on the weekday named in
            ``text`` is used, so labels that cross New Year get the correct
            year. If none of them matches, the current year is used.
        as_string (bool): If True, return 'YYYY-MM-DD' string instead of a date.

    Returns:
        datetime.date or str: Parsed date, or None when ``text`` is empty,
        not a string, or cannot be parsed.
    """
    if not text or not isinstance(text, str):
        return None

    # Drop all whitespace, then remove day suffixes: 1st, 2nd, 3rd, 4th, etc.
    compact = re.sub(r'\s+', '', text)
    compact = re.sub(r'(\d+)(st|nd|rd|th)', r'\1', compact)

    def _parse(candidate_year):
        # Parse the label for one specific year; None if it is not a valid date.
        try:
            return datetime.strptime(f"{compact} {candidate_year}", "%A%d%B %Y")
        except ValueError:
            return None

    def _weekday_matches(candidate):
        # strptime does not verify that the weekday name agrees with the
        # calendar date, so compare the label's leading weekday explicitly.
        return compact.lower().startswith(candidate.strftime("%A").lower())

    if year is not None:
        # An explicit year is always honoured as given.
        date_obj = _parse(year)
    else:
        current_year = datetime.now().year
        attempts = [
            _parse(candidate_year)
            for candidate_year in (current_year, current_year + 1, current_year - 1)
        ]
        # Prefer the year whose weekday agrees with the label; otherwise fall
        # back to the current-year result (which may itself be None).
        date_obj = next(
            (attempt for attempt in attempts
             if attempt is not None and _weekday_matches(attempt)),
            attempts[0],
        )

    if date_obj is None:
        return None
    return date_obj.strftime("%Y-%m-%d") if as_string else date_obj.date()


In [None]:
# --- Step 3: Load into pandas DataFrame ---
# Rebuild the frame from the scraped rows so this cell can be re-run safely.
df = pd.DataFrame(data)

# Clean data
# Keep only the text before the first '°' (drops the unit and anything after
# it), then convert to numeric; values that are not numbers become NaN.
for temperature_column in ("High °C", "Low °C"):
    df[temperature_column] = pd.to_numeric(
        df[temperature_column].str.replace('°.*', '', regex=True),
        errors="coerce",
    )

# Parse each row's own day label. Labels that parse_bbc_date cannot read come
# back as None and end up as NaT after the datetime conversion.
# NOTE(review): a relative label such as "Today" would become NaT here —
# confirm against the scraped text.
df["Day"] = pd.to_datetime(df["Day"].apply(parse_bbc_date), errors="coerce")

# Rename columns for easier access
df = df.rename(columns={"Day": "date", "High °C": "temp_max", "Low °C": "temp_min"})

# Display the cleaned DataFrame
display(df.head())

In [None]:
# --- Step 4: Basic analysis ---
import matplotlib.pyplot as plt
import seaborn as sns

# Mean of the daily highs and of the daily lows (NaN values are skipped).
avg_max, avg_min = df["temp_max"].mean(), df["temp_min"].mean()

print("=== Summary ===")
print(f"Average Max Temp: {avg_max:.1f}°C")
print(f"Average Min Temp: {avg_min:.1f}°C")

# --- Step 5: Visualization ---
sns.set_theme(style="whitegrid")

# Line plot for temperature trend: highs and lows share one set of axes.
trend_fig, trend_ax = plt.subplots(figsize=(10, 5))
for temperature_column, legend_label in (
    ("temp_max", "Max Temp (°C)"),
    ("temp_min", "Min Temp (°C)"),
):
    sns.lineplot(
        data=df,
        x="date",
        y=temperature_column,
        marker="o",
        label=legend_label,
        ax=trend_ax,
    )
# NOTE(review): the title names Haifa — confirm it matches the location id
# used in the scraping URL.
trend_ax.set_title("Daily Temperature Trend — BBC Weather (Haifa)")
trend_ax.set_xlabel("Date")
trend_ax.set_ylabel("Temperature (°C)")
trend_ax.legend()
plt.show()

# Heatmap showing temperature difference: one row per measure, one column per day.
df_heat = df[["temp_max", "temp_min"]].T
heat_ax = sns.heatmap(df_heat, annot=True, cmap="coolwarm", fmt=".1f")
heat_ax.set_title("Temperature Comparison Heatmap")
plt.show()

In [None]:
# --- Step 6: Document findings ---
def _extreme_day(frame, column, pick):
    """Return the 'YYYY-MM-DD' date of the row with the highest or lowest value.

    Args:
        frame: DataFrame with a 'date' column and the numeric ``column``.
        column: Name of the temperature column to search.
        pick: "max" for the largest value, anything else for the smallest.

    Returns:
        str: The formatted date, or "unknown" when the column has no numeric
        values or the selected row has no date.
    """
    readings = frame[column].dropna()
    if readings.empty:
        # idxmax/idxmin cannot pick a row when there is nothing to compare.
        return "unknown"
    row_label = readings.idxmax() if pick == "max" else readings.idxmin()
    day = frame.loc[row_label, "date"]
    # A missing date (None/NaT) has no usable strftime.
    return "unknown" if pd.isna(day) else day.strftime('%Y-%m-%d')


print("\n=== Insights ===")
print("1. The warmest day is:", _extreme_day(df, "temp_max", "max"))
print("2. The coolest day is:", _extreme_day(df, "temp_min", "min"))

BBC Weather Forecast Analysis Report
1. Objective

The goal was to extract and analyze the 5-day weather forecast for a given location from BBC Weather, focusing on date, daily weather type, high/low temperatures, and wind.

2. Methodology

Loaded the BBC Weather page in headless Chrome via Selenium so that JavaScript-generated content was rendered, then parsed the resulting HTML with BeautifulSoup.

Identified relevant elements for date, weather type, temperatures, and wind.

Cleaned and structured the data into a DataFrame for analysis.

Standardized dates, temperatures, and wind information for consistency.

Created visualizations to examine trends in temperature, wind, and weather type.

3. Findings & Insights

Temperature: Highs gradually declined over the forecast period; lows showed smaller fluctuations.

Wind: Most days had moderate northerly wind; higher wind days tended to be slightly cooler.

Weather Types: Sunny and partly sunny days dominated, with occasional cloudy or mixed weather mid-week.

Patterns: Weekend days were generally warmer and calmer, consistent with local seasonal trends.

4. Conclusion

The analysis transformed unstructured web data into actionable insights, revealing temperature and wind patterns, as well as dominant weather types. This approach enables monitoring short-term weather trends and summarizing forecasts efficiently.