In [1]:
import re
from time import sleep
from typing import Dict, List, Optional, Tuple

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# Request headers passed to every page fetch; the User-Agent is a
# desktop-browser string, split across lines only for readability.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (X11; Linux x86_64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/115.0 Safari/537.36"
    )
}

# Pause, in seconds, taken after each URL in the scraping loop.
SLEEP_BETWEEN_REQUESTS = 1.0

In [15]:
def _first_text(soup, selectors: List[str]) -> str:
    """
    Returns the text of the first element that matches one of `selectors`
    (CSS selectors, tried in order) and actually contains text.
    Returns "" when no selector yields a non-empty element.
    """
    for selector in selectors:
        element = soup.select_one(selector)
        if element is None:
            continue
        text = element.get_text(" ", strip=True)
        if text:
            return text
    return ""


def fetch_city_weather(url: str) -> Dict[str, str]:
    """
    Fetches and parses a timeanddate.com city weather page.

    Returns a dict with keys: City, DateTime, Temperature, Condition,
    Source_URL. DateTime, Temperature and Condition are "N/A" when nothing
    usable is found on the page; City is "Unknown" when there is no <h1>.

    Propagates any exception raised by requests.get (10-second timeout) and
    by raise_for_status() for error responses.
    """
    response = requests.get(url, headers=HEADERS, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "lxml")

    heading = soup.find("h1")
    city = heading.get_text(strip=True) if heading else "Unknown"

    # Selectors are ordered from most to least preferred. An element that
    # matches but holds no text is skipped, so it cannot hide a later fallback.
    temperature = _first_text(soup, [
        "div.h2",
        "div#qlook .h2",
        "div#qlook .temp",
        "div[class*='temp']",
    ])
    temperature = temperature.replace("\n", " ").strip()

    condition = _first_text(soup, ["div#qlook p", "div#qlook .small"])
    if not condition:
        # Last resort: the page's meta description.
        meta = soup.find("meta", {"name": "description"})
        if meta and meta.get("content"):
            condition = meta["content"]

    # Named date_time rather than datetime to avoid shadowing the stdlib module.
    date_time = _first_text(soup, ["#ct", "div#qfacts .h3", "div#qfacts"])
    if not date_time:
        small = soup.find("small")
        if small:
            date_time = small.get_text(" ", strip=True)

    return {
        "City": city,
        "DateTime": date_time or "N/A",
        "Temperature": temperature or "N/A",
        "Condition": condition or "N/A",
        "Source_URL": url
    }


In [21]:
# Weather pages to scrape, one URL per city.
city_urls = [
    "https://www.timeanddate.com/weather/usa/new-york",
    "https://www.timeanddate.com/weather/india/new-delhi",
    "https://www.timeanddate.com/weather/uk/london",
    "https://www.timeanddate.com/weather/australia/sydney",
    "https://www.timeanddate.com/weather/japan/tokyo",
]

# Exactly one row is collected per URL. A failed fetch does not abort the
# run: it is recorded as an "ERROR" row carrying the exception message.
results: List[Dict[str, str]] = []
for url in city_urls:
    try:
        data = fetch_city_weather(url)
    except Exception as e:
        results.append(
            dict(
                City="ERROR",
                DateTime="N/A",
                Temperature="N/A",
                Condition=f"Failed: {e}",
                Source_URL=url,
            )
        )
    else:
        results.append(data)
    # Pause after every URL, whether it succeeded or failed.
    sleep(SLEEP_BETWEEN_REQUESTS)


In [23]:
# Build the results table with an explicit column order, show it, and
# write it to weather.csv without the index column.
df = pd.DataFrame(
    data=results,
    columns=["City", "DateTime", "Temperature", "Condition", "Source_URL"],
)
print(df)
df.to_csv(path_or_buf="weather.csv", index=False)
print("Saved to weather.csv")

                                            City DateTime Temperature  \
0             Weather in New York, New York, USA      N/A       13 °C   
1             Weather in New Delhi, Delhi, India      N/A       14 °C   
2     Weather in London, England, United Kingdom      N/A       13 °C   
3  Weather in Sydney, New South Wales, Australia      N/A       30 °C   
4                        Weather in Tokyo, Japan      N/A       13 °C   

            Condition                                         Source_URL  
0              Clear.   https://www.timeanddate.com/weather/usa/new-york  
1                Fog.  https://www.timeanddate.com/weather/india/new-...  
2         Low clouds.      https://www.timeanddate.com/weather/uk/london  
3              Sunny.  https://www.timeanddate.com/weather/australia/...  
4  Refreshingly cool.    https://www.timeanddate.com/weather/japan/tokyo  
Saved to weather.csv


In [30]:
def parse_temp(temp_str: Optional[str]) -> Tuple[Optional[float], Optional[str]]:
    """
    Splits a temperature string such as "13 °C" into (value, unit).

    Returns (float, "C" or "F") when a number followed by a degree sign and
    a C/F letter is found, (float, None) when only a number is found, and
    (None, None) for None, "N/A", any non-string input (e.g. a float NaN
    standing in for a missing value) or text without digits.
    """
    if not isinstance(temp_str, str) or temp_str == "N/A":
        return (None, None)

    # Treat the typographic minus sign (U+2212) like an ASCII hyphen so a
    # negative reading is not silently parsed as positive.
    text = temp_str.replace("\u2212", "-")

    with_unit = re.search(r"(-?\d+\.?\d*)\s*°\s*([CFcf])", text)
    if with_unit:
        return (float(with_unit.group(1)), with_unit.group(2).upper())

    # No recognisable unit: fall back to the first number in the string.
    number_only = re.search(r"(-?\d+\.?\d*)", text)
    if number_only:
        return (float(number_only.group(1)), None)
    return (None, None)

# Parse each temperature string into a (value, unit) pair, then transpose
# the pairs into two new columns.
df["Temp_Value"], df["Temp_Unit"] = zip(
    *(parse_temp(raw_temp) for raw_temp in df["Temperature"])
)
print(df[["City", "Temperature", "Temp_Value", "Temp_Unit"]])
# Overwrite the earlier export so the CSV includes the parsed columns.
df.to_csv(path_or_buf="weather.csv", index=False)

                                            City Temperature  Temp_Value  \
0             Weather in New York, New York, USA       13 °C        13.0   
1             Weather in New Delhi, Delhi, India       14 °C        14.0   
2     Weather in London, England, United Kingdom       13 °C        13.0   
3  Weather in Sydney, New South Wales, Australia       30 °C        30.0   
4                        Weather in Tokyo, Japan       13 °C        13.0   

  Temp_Unit  
0         C  
1         C  
2         C  
3         C  
4         C  
