In [1]:
import requests
from bs4 import BeautifulSoup
import json
import csv
from datetime import datetime

def fetch_weather_data(url):
    """Fetch weather data from the specified URL."""
    response = requests.get(url)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch data: HTTP {response.status_code}")

    soup = BeautifulSoup(response.content, 'html.parser')
    # This part should be customized based on the website structure
    weather_data = []
    for row in soup.select('table.weather-table tr'):  # Adjust selector as per site
        cells = row.find_all('td')
        if len(cells) >= 2:  # Assuming at least two columns for date and temperature
            date_str = cells[0].get_text(strip=True)
            temp = cells[1].get_text(strip=True)
            try:
                date = datetime.strptime(date_str, "%Y-%m-%d")
                weather_data.append({"date": date_str, "temperature": temp})
            except ValueError:
                continue  # Skip rows with invalid date formats

    return weather_data

def filter_weather_data(data, start_date=None, end_date=None):
    """Filter weather data by date range."""
    if start_date:
        start_date = datetime.strptime(start_date, "%Y-%m-%d")
    if end_date:
        end_date = datetime.strptime(end_date, "%Y-%m-%d")

    filtered_data = []
    for entry in data:
        entry_date = datetime.strptime(entry["date"], "%Y-%m-%d")
        if (not start_date or entry_date >= start_date) and (not end_date or entry_date <= end_date):
            filtered_data.append(entry)

    return filtered_data

def save_to_json(data, output_file):
    """Save weather data to a JSON file."""
    with open(output_file, 'w') as f:
        json.dump(data, f, indent=4)

def save_to_csv(data, output_file):
    """Save weather data to a CSV file."""
    with open(output_file, 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=["date", "temperature"])
        writer.writeheader()
        writer.writerows(data)

# Interactive inputs for Colab
url = input("Enter the URL of the website to scrape weather data from: ")
start_date = input("Enter the start date (YYYY-MM-DD) for filtering (or leave blank): ")
end_date = input("Enter the end date (YYYY-MM-DD) for filtering (or leave blank): ")
output_file = input("Enter the output file name (e.g., output.json or output.csv): ")
output_format = input("Enter the output format (json or csv): ").lower()

try:
    print("Fetching weather data...")
    data = fetch_weather_data(url)
    print(f"Fetched {len(data)} records.")

    if start_date or end_date:
        print("Filtering weather data...")
        data = filter_weather_data(data, start_date, end_date)
        print(f"Filtered to {len(data)} records.")

    if output_format == "json":
        print("Saving data to JSON...")
        save_to_json(data, output_file)
    elif output_format == "csv":
        print("Saving data to CSV...")
        save_to_csv(data, output_file)

    print(f"Data saved to {output_file}")
except Exception as e:
    print(f"Error: {e}")


Enter the URL of the website to scrape weather data from: nadiad
Enter the start date (YYYY-MM-DD) for filtering (or leave blank): 2000-05-12
Enter the end date (YYYY-MM-DD) for filtering (or leave blank): 2000-05-10
Enter the output file name (e.g., output.json or output.csv): json
Enter the output format (json or csv): json
Fetching weather data...
Error: Invalid URL 'nadiad': No scheme supplied. Perhaps you meant https://nadiad?
