# Tasks for laboratory assignment 1

In [113]:
# imports section

import requests
from bs4 import BeautifulSoup
import json
import pandas as pd
from datetime import datetime
import csv
import numpy as np
import matplotlib.pyplot as plt

## Extract webpage data given the url

Create a Python script that performs basic web scraping on a page to extract all the information into text and returns it as a string.
String should not contain tags.

In [114]:
def parse_web_page(url):
    """
    Fetch the content of the given web page and return it as text without HTML tags.

    Args:
        url (str): The URL of the web page to fetch.

    Returns:
        str: The content of the page as a string without HTML tags.
    """
    response = requests.get(url)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    return soup.get_text(strip=True)

print(parse_web_page('https://fmi.chnu.edu.ua/')[:255])
print(parse_web_page('https://en.wikipedia.org/wiki/Web_scraping')[:255])

Головна - Факультет математики та інформатикиПерейти до основного вмісту[email protected]58012, Україна, м. Чернівці, вул. Університетська, 28НовиниВсіЗагальніОголошенняПодіїСтудентуВикладачуВітанняДіяльністьНауковаНавчально-методичнаМіжнароднаОрганізацій
Web scraping - WikipediaJump to contentMain menuMain menumove to sidebarhideNavigationMain pageContentsCurrent eventsRandom articleAbout WikipediaContact usContributeHelpLearn to editCommunity portalRecent changesUpload fileSearchSearchAppearanceDonateCre


## Get data from the API

Create a python script that performs basic request to API endpoint and saves that data to a JSON file `result.json`.

In [115]:
def parse_api(api_url):
    """
    Fetch the data of the given API endpoint and save it to result.json.

    Args:
        api_url (str): The URL of the API endpoint.

    Returns:
        None.

    Raises:
        HTTPError: If the HTTP request returned an unsuccessful status code.
    """
    response = requests.get(api_url)
    response.raise_for_status()
    data = response.json()
    with open('result.json', 'w') as f:
        json.dump(data, f, indent=4)

parse_api('https://api.github.com/')

## Parse the json file

Parse the `weather.json` file and return weather data for a specific date, that is given as a parameter. Return the data as an array.

In [116]:
def parse_json(date):
    """
    Parse the data from weather.json file and return weather data for a given date.

    Args:
        date (str): The date for which we look up the weather.

    Returns:
        list: a list of weather data for a given date.
    """
    with open('resources/weather.json', 'r') as f:
        data = json.load(f)
    
    result = [entry for entry in data['daily'] if entry['date'] == date]
    return result
    
target_date = '2024-08-19'
print(parse_json(target_date))

[{'date': '2024-08-19', 'max_temperature': 30.0, 'min_temperature': 21.0, 'precipitation': 5.0, 'wind_speed': 10.0, 'humidity': 70, 'weather_description': 'Light rain'}]


## Parse the csv file

Parse the `weather.csv` file and return weather data for a specific date, that is given as a parameter. Return the data as an array.

In [117]:
def parse_csv(date):
    """
    Parse the data from weather.csv file and return weather data for a given date.

    Args:
        date (str): The date for which we look up the weather.

    Returns:
        list: a list of weather data for a given date.
    """
    with open('resources/weather.csv', 'r') as f:
        reader = csv.DictReader(f)
        result = [row for row in reader if row['CET'].strip() == date.strip()]
    
    return result

    
target_date = '1997-5-22'
print(parse_csv(target_date))

[{'CET': '1997-5-22', 'Max TemperatureC': '25', 'Mean TemperatureC': '18', 'Min TemperatureC': '10', 'Dew PointC': '11', 'MeanDew PointC': '8', 'Min DewpointC': '6', 'Max Humidity': '88', ' Mean Humidity': '54', ' Min Humidity': '34', ' Max Sea Level PressurehPa': '1017', ' Mean Sea Level PressurehPa': '1015', ' Min Sea Level PressurehPa': '1012', ' Max VisibilityKm': '10', ' Mean VisibilityKm': '10', ' Min VisibilitykM': '10', ' Max Wind SpeedKm/h': '11', ' Mean Wind SpeedKm/h': '3', ' Max Gust SpeedKm/h': '', 'Precipitationmm': '0.00', ' CloudCover': '3', ' Events': '', 'WindDirDegrees': '277'}]


## Visualize data

Visualize the `weather.csv` data using matplotlib. Choose your own approach to data visualization. Save the results (as `.png`, `.webp` files etc., your choise) in this repository. 

In [None]:
def visualize_data():
    """
    Parse the data from weather.csv file and visualize it using Matplotlib. Use more then one visualization. 
    Save the results in the repository.

    Args:
        None: None.

    Returns:
        None: None.
    """
    dates = []
    max_temps = []
    min_temps = []
    wind_speeds = []

    with open('resources/weather.csv', 'r') as f:
        reader = csv.DictReader(f)

        fieldnames = [field.strip() for field in reader.fieldnames]
        reader.fieldnames = fieldnames  # Оновлюємо стовпці

        for column in reader.fieldnames:
            print(f"'{column}'")

        for row in reader:
            date = row['CET']
            max_temp = row['Max TemperatureC']
            min_temp = row['Min TemperatureC']
            wind_speed = row['Max Wind SpeedKm/h'].strip() if 'Max Wind SpeedKm/h' in row else None

            if max_temp and min_temp and wind_speed:
                dates.append(date)
                max_temps.append(float(max_temp))
                min_temps.append(float(min_temp))

                if wind_speed:
                    wind_speeds.append(float(wind_speed))
                else:
                    wind_speeds.append(0)  

    # 1. Графік максимальних та мінімальних температур
    plt.figure(figsize=(10, 5))
    plt.plot(dates, max_temps, label='Max Temperature (°C)', color='red')
    plt.plot(dates, min_temps, label='Min Temperature (°C)', color='blue')
    plt.xlabel('Date')
    plt.ylabel('Temperature (°C)')
    plt.title('Max and Min Temperatures')
    plt.xticks(rotation=45)
    plt.legend()
    plt.tight_layout()
    plt.savefig('max_min_temperatures.png')
    plt.savefig('max_min_temperatures.webp')
    plt.clf()  

    # 2. Графік швидкості вітру
    plt.figure(figsize=(10, 5))
    plt.plot(dates, wind_speeds, label='Max Wind Speed (km/h)', color='green')
    plt.xlabel('Date')
    plt.ylabel('Wind Speed (km/h)')
    plt.title('Max Wind Speed')
    plt.xticks(rotation=45)
    plt.legend()
    plt.tight_layout()
    plt.savefig('wind_speed.png')
    plt.savefig('wind_speed.webp')
    plt.clf()

visualize_data()

visualize_data()

'CET'
'Max TemperatureC'
'Mean TemperatureC'
'Min TemperatureC'
'Dew PointC'
'MeanDew PointC'
'Min DewpointC'
'Max Humidity'
'Mean Humidity'
'Min Humidity'
'Max Sea Level PressurehPa'
'Mean Sea Level PressurehPa'
'Min Sea Level PressurehPa'
'Max VisibilityKm'
'Mean VisibilityKm'
'Min VisibilitykM'
'Max Wind SpeedKm/h'
'Mean Wind SpeedKm/h'
'Max Gust SpeedKm/h'
'Precipitationmm'
'CloudCover'
'Events'
'WindDirDegrees'
'CET'
'Max TemperatureC'
'Mean TemperatureC'
'Min TemperatureC'
'Dew PointC'
'MeanDew PointC'
'Min DewpointC'
'Max Humidity'
'Mean Humidity'
'Min Humidity'
'Max Sea Level PressurehPa'
'Mean Sea Level PressurehPa'
'Min Sea Level PressurehPa'
'Max VisibilityKm'
'Mean VisibilityKm'
'Min VisibilitykM'
'Max Wind SpeedKm/h'
'Mean Wind SpeedKm/h'
'Max Gust SpeedKm/h'
'Precipitationmm'
'CloudCover'
'Events'
'WindDirDegrees'


<Figure size 1000x500 with 0 Axes>

<Figure size 1000x500 with 0 Axes>

<Figure size 1000x500 with 0 Axes>

<Figure size 1000x500 with 0 Axes>