# Tasks for laboratory assignment 1

In [2]:
# imports section

import csv
import functools
import json
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

## Extract webpage data given the url

Create a Python script that performs basic web scraping on a page, extracts all of its information as text, and returns it as a string.
The returned string should not contain HTML tags.

In [15]:
import re

def remove_repetition_of_whitespaces(text: str) -> str:
    """
    Collapse every run of two or more whitespace characters in text.

    Each such run is replaced by its own first character, so the kind of
    whitespace that opened the run (space, tab, newline, ...) is preserved.

    Args:
        text (str): The text to normalize.

    Returns:
        str: The text with repeated whitespace collapsed.
    """
    def keep_first_character(match):
        # The matched run is never empty, so index 0 always exists.
        return match.group(0)[0]

    return re.sub(r"\s\s+", keep_first_character, text)

# Quick demonstration: every run of two or more whitespace characters is
# reduced to the first character of that run.
print(remove_repetition_of_whitespaces("Test    \n\ntest\n\t\t test\t\n\n test"))

def remove_repetition_of_whitespaces_decorator(callback: callable):
    """
    Decorate callback so that the string it returns has repeated whitespace collapsed.

    Args:
        callback (callable): A function that returns a string.

    Returns:
        callable: A wrapper that forwards all positional and keyword arguments
        to callback and passes the result through
        remove_repetition_of_whitespaces.
    """
    # functools.wraps copies __name__, __doc__ and friends onto the wrapper, so
    # help() and introspection still describe the decorated function rather
    # than the anonymous-looking inner "result".
    @functools.wraps(callback)
    def result(*args, **kwargs):
        return remove_repetition_of_whitespaces(callback(*args, **kwargs))

    return result

Test test
test	test


In [19]:
@remove_repetition_of_whitespaces_decorator
def parse_web_page(url, timeout=10):
    """
    Fetch the given web page and return its text content without HTML tags.

    The decorator applied to this function collapses repeated whitespace in
    the returned text.

    Args:
        url (str): The URL of the web page to fetch.
        timeout (float or tuple): Seconds to wait for the server, passed
            straight to requests.get. Defaults to 10.

    Returns:
        str: The text content of the page as a string.

    Raises:
        HTTPError: If the HTTP request returned an unsuccessful status code.
        requests.exceptions.Timeout: If the server does not respond within
            the given timeout.
    """
    print(f"url: {url}")
    # Without an explicit timeout requests waits indefinitely on a server that
    # never answers, which would hang the notebook cell.
    response = requests.get(url, timeout=timeout)
    print(f"status code: {response.status_code}")
    response.raise_for_status()
    # Name the parser explicitly: otherwise BeautifulSoup picks whichever
    # parser happens to be installed and emits a warning, so the extracted
    # text could differ from one machine to another.
    return BeautifulSoup(response.text, "html.parser").text

# Smoke test against two live pages (requires network access); only the first
# 255 characters of each extracted text are printed to keep the output short.
print(parse_web_page('https://fmi.chnu.edu.ua/')[:255])
print(parse_web_page('https://en.wikipedia.org/wiki/Web_scraping')[:255])

url: https://fmi.chnu.edu.ua/
status code: 200

Головна - Факультет математики та інформатики
Перейти до основного вмісту
[email protected]
Новини Україна, м. Чернівці, вул. Університетська, 28
Всі
Загальні
Оголошення
Події
Студенту
Викладачу
Вітання
Діяльність
Наукова
Навчально-методична
Міжна
url: https://en.wikipedia.org/wiki/Web_scraping
status code: 200

Web scraping - Wikipedia
Jump to content
Main menu
Main menu
move to sidebar
hide
Navigation
Main pageContentsCurrent eventsRandom articleAbout WikipediaContact usDonate
Contribute
HelpLearn to editCommunity portalRecent changesUpload file
Search
Search



## Get data from the API

Create a Python script that performs a basic request to an API endpoint and saves the returned data to a JSON file named `result.json`.

In [30]:
def parse_api(api_url, output_path="result.json", timeout=10):
    """
    Fetch the data of the given API endpoint and save it as JSON.

    If the response body is not valid JSON, a diagnostic message is printed
    and nothing is written.

    Args:
        api_url (str): The URL of the API endpoint.
        output_path (str): File the decoded JSON is written to.
            Defaults to "result.json".
        timeout (float or tuple): Seconds to wait for the server, passed
            straight to requests.get. Defaults to 10.

    Returns:
        None.

    Raises:
        HTTPError: If the HTTP request returned an unsuccessful status code.
        requests.exceptions.Timeout: If the server does not respond within
            the given timeout.
    """
    print(f"api_url: {api_url}")
    # Without an explicit timeout requests waits indefinitely on a server that
    # never answers.
    response = requests.get(api_url, timeout=timeout)
    print(f"status code: {response.status_code}")
    response.raise_for_status()

    try:
        response_json = response.json()
    except ValueError as error:
        # The JSON decoding errors raised by response.json() derive from
        # ValueError; catching only that keeps unrelated failures visible.
        print("An error occurred during the call response.json().\n"
              f"  Error: {type(error)}\n"
              f"  Error message: {error}\n")
        return None

    print(f"response.json(): {response_json}")

    with open(output_path, "w", encoding="utf-8") as file:
        json.dump(response_json, file)

    return None

# The first endpoint answers with JSON. The second URL is the faculty home page
# that parse_web_page scraped earlier -- presumably it returns HTML and thus
# exercises the "response is not JSON" branch (TODO confirm).
parse_api('https://api.github.com/')
parse_api('https://fmi.chnu.edu.ua/')

api_url: https://api.github.com/
status code: 200
response.json(): {'current_user_url': 'https://api.github.com/user', 'current_user_authorizations_html_url': 'https://github.com/settings/connections/applications{/client_id}', 'authorizations_url': 'https://api.github.com/authorizations', 'code_search_url': 'https://api.github.com/search/code?q={query}{&page,per_page,sort,order}', 'commit_search_url': 'https://api.github.com/search/commits?q={query}{&page,per_page,sort,order}', 'emails_url': 'https://api.github.com/user/emails', 'emojis_url': 'https://api.github.com/emojis', 'events_url': 'https://api.github.com/events', 'feeds_url': 'https://api.github.com/feeds', 'followers_url': 'https://api.github.com/user/followers', 'following_url': 'https://api.github.com/user/following{/target}', 'gists_url': 'https://api.github.com/gists{/gist_id}', 'hub_url': 'https://api.github.com/hub', 'issue_search_url': 'https://api.github.com/search/issues?q={query}{&page,per_page,sort,order}', 'issues_

## Parse the json file

Parse the `weather.json` file and return the weather data for a specific date, which is given as a parameter. Return the data as an array (a Python list).

In [31]:
def are_dates_same(date1: str, date2: str) -> bool:
    """
    Tell whether two "year-month-day" strings denote the same calendar date.

    Identical strings are accepted without being parsed. Otherwise both are
    parsed, so differently padded spellings such as "2024-8-19" and
    "2024-08-19" compare equal.

    Args:
        date1 (str): First date string.
        date2 (str): Second date string.

    Returns:
        bool: True if both strings represent the same date.

    Raises:
        ValueError: If the strings differ and one of them cannot be parsed.
    """
    date_format = "%Y-%m-%d"

    if date1 != date2:
        first, second = (datetime.strptime(raw, date_format) for raw in (date1, date2))
        return first == second

    return True

def parse_json(date, weather_json_path='./resources/weather.json'):
    """
    Parse the data from weather.json file and return weather data for a given date.

    The file is expected to hold an object whose 'daily' entry is a list of
    records, each carrying a 'date' field. If that structure is missing or a
    date cannot be parsed, a diagnostic message is printed and the matches
    collected so far are returned.

    Args:
        date (str): The date for which we look up the weather, written as
            year-month-day (zero padding is optional, e.g. '2024-8-19').
        weather_json_path (str): Path of the JSON file to read.
            Defaults to './resources/weather.json'.

    Returns:
        list: a list of weather data for a given date.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(weather_json_path, 'r', encoding="utf-8") as file:
        data = json.load(file)

    result = []
    try:
        for day_info in data['daily']:
            if are_dates_same(day_info['date'], date):
                result.append(day_info)
    except (KeyError, TypeError, ValueError) as error:
        # Only the failures malformed data can cause are reported here
        # (missing key, wrong container type, unparsable date); anything else
        # is a programming error and should surface.
        print("An error occurred during parsing\n"
              f"  Error: {type(error)}\n"
              f"  Error message: {error}\n")

    return result
    
# Example lookup. The month is written without zero padding; are_dates_same
# compares parsed dates, so it still matches a record stored as '2024-08-19'.
target_date = '2024-8-19'
print(parse_json(target_date))

[{'date': '2024-08-19', 'max_temperature': 30.0, 'min_temperature': 21.0, 'precipitation': 5.0, 'wind_speed': 10.0, 'humidity': 70, 'weather_description': 'Light rain'}]


## Parse the csv file

Parse the `weather.csv` file and return the weather data for a specific date, which is given as a parameter. Return the data as an array (a Python list).

In [None]:
def parse_csv(date, csv_path='./resources/weather.csv', date_column='date'):
    """
    Parse the data from weather.csv file and return weather data for a given date.

    Every row whose date column denotes the requested date is returned. If the
    date column is missing or a date cannot be parsed, a diagnostic message is
    printed and the matches collected so far are returned.

    Args:
        date (str): The date for which we look up the weather, written as
            year-month-day (zero padding is optional, e.g. '1997-5-22').
        csv_path (str): Path of the CSV file to read. NOTE(review): the
            default assumes the file sits next to weather.json in
            ./resources -- TODO confirm.
        date_column (str): Header of the column that holds the date.
            NOTE(review): the default assumes the CSV names it 'date', like
            the records in weather.json -- TODO confirm.

    Returns:
        list: a list of weather data for a given date; each item is a dict
        mapping column header to the raw string value of that row.

    Raises:
        OSError: If the file cannot be opened.
    """
    date_format = "%Y-%m-%d"

    # "utf-8-sig" reads plain UTF-8 as well as files that start with a BOM,
    # which would otherwise end up glued to the first header name.
    # newline='' is what the csv module asks for when reading from a file.
    with open(csv_path, 'r', encoding="utf-8-sig", newline='') as file:
        rows = list(csv.DictReader(file))

    result = []
    try:
        # Compare parsed dates rather than raw strings so that '1997-5-22'
        # matches a row stored as '1997-05-22'.
        target = datetime.strptime(date, date_format)
        for row in rows:
            if datetime.strptime(row[date_column], date_format) == target:
                result.append(row)
    except (KeyError, TypeError, ValueError) as error:
        print("An error occurred during parsing\n"
              f"  Error: {type(error)}\n"
              f"  Error message: {error}\n")

    return result
    
# Example lookup: print whatever parse_csv returns for 22 May 1997.
target_date = '1997-5-22'
print(parse_csv(target_date))

## Visualize data

Visualize the `weather.csv` data using matplotlib. Choose your own approach to data visualization. Save the results (as `.png`, `.webp` files, etc., your choice) in this repository.

In [None]:
def visualize_data():
    """
    Visualize the contents of weather.csv with Matplotlib.

    Intended behavior: read weather.csv, draw more than one kind of
    visualization, and save the resulting figures in the repository.

    The body is still a placeholder: nothing is read, drawn or saved yet.

    Returns:
        None.
    """

# Run the visualization step.
visualize_data()