In [2]:
import requests
from bs4 import BeautifulSoup


class WebsiteImageParser:
    """Download one image from a web page (template-method base class).

    ``parse`` fixes the order of the steps; subclasses supply the
    site-specific ``extract_img`` step.

    Args:
        url: Address of the page to fetch.
        save_path: File path the downloaded image is written to.
    """

    def __init__(
        self,
        url,
        save_path="test.png",
    ) -> None:
        self.url = url
        self.save_path = save_path

    def parse(self):
        """Template method: it defines the overall structure of the algorithm.

        Fetches the page, parses it, asks ``extract_img`` for the image URL and
        downloads that image to ``self.save_path``, printing a log line after
        the extraction and after the save.
        """
        html = self.get_html(url=self.url)
        soup = self.get_soup(html=html)
        img_path = self.extract_img(soup=soup)
        print(f"LOG: parse image path {img_path}")
        self.save_image(img_path=img_path)
        print(f"Saved successfuly on path: {self.save_path}")

    def extract_img(self, soup):
        """Return the image URL found in ``soup``; subclasses must override.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        # Raising a bare string is itself a TypeError in Python 3
        # ("exceptions must derive from BaseException"), which hides the
        # real message; NotImplementedError is the conventional signal.
        raise NotImplementedError("You should write this method")

    def get_html(self, url):
        """Send an HTTP GET to ``url`` and return the response body as text."""
        response = requests.get(url=url)
        return response.text

    def get_soup(self, html):
        """Parse ``html`` into a BeautifulSoup tree."""
        # Name the parser explicitly: without it bs4 picks whichever parser
        # happens to be installed and emits a warning, so results can differ
        # between environments.
        soup = BeautifulSoup(html, "html.parser")
        return soup

    def save_image(self, img_path):
        """Download ``img_path`` and write the raw bytes to ``self.save_path``."""
        r = requests.get(img_path, allow_redirects=True)
        with open(self.save_path, "wb") as f:
            f.write(r.content)


class WikiParser(WebsiteImageParser):
    """Parser whose extraction step targets the Wikipedia logo image."""

    def extract_img(self, soup):
        """Return ``https://en.wikipedia.org`` + the ``src`` of the logo tag.

        The tag is the first ``img`` element with class ``mw-logo-icon``.
        """
        logo_tag = soup.find("img", class_="mw-logo-icon")
        relative_src = logo_tag["src"]
        return f"https://en.wikipedia.org{relative_src}"


class GoogleParser(WebsiteImageParser):
    """Parser whose extraction step takes the first image on the page."""

    def extract_img(self, soup):
        """Return ``https://www.google.com`` + the ``src`` of the first ``img``."""
        first_img = soup.find("img")
        relative_src = first_img["src"]
        return f"https://www.google.com{relative_src}"


# Run the class-based pipeline once per site; each parse() call fetches the
# page and writes the extracted image to the given path.
wiki_parser = WikiParser("https://en.wikipedia.org/wiki/Main_Page", "./wiki.png")
wiki_parser.parse()

google_parser = GoogleParser("https://www.google.com/", "./google.png")
google_parser.parse()

LOG: parse image path https://en.wikipedia.org/static/images/icons/wikipedia.png
Saved successfuly on path: ./wiki.png
LOG: parse image path https://www.google.com/images/branding/googlelogo/1x/googlelogo_white_background_color_272x92dp.png
Saved successfuly on path: ./google.png


In [None]:
from abc import ABC, abstractmethod


class ParserFactory(ABC):
    """Abstract factory: concrete subclasses decide which parser to build."""

    @abstractmethod
    def create_parser(self, url, save_path):
        """Build a parser for ``url`` that saves to ``save_path``.

        The base implementation does nothing and returns ``None``.
        """
        return None


class WikiParserFactory(ParserFactory):
    """Factory that produces ``WikiParser`` instances."""

    def some_logic_1(self):
        """Stand-in for factory-specific work; it only prints a message."""
        print("do some logic 1")

    def create_parser(self, url, save_path):
        """Run ``some_logic_1`` and then return a new ``WikiParser``."""
        self.some_logic_1()
        wiki = WikiParser(url=url, save_path=save_path)
        return wiki


class GoogleParserFactory(ParserFactory):
    """Factory that produces ``GoogleParser`` instances."""

    def some_logic_2(self):
        """Stand-in for factory-specific work; it only prints a message."""
        print("do some logic 2")

    def create_parser(self, url, save_path):
        """Run ``some_logic_2`` and then return a new ``GoogleParser``."""
        self.some_logic_2()
        google = GoogleParser(url=url, save_path=save_path)
        return google


# One factory instance per supported site (only the Google one is used below).
wiki_factory, google_factory = WikiParserFactory(), GoogleParserFactory()

# Same page three times, each saved under a different file name.
google_params = [
    ["https://www.google.com/", f"./google_{index}.png"] for index in range(1, 4)
]

# First build every parser through the factory ...
parsers: list[GoogleParser] = []
for param in google_params:
    url, save_path = param
    parser = google_factory.create_parser(url=url, save_path=save_path)
    parsers += [parser]

# ... then run them one after another.
for parser in parsers:
    parser.parse()

### Functional style


In [20]:
from functools import partial
from typing import Callable

import requests
from bs4 import BeautifulSoup


def template(
    get_html: Callable,
    get_soup: Callable,
    extract_img: Callable,
    save_image: Callable,
):
    """Template method: run the four injected steps in a fixed order.

    Args:
        get_html: Called with no arguments; its result is passed to ``get_soup``.
        get_soup: Called with the HTML; its result is passed to ``extract_img``.
        extract_img: Called with the soup; returns the image path to download.
        save_image: Called with the image path.
    """
    # Each step consumes the previous step's result, innermost call first.
    img_path = extract_img(get_soup(get_html()))
    print(f"LOG: Extracted image path {img_path}")

    save_image(img_path)
    print("Image saved successfully!")


def get_html(url: str) -> str:
    """Send an HTTP GET to ``url`` and return the response body as text."""
    return requests.get(url).text


def get_soup(html: str) -> BeautifulSoup:
    """Build a BeautifulSoup tree from ``html`` using ``html.parser``."""
    soup = BeautifulSoup(html, "html.parser")
    return soup


def save_image(img_path: str, filepath: str) -> None:
    """Download ``img_path`` and write the raw response bytes to ``filepath``."""
    download = requests.get(img_path)
    with open(filepath, "wb") as out_file:
        out_file.write(download.content)


def wiki_strategy(soup: BeautifulSoup) -> str:
    """Wikipedia strategy: locate the ``mw-logo-icon`` image and build its URL.

    Returns ``https://en.wikipedia.org`` followed by the tag's ``src`` value.
    """
    logo_tag = soup.find("img", class_="mw-logo-icon")
    relative_src = logo_tag["src"]
    return f"https://en.wikipedia.org{relative_src}"


def google_strategy(soup: BeautifulSoup) -> str:
    """Google strategy: take the first ``img`` tag and build its URL.

    Returns ``https://www.google.com`` followed by the tag's ``src`` value.
    """
    first_img = soup.find("img")
    relative_src = first_img["src"]
    return f"https://www.google.com{relative_src}"


def parser(url: str, filepath: str, extract_img=None):
    """Build a ready-to-run image pipeline for one page.

    Args:
        url: Page to fetch.
        filepath: File the downloaded image is written to.
        extract_img: Strategy that maps the parsed page to an image URL.
            When omitted, ``wiki_strategy`` is used, so existing
            two-argument calls keep their behaviour.

    Returns:
        A ``functools.partial`` of ``template`` with every step bound; nothing
        is fetched or written until the returned object is called.
    """
    # Resolve the default at call time rather than in the signature, so the
    # strategy is looked up when the pipeline is built.
    if extract_img is None:
        extract_img = wiki_strategy

    return partial(
        template,
        partial(get_html, url),
        get_soup=get_soup,
        extract_img=extract_img,
        save_image=partial(save_image, filepath=filepath),
    )

# Only builds the pipeline; nothing is downloaded until wiki_parser() is called.
wiki_parser = parser(
    filepath="wiki_func.png",
    url="https://en.wikipedia.org/wiki/Main_Page",
)

In [22]:
from typing import Callable


def parser_factory(
    factory_logic: Callable[..., object],
) -> Callable[..., object]:
    """Wrap ``factory_logic`` in a factory function.

    Args:
        factory_logic: Callable that receives the factory's arguments and
            returns the object to hand back to the caller.

    Returns:
        A function that forwards all positional and keyword arguments to
        ``factory_logic`` and returns its result unchanged.
    """

    def create_parser(*args, **kwargs):
        """Forward every argument to ``factory_logic`` and return its result."""
        return factory_logic(*args, **kwargs)

    return create_parser


def wiki_factory_logic(*args, **kwargs):
    """Print two log lines, then build a pipeline via ``parser``.

    All arguments are forwarded to ``parser`` unchanged.
    """
    for message in ("wiki_factory_logic", "I love wikipedia."):
        print(message)
    built = parser(*args, **kwargs)
    return built


def google_parser_logic(*args, **kwargs):
    """Print a log line, then build a pipeline that uses ``google_strategy``.

    All arguments are forwarded to ``parser``.

    Returns:
        A ``functools.partial`` of the pipeline with ``extract_img`` bound to
        ``google_strategy``.
    """
    print("google_parser_logic")
    # parser() binds the Wikipedia extraction strategy; wrapping its result in
    # another partial overrides that keyword, so this factory really extracts
    # with the Google strategy.
    return partial(parser(*args, **kwargs), extract_img=google_strategy)


# Create one factory function per site.
wiki_parser_fabric = parser_factory(wiki_factory_logic)
google_parser_fabric = parser_factory(google_parser_logic)

# Usage.
# NOTE: despite its name, google_params currently holds Wikipedia pages; the
# Google variants are kept below, disabled.
#   ["https://www.google.com/", "./google_1_func.png"],
#   ["https://www.google.com/", "./google_2_func.png"],
#   ["https://www.google.com/", "./google_3_func.png"],
google_params = [
    ["https://en.wikipedia.org/wiki/Main_Page", f"./wiki_{index}_func.png"]
    for index in range(1, 4)
]

# Build every pipeline first (alternative: google_parser_fabric(url, save_path)).
parsers = []
for url, save_path in google_params:
    parsers.append(wiki_parser_fabric(url, save_path))

# Then execute them; each call fetches the page and saves the image.
for p in parsers:
    p()

wiki_factory_logic
wiki_factory_logic
wiki_factory_logic
LOG: Extracted image path https://en.wikipedia.org/static/images/icons/wikipedia.png
Image saved successfully!
LOG: Extracted image path https://en.wikipedia.org/static/images/icons/wikipedia.png
Image saved successfully!
LOG: Extracted image path https://en.wikipedia.org/static/images/icons/wikipedia.png
Image saved successfully!


In [11]:
# Show the current contents of the `parsers` list as the cell output.
# NOTE(review): this cell's execution count (11) is lower than that of the cell
# that builds `parsers` (22), so the saved output below may come from an
# earlier kernel state - re-run after "Restart & Run All" to refresh it.
parsers

[None, None, None]