## Scraping & Preprocessing

In [None]:
import requests
from bs4 import BeautifulSoup
import json
import time
import os

# HTTP headers passed to every requests.get call in this cell.
HEADERS = {
    "User-Agent": "Mozilla/5.0"
}

# Author listing pages on culture.ru. Only the uncommented entries are
# scraped by the main loop below; commented-out entries are skipped.
author_base_urls = [
    #"https://www.culture.ru/literature/poems/author-osip-mandelshtam",#------
    "https://www.culture.ru/literature/poems/author-anna-akhmatova",
    #"https://www.culture.ru/literature/poems/author-mikhail-lermontov",#-------
    #"https://www.culture.ru/literature/poems/author-aleksandr-pushkin", # later
    #"https://www.culture.ru/literature/poems/author-sergei-esenin",
    #"https://www.culture.ru/literature/poems/author-vladimir-solovev",
    #"https://www.culture.ru/literature/poems/author-nikolai-zabolockii",
    #"https://www.culture.ru/literature/poems/author-vladimir-mayakovskii",
    #"https://www.culture.ru/literature/poems/author-aleksandr-blok",
    #"https://www.culture.ru/literature/poems/author-iosif-brodskii",
    #"https://www.culture.ru/literature/poems/author-fedor-tyutchev",
    #"https://www.culture.ru/literature/poems/author-marina-cvetaeva",
    #"https://www.culture.ru/literature/poems/author-afanasii-fet",
    #"https://www.culture.ru/literature/poems/author-robert-rozhdestvenskii",
    #"https://www.culture.ru/literature/poems/author-vladimir-vysockii",
    #"https://www.culture.ru/literature/poems/author-nikolai-gumilev",
    #"https://www.culture.ru/literature/poems/author-nikolai-nekrasov",
    #"https://www.culture.ru/literature/poems/author-boris-pasternak", #----
    #"https://www.culture.ru/literature/poems/author-ivan-bunin",
    #"https://www.culture.ru/literature/poems/author-anton-delvig",
    #"https://www.culture.ru/literature/poems/author-kondratiy-rileev",
    #"https://www.culture.ru/literature/poems/author-evgenii-boratynskii-baratynskii",
    #"https://www.culture.ru/literature/poems/author-vasilii-zhukovskii",
    #"https://www.culture.ru/literature/poems/author-vilgelm-kyukhelbeker",
    #"https://www.culture.ru/literature/poems/author-nikolai-gnedich",
    #"https://www.culture.ru/literature/poems/author-velimir-hlebnikov",
    #"https://www.culture.ru/literature/poems/author-andrei-belyi"
]

# Accumulates one dict per scraped poem; written to disk after the main loop.
all_poems = []

def get_all_pages(base_url):
    """Return the URLs of all listing pages for an author.

    The first listing page is downloaded and its pagination anchors
    (``a.CsObS`` elements whose ``href`` contains ``page=``) are scanned for
    the highest page number.

    Args:
        base_url: URL of the author's poem listing, with or without a
            ``?page=N`` suffix.

    Returns:
        ``["<base>?page=1", ..., "<base>?page=<max>"]``. If no pagination
        link can be parsed, the list contains page 1 only.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    from urllib.parse import parse_qs, urlsplit

    # Without a timeout a stalled connection would block the scrape forever.
    response = requests.get(base_url, headers=HEADERS, timeout=30)
    soup = BeautifulSoup(response.content, 'html.parser')

    max_page = 1
    for link in soup.select('a.CsObS[href*="page="]'):
        href = link.get("href", "")
        try:
            # Prefer the real ``page`` query parameter so links that carry
            # extra parameters (``?page=3&sort=x``) are still counted; fall
            # back to the plain split for hrefs without a query string.
            values = parse_qs(urlsplit(href).query).get("page")
            if not values:
                values = [href.split("page=")[-1]]
            page_numbers = [int(value) for value in values]
        except ValueError:
            # Unparseable link: ignore it.
            continue
        max_page = max(max_page, *page_numbers)

    url_base = base_url.split("?page=")[0]
    return [f"{url_base}?page={i}" for i in range(1, max_page + 1)]

def get_poem_links(page_url):
    """Collect absolute poem URLs from one listing page.

    Each ``div.CHPy6`` card is searched for an ``a.ICocV`` anchor; the
    anchor's ``href`` is resolved against ``https://www.culture.ru``.

    Args:
        page_url: URL of a single listing page.

    Returns:
        A list of absolute poem URLs in page order. An empty list is returned
        (and a message printed) when the page cannot be downloaded, so that
        one failing page does not abort the whole run.
    """
    from urllib.parse import urljoin

    try:
        # Without a timeout a stalled connection would block the scrape forever.
        response = requests.get(page_url, headers=HEADERS, timeout=30)
    except requests.RequestException as e:
        print(f"Failed to load listing page {page_url}: {e}")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')
    links = []
    for card in soup.find_all("div", class_="CHPy6"):
        a_tag = card.find("a", class_="ICocV")
        if a_tag and 'href' in a_tag.attrs:
            # urljoin handles root-relative, relative and absolute hrefs alike.
            links.append(urljoin("https://www.culture.ru", a_tag['href']))
    return links

def parse_poem(poem_url):
    """Download one poem page and extract its author, title and text.

    Args:
        poem_url: Absolute URL of the poem page.

    Returns:
        A dict with the keys ``author`` (``"Unknown"`` if the ``div.HjkFX``
        element is missing), ``title`` (``"Untitled"`` if ``div.rrWFt`` is
        missing), ``text`` (stanzas joined by a blank line), ``count`` and
        ``url``. ``count`` is the number of newline-separated lines in
        ``text``, so the blank separator lines between stanzas are included;
        it is 0 when no text was found.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(poem_url, headers=HEADERS, timeout=30)
    # An error page would otherwise be stored as an "Unknown"/"Untitled" poem.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    author_tag = soup.find("div", class_="HjkFX")
    author = author_tag.get_text(strip=True) if author_tag else "Unknown"

    title_tag = soup.find("div", class_="rrWFt")
    title = title_tag.get_text(strip=True) if title_tag else "Untitled"

    text_blocks = soup.find_all("div", attrs={"data-content": "text"})
    stanzas = []

    for block in text_blocks:
        stanza_parts = []
        for elem in block.children:
            if isinstance(elem, str):
                # Bare text node: keep it unless it is whitespace only.
                piece = elem.strip()
                if piece:
                    stanza_parts.append(piece)
            elif elem.name == "br":
                # <br> marks the end of a verse line.
                stanza_parts.append("\n")
            elif elem.name:
                # Any other tag contributes its stripped text content.
                piece = elem.get_text(strip=True)
                if piece:
                    stanza_parts.append(piece)

        stanza_text = "".join(stanza_parts).strip()
        if stanza_text:
            stanzas.append(stanza_text)

    # Assemble the full text with one blank line between stanzas.
    text = "\n\n".join(stanzas)

    # An empty poem has zero lines, not one.
    total_lines = text.count('\n') + 1 if text else 0

    return {
        "author": author,
        "title": title,
        "text": text,
        "count": total_lines,
        "url": poem_url
    }

# –ì–ª–∞–≤–Ω—ã–π —Ü–∏–∫–ª
for author_base_url in author_base_urls:
    print(f"–û–±—Ä–∞–±–∞—Ç—ã–≤–∞–µ–º –∞–≤—Ç–æ—Ä–∞: {author_base_url}")
    # Walk every listing page of the author, then every poem on that page.
    for page_url in get_all_pages(author_base_url):
        print(f"  –°—Ç—Ä–∞–Ω–∏—Ü–∞: {page_url}")
        for poem_url in get_poem_links(page_url):
            try:
                print(f"    –°—Ç–∏—Ö: {poem_url}")
                all_poems.append(parse_poem(poem_url))
                # One-second pause after each successfully parsed poem.
                time.sleep(1)
            except Exception as e:
                # A failing poem is reported and skipped; the run continues.
                print(f"    –û—à–∏–±–∫–∞ –ø—Ä–∏ –æ–±—Ä–∞–±–æ—Ç–∫–µ {poem_url}: {e}")

# Save everything collected in this run to a single JSON file.
with open("akhmatova.json", "w", encoding="utf-8") as out_file:
    json.dump(all_poems, out_file, ensure_ascii=False, indent=2)

print("–°–±–æ—Ä –∑–∞–≤–µ—Ä—à—ë–Ω. –°—Ç–∏—Ö–æ—Ç–≤–æ—Ä–µ–Ω–∏—è —Å–æ—Ö—Ä–∞–Ω–µ–Ω—ã")

In [None]:
import requests
from bs4 import BeautifulSoup
import json
import time
import os

# HTTP headers passed to every requests.get call in this cell.
HEADERS = {
    "User-Agent": "Mozilla/5.0"
}

# Listing pages on culture.ru. Only the uncommented entries are scraped by
# the main loop below; commented-out entries are skipped.
author_base_urls = [
    #"https://www.culture.ru/literature/poems/author-osip-mandelshtam",#------
    #"https://www.culture.ru/literature/poems/author-anna-akhmatova",
    #"https://www.culture.ru/literature/poems/author-mikhail-lermontov",#-------
    #"https://www.culture.ru/literature/poems/author-aleksandr-pushkin", # later
    #"https://www.culture.ru/literature/poems/author-sergei-esenin",
    #"https://www.culture.ru/literature/poems/author-vladimir-solovev",
    #"https://www.culture.ru/literature/poems/author-vladimir-mayakovskii",
    #"https://www.culture.ru/literature/poems/author-aleksandr-blok",
    #"https://www.culture.ru/literature/poems/author-iosif-brodskii",
    #"https://www.culture.ru/literature/poems/author-fedor-tyutchev",
    #"https://www.culture.ru/literature/poems/author-marina-cvetaeva",
    #"https://www.culture.ru/literature/poems/author-afanasii-fet",
    #"https://www.culture.ru/literature/poems/author-robert-rozhdestvenskii",
    #"https://www.culture.ru/literature/poems/author-vladimir-vysockii",
    #"https://www.culture.ru/literature/poems/author-nikolai-gumilev",
    #"https://www.culture.ru/literature/poems/author-nikolai-nekrasov",
    #"https://www.culture.ru/literature/poems/author-boris-pasternak", #----
    #"https://www.culture.ru/literature/poems/author-ivan-bunin",
    #"https://www.culture.ru/literature/poems/author-anton-delvig",
    #"https://www.culture.ru/literature/poems/author-kondratiy-rileev",
    #"https://www.culture.ru/literature/poems/author-evgenii-boratynskii-baratynskii",
    #"https://www.culture.ru/literature/poems/author-vasilii-zhukovskii",
    #"https://www.culture.ru/literature/poems/author-vilgelm-kyukhelbeker",
    #"https://www.culture.ru/literature/poems/author-nikolai-gnedich",
    #"https://www.culture.ru/literature/poems/author-velimir-hlebnikov",
    #"https://www.culture.ru/literature/poems/author-andrei-belyi"
    "https://www.culture.ru/literature/poems/author-aleksandr-pushkin/tag-korotkie"
]

def get_all_pages(base_url):
    """Return the URLs of all listing pages for an author.

    The first listing page is downloaded and its pagination anchors
    (``a.CsObS`` elements whose ``href`` contains ``page=``) are scanned for
    the highest page number.

    Args:
        base_url: URL of the poem listing, with or without a ``?page=N``
            suffix.

    Returns:
        ``["<base>?page=1", ..., "<base>?page=<max>"]``. If no pagination
        link can be parsed, the list contains page 1 only.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    from urllib.parse import parse_qs, urlsplit

    # Without a timeout a stalled connection would block the scrape forever.
    response = requests.get(base_url, headers=HEADERS, timeout=30)
    soup = BeautifulSoup(response.content, 'html.parser')

    max_page = 1
    for link in soup.select('a.CsObS[href*="page="]'):
        href = link.get("href", "")
        try:
            # Prefer the real ``page`` query parameter so links that carry
            # extra parameters (``?page=3&sort=x``) are still counted; fall
            # back to the plain split for hrefs without a query string.
            values = parse_qs(urlsplit(href).query).get("page")
            if not values:
                values = [href.split("page=")[-1]]
            page_numbers = [int(value) for value in values]
        except ValueError:
            # Unparseable link: ignore it.
            continue
        max_page = max(max_page, *page_numbers)

    url_base = base_url.split("?page=")[0]
    return [f"{url_base}?page={i}" for i in range(1, max_page + 1)]

def get_poem_links(page_url):
    """Collect absolute poem URLs from one listing page.

    Each ``div.CHPy6`` card is searched for an ``a.ICocV`` anchor; the
    anchor's ``href`` is resolved against ``https://www.culture.ru``.

    Args:
        page_url: URL of a single listing page.

    Returns:
        A list of absolute poem URLs in page order, or an empty list (after
        printing the error) if anything goes wrong while loading or parsing
        the page.
    """
    from urllib.parse import urljoin

    try:
        # Without a timeout a stalled connection would block the scrape forever.
        response = requests.get(page_url, headers=HEADERS, timeout=30)
        soup = BeautifulSoup(response.content, 'html.parser')
        links = []
        for card in soup.find_all("div", class_="CHPy6"):
            a_tag = card.find("a", class_="ICocV")
            if a_tag and 'href' in a_tag.attrs:
                # urljoin handles root-relative, relative and absolute hrefs alike.
                links.append(urljoin("https://www.culture.ru", a_tag['href']))
        return links
    except Exception as e:
        # Best effort: a broken page is reported and treated as empty.
        print(f"–û—à–∏–±–∫–∞ –ø—Ä–∏ –∑–∞–≥—Ä—É–∑–∫–µ —Å—Ç—Ä–∞–Ω–∏—Ü—ã {page_url}: {e}")
        return []

def parse_poem(poem_url):
    """Download one poem page and extract its author, title and text.

    Args:
        poem_url: Absolute URL of the poem page.

    Returns:
        A dict with the keys ``author`` (``"Unknown"`` if the ``div.HjkFX``
        element is missing), ``title`` (``"Untitled"`` if ``div.rrWFt`` is
        missing), ``text`` (stanzas joined by a blank line), ``count`` and
        ``url``. ``count`` is the number of newline-separated lines in
        ``text``, so the blank separator lines between stanzas are included;
        it is 0 when no text was found.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(poem_url, headers=HEADERS, timeout=30)
    # An error page would otherwise be stored as an "Unknown"/"Untitled" poem.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    author_tag = soup.find("div", class_="HjkFX")
    author = author_tag.get_text(strip=True) if author_tag else "Unknown"

    title_tag = soup.find("div", class_="rrWFt")
    title = title_tag.get_text(strip=True) if title_tag else "Untitled"

    text_blocks = soup.find_all("div", attrs={"data-content": "text"})
    stanzas = []

    for block in text_blocks:
        stanza_parts = []
        for elem in block.children:
            if isinstance(elem, str):
                # Bare text node: keep it unless it is whitespace only.
                piece = elem.strip()
                if piece:
                    stanza_parts.append(piece)
            elif elem.name == "br":
                # <br> marks the end of a verse line.
                stanza_parts.append("\n")
            elif elem.name:
                # Any other tag contributes its stripped text content.
                piece = elem.get_text(strip=True)
                if piece:
                    stanza_parts.append(piece)
        stanza_text = "".join(stanza_parts).strip()
        if stanza_text:
            stanzas.append(stanza_text)

    text = "\n\n".join(stanzas)
    # An empty poem has zero lines, not one.
    total_lines = text.count('\n') + 1 if text else 0

    return {
        "author": author,
        "title": title,
        "text": text,
        "count": total_lines,
        "url": poem_url
    }

def save_poems(poems, filename_base, part_number):
    """Dump one chunk of poems to ``<filename_base>_part<part_number>.json``.

    The file is written as UTF-8 JSON with non-ASCII characters kept as-is
    and a two-space indent; the resulting file name is printed afterwards.
    """
    filename = "{}_part{}.json".format(filename_base, part_number)
    with open(filename, mode="w", encoding="utf-8") as out_file:
        json.dump(poems, out_file, indent=2, ensure_ascii=False)
    print(f"–°–æ—Ö—Ä–∞–Ω–µ–Ω–æ: {filename}")

# –ì–ª–∞–≤–Ω—ã–π —Ü–∏–∫–ª
def _filename_base_from_url(url):
    """Derive the output file prefix from a listing URL.

    ``.../author-anna-akhmatova`` gives ``anna-akhmatova``. When further path
    segments follow the author segment (for example a tag filter) they are
    appended with ``_`` so the author name is not lost:
    ``.../author-aleksandr-pushkin/tag-korotkie`` gives
    ``aleksandr-pushkin_tag-korotkie``. URLs without an ``author-`` segment
    fall back to the last path segment.
    """
    segments = [seg for seg in url.split("?")[0].split("/") if seg]
    for idx, seg in enumerate(segments):
        if seg.startswith("author-"):
            return "_".join([seg[len("author-"):]] + segments[idx + 1:])
    return url.split("/")[-1].replace("author-", "")


for base_url in author_base_urls:
    filename_base = _filename_base_from_url(base_url)
    print(f"–û–±—Ä–∞–±–∞—Ç—ã–≤–∞–µ–º: {filename_base}")

    all_pages = get_all_pages(base_url)
    # Process listing pages in groups of 10 so each group is saved to its own
    # file and a late failure does not lose earlier results.
    page_chunks = [all_pages[i:i + 10] for i in range(0, len(all_pages), 10)]

    for i, chunk in enumerate(page_chunks, start=1):
        poems_chunk = []
        print(f"  –ë–ª–æ–∫ {i} (—Å—Ç—Ä–∞–Ω–∏—Ü—ã {chunk[0]} ... {chunk[-1]})")

        for page_url in chunk:
            poem_links = get_poem_links(page_url)
            for poem_url in poem_links:
                try:
                    print(f"    –û–±—Ä–∞–±–æ—Ç–∫–∞: {poem_url}")
                    poem = parse_poem(poem_url)
                    poems_chunk.append(poem)
                    # One-second pause after each successfully parsed poem.
                    time.sleep(1)
                except Exception as e:
                    # A failing poem is reported and skipped; the run continues.
                    print(f"    –û—à–∏–±–∫–∞ –ø—Ä–∏ —Ä–∞–∑–±–æ—Ä–µ {poem_url}: {e}")

        # Empty chunks produce no file.
        if poems_chunk:
            save_poems(poems_chunk, filename_base, i)

print("–°–±–æ—Ä –∑–∞–≤–µ—Ä—à—ë–Ω.")

In [None]:
!pip install pillow

### json merging

In [None]:
import json
import os

MAX_LENGTH = 14

# Folder holding the per-poet JSON files, and the merged output file
folder_path = "separately"
output_file = "short_poems_corpus.json"

all_poems = []

# Visit every .json file in the folder
for filename in os.listdir(folder_path):
    if not filename.endswith(".json"):
        continue
    file_path = os.path.join(folder_path, filename)
    print(f"üì• –û–±—Ä–∞–±–æ—Ç–∫–∞ {file_path}")
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            poems = json.load(f)
            # Keep only the short poems; a missing "count" is treated as 0
            filtered = [
                poem
                for poem in poems
                if poem.get("count", 0) <= MAX_LENGTH
            ]
            all_poems.extend(filtered)
    except Exception as e:
        # An unreadable or malformed file is reported and skipped
        print(f"–û—à–∏–±–∫–∞ –≤ {filename}: {e}")

# Save the merged corpus
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(all_poems, f, ensure_ascii=False, indent=2)

print(f"\n –ì–æ—Ç–æ–≤–æ. –°–æ—Ö—Ä–∞–Ω–µ–Ω–æ {len(all_poems)} —Å—Ç–∏—Ö–æ–≤ –≤ {output_file}")

## App

#### импорт

In [None]:
import random
from PIL import Image, ImageDraw, ImageFont, ImageEnhance
import json

#### test

In [None]:
text = "I wandered lonely as a cloud\nThat floats on high o'er vales and hills"

# iPhone 13 screen size
width, height = 1170, 2532
background_color = (0, 0, 0)
text_color = (255, 255, 255)
font_size = 60

# Horizontal margin in pixels, applied on the left and on the right
margin = 10

# Load the font
font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", font_size)

img = Image.new("RGB", (width, height), background_color)
draw = ImageDraw.Draw(img)

# Split into lines and choose the starting y so the block is vertically centred
lines = text.split('\n')
line_height = font.getbbox("A")[3] + 20
y = height // 2 - len(lines) * line_height // 2

# Each line is centred inside the frame between the margins,
# not across the full image width
max_text_width = width - 2 * margin
for line in lines:
    line_width = font.getlength(line)
    x = margin + (max_text_width - line_width) // 2
    draw.text((x, y), line, font=font, fill=text_color)
    y += line_height

img.save("english_poem_with_margins2.jpg")

In [None]:
# –ü–∞—Ä–∞–º–µ—Ç—Ä—ã —ç–∫—Ä–∞–Ω–∞ iPhone 13
width, height = 1170, 2532
background_color = (0, 0, 0)
text_color = (255, 255, 255)
font_size = 40
margin = 2  # horizontal margin in pixels on each side

# Number of lines a poem must have to be eligible; the filter and the error
# message below both use this value so they cannot drift apart.
line_count_filter = 14

# Load a serif font that supports Cyrillic
font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSerif.ttf"
font = ImageFont.truetype(font_path, font_size)

# Load the corpus
with open("poems2.json", "r", encoding="utf-8") as f:
    poems = json.load(f)

# Keep only poems with the requested number of lines
matching_poems = [poem for poem in poems if poem["count"] == line_count_filter]

# Stop early when the corpus has no such poem
if not matching_poems:
    raise ValueError(f"–ù–µ—Ç —Å—Ç–∏—Ö–æ—Ç–≤–æ—Ä–µ–Ω–∏–π —Å {line_count_filter} —Å—Ç—Ä–æ–∫–∞–º–∏ –≤ –∫–æ—Ä–ø—É—Å–µ.")

# Pick a random poem
selected_poem = random.choice(matching_poems)
text = selected_poem["text"]

# Create the image
img = Image.new("RGB", (width, height), background_color)
draw = ImageDraw.Draw(img)

# Prepare the lines; line height is measured on a Cyrillic capital letter
lines = text.split('\n')
line_height = font.getbbox("–ê")[3] + 20
y = height // 2 - len(lines) * line_height // 2

# Draw each line centred between the margins
max_text_width = width - 2 * margin
for line in lines:
    line_width = font.getlength(line)
    x = margin + (max_text_width - line_width) // 2
    draw.text((x, y), line, font=font, fill=text_color)
    y += line_height

# Save
img.save("russian_poem_wallpaper1.jpg")

print(f"–ì–æ—Ç–æ–≤–æ! –ò—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–æ —Å—Ç–∏—Ö–æ—Ç–≤–æ—Ä–µ–Ω–∏–µ: ¬´{selected_poem['title']}¬ª –æ—Ç {selected_poem['author']}")

In [None]:
# –ü–∞—Ä–∞–º–µ—Ç—Ä—ã
font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSerif.ttf"
font_size = 40
text_color = (255, 255, 255)
rectangle_color = (0, 0, 0, 180)  # semi-transparent black
margin = 50
line_spacing = 20

# Load the corpus
with open("short_poems_corpus.json", "r", encoding="utf-8") as f:
    poems = json.load(f)

# Filter by line count (the number can be changed)
poems_filtered = [p for p in poems if p["count"] == 14]
if not poems_filtered:
    raise ValueError("–ù–µ—Ç –ø–æ–¥—Ö–æ–¥—è—â–∏—Ö —Å—Ç–∏—Ö–æ—Ç–≤–æ—Ä–µ–Ω–∏–π.")

# Pick a poem
selected_poem = random.choice(poems_filtered)
lines = selected_poem["text"].split('\n')

# Load the background image
background = Image.open("/Users/ilamusatkin/Diploma Code/Poetry/IMG_0635.JPG").convert("RGBA")
width, height = background.size

# Prepare the font
font = ImageFont.truetype(font_path, font_size)

# Measure every line
line_heights = []
line_widths = []
for line in lines:
    bbox = font.getbbox(line)
    h = bbox[3] - bbox[1]
    w = font.getlength(line)
    line_heights.append(h)
    line_widths.append(w)

total_height = sum(line_heights) + line_spacing * (len(lines) - 1)
max_width = max(line_widths)

# Rectangle coordinates: the text block centred on the image, padded by margin
x1 = (width - max_width) // 2 - margin
y1 = (height - total_height) // 2 - margin
x2 = (width + max_width) // 2 + margin
y2 = (height + total_height) // 2 + margin

# Overlay the semi-transparent rectangle
overlay = Image.new("RGBA", background.size, (0, 0, 0, 0))
overlay_draw = ImageDraw.Draw(overlay)
overlay_draw.rectangle([x1, y1, x2, y2], fill=rectangle_color)
background = Image.alpha_composite(background, overlay)

# Image.alpha_composite returns a new image, so the drawing context must be
# created from the composited result; a context created earlier would draw on
# the discarded original and the text would be missing from the saved file.
draw = ImageDraw.Draw(background)

# Draw the text
y_text = y1 + margin
for i, line in enumerate(lines):
    line_width = font.getlength(line)
    x_text = (width - line_width) // 2
    draw.text((x_text, y_text), line, font=font, fill=text_color)
    y_text += line_heights[i] + line_spacing

# Save (JPEG has no alpha channel, hence the conversion to RGB)
output = background.convert("RGB")
output.save("russian_poem_wallpaper2.jpg")

print(f"–ì–æ—Ç–æ–≤–æ! –°—Ç–∏—Ö–æ—Ç–≤–æ—Ä–µ–Ω–∏–µ: ¬´{selected_poem['title']}¬ª –æ—Ç {selected_poem['author']}")

In [None]:
!pip install --upgrade Pillow

In [None]:
import json
import random
from PIL import Image, ImageDraw, ImageFont

# === –ø–∞—Ä–∞–º–µ—Ç—Ä—ã ===

CORPUS_PATH = "short_poems_corpus.json"
BACKGROUND_PATH = "/Users/ilamusatkin/Diploma Code/Poetry/IMG_0635.JPG"
OUTPUT_PATH = "russian_poem_wallpaper_final.jpg"
FONT_PATH = "/usr/share/fonts/truetype/dejavu/DejaVuSerif.ttf"

SCREEN_WIDTH, SCREEN_HEIGHT = 1170, 2532

FONT_SIZE = 40
LINE_SPACING = 20
MARGIN_TEXT = 50
MARGIN_TOP = 250     # distance from the top edge to the title
MARGIN_BOTTOM = 200  # distance from the bottom edge to the author

TEXT_COLOR = (255, 255, 255)
RECTANGLE_COLOR = (0, 0, 0, 180)

LINE_COUNT_FILTER = 14


# === load the corpus ===

with open(CORPUS_PATH, "r", encoding="utf-8") as f:
    poems = json.load(f)

poems_filtered = [p for p in poems if p["count"] == LINE_COUNT_FILTER]
if len(poems_filtered) == 0:
    raise ValueError("–ù–µ—Ç –ø–æ–¥—Ö–æ–¥—è—â–∏—Ö —Å—Ç–∏—Ö–æ—Ç–≤–æ—Ä–µ–Ω–∏–π.")

selected_poem = random.choice(poems_filtered)
lines = selected_poem["text"].split('\n')
title, author = selected_poem["title"], selected_poem["author"]


# === prepare the image ===

# The photo is converted to RGBA and stretched to the screen size
background = (
    Image.open(BACKGROUND_PATH)
    .convert("RGBA")
    .resize((SCREEN_WIDTH, SCREEN_HEIGHT))
)

# Both fonts are loaded from the same file at the same size
font = ImageFont.truetype(FONT_PATH, FONT_SIZE)
italic_font = ImageFont.truetype(FONT_PATH, FONT_SIZE)

draw = ImageDraw.Draw(background)


# === –ø–æ–¥–≥–æ—Ç–æ–≤–∫–∞ —Ç–µ–∫—Å—Ç–∞ ===

def get_text_size(text, font):
    """Return ``(width, height)`` of *text* rendered with *font*.

    The width is ``font.getlength(text)``; the height is the vertical extent
    of ``font.getbbox(text)`` (bottom minus top).
    """
    box = font.getbbox(text)
    return font.getlength(text), box[3] - box[1]


line_sizes = [get_text_size(line, font) for line in lines]
line_heights = [size[1] for size in line_sizes]
total_height = sum(line_heights) + LINE_SPACING * (len(lines) - 1)

title_width, title_height = get_text_size(title, italic_font)
author_width, author_height = get_text_size(author, italic_font)

# Vertical band left for the poem between the title and the author line
y_start = MARGIN_TOP + title_height + LINE_SPACING
y_end = SCREEN_HEIGHT - MARGIN_BOTTOM - author_height

available_height = y_end - y_start
if available_height < total_height:
    raise ValueError("–°–ª–∏—à–∫–æ–º –¥–ª–∏–Ω–Ω–æ–µ —Å—Ç–∏—Ö–æ—Ç–≤–æ—Ä–µ–Ω–∏–µ –¥–ª—è –∑–∞–¥–∞–Ω–Ω—ã—Ö –æ—Ç—Å—Ç—É–ø–æ–≤.")


# === darken the background in the text area ===

rectangle_top = MARGIN_TOP
rectangle_bottom = SCREEN_HEIGHT - MARGIN_BOTTOM
rectangle_left = MARGIN_TEXT
rectangle_right = SCREEN_WIDTH - MARGIN_TEXT

overlay = Image.new("RGBA", background.size, (0, 0, 0, 0))
overlay_draw = ImageDraw.Draw(overlay)
rectangle_box = [rectangle_left, rectangle_top, rectangle_right, rectangle_bottom]
overlay_draw.rectangle(rectangle_box, fill=RECTANGLE_COLOR)
background = Image.alpha_composite(background, overlay)


# === draw the text ===

# A fresh drawing context bound to the composited image
draw = ImageDraw.Draw(background)

# title, horizontally centred at the top margin
x_title = (SCREEN_WIDTH - title_width) // 2
draw.text((x_title, MARGIN_TOP), title, font=italic_font, fill=TEXT_COLOR)

# poem, one horizontally centred line after another
y_text = y_start
for line, (line_width, line_height) in zip(lines, line_sizes):
    x_text = (SCREEN_WIDTH - line_width) // 2
    draw.text((x_text, y_text), line, font=font, fill=TEXT_COLOR)
    y_text += line_height + LINE_SPACING

# author, right-aligned to the text margin
# NOTE(review): the author starts at the rectangle's bottom edge, i.e. below
# the darkened area - confirm this placement is intended.
x_author = SCREEN_WIDTH - MARGIN_TEXT - author_width
y_author = SCREEN_HEIGHT - MARGIN_BOTTOM
draw.text((x_author, y_author), author, font=italic_font, fill=TEXT_COLOR)


# === save ===

output = background.convert("RGB")
output.save(OUTPUT_PATH)

print(f"–ì–æ—Ç–æ–≤–æ! –°—Ç–∏—Ö–æ—Ç–≤–æ—Ä–µ–Ω–∏–µ: ¬´{title}¬ª –æ—Ç {author}")

### 28.05

In [None]:
!pip install ipywidgets pillow matplotlib

In [None]:
import json
import random
from PIL import Image, ImageDraw, ImageFont
from io import BytesIO
import ipywidgets as widgets
from IPython.display import display, clear_output
import matplotlib.pyplot as plt
import numpy as np



# –ù–∞—Å—Ç—Ä–æ–π–∫–∏ –ø–æ —É–º–æ–ª—á–∞–Ω–∏—é
DEFAULT_SETTINGS = dict(
    # Input files
    CORPUS_PATH="short_poems_corpus.json",
    FONT_PATH="/usr/share/fonts/truetype/dejavu/DejaVuSerif.ttf",
    # Output image size in pixels
    SCREEN_WIDTH=1170,
    SCREEN_HEIGHT=2532,
    # Typography and layout
    FONT_SIZE=40,
    LINE_SPACING=20,
    MARGIN_TEXT=50,
    MARGIN_TOP=250,
    MARGIN_BOTTOM=200,
    # Colours: RGB for the text, RGBA for the darkening rectangle
    TEXT_COLOR=(255, 255, 255),
    RECTANGLE_COLOR=(0, 0, 0, 180),
    LINE_COUNT_FILTER=14,
)

# –ó–∞–≥—Ä—É–∑–∫–∞ –∫–æ—Ä–ø—É—Å–∞
def load_poems():
    """Read the poem corpus named by ``DEFAULT_SETTINGS["CORPUS_PATH"]``.

    Returns the parsed JSON content. Any error while opening or parsing the
    file is printed and an empty list is returned instead.
    """
    try:
        with open(DEFAULT_SETTINGS["CORPUS_PATH"], mode="r", encoding="utf-8") as corpus_file:
            corpus = json.load(corpus_file)
    except Exception as e:
        print(f"–û—à–∏–±–∫–∞ –∑–∞–≥—Ä—É–∑–∫–∏ –∫–æ—Ä–ø—É—Å–∞: {e}")
        return []
    return corpus

poems = load_poems()

# –§—É–Ω–∫—Ü–∏—è –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –æ–±–æ–µ–≤
def generate_wallpaper(poem, bg_image=None, settings=DEFAULT_SETTINGS):
    """Render a poem onto a wallpaper-sized image.

    Args:
        poem: Dict with the keys ``"text"``, ``"title"`` and ``"author"``.
        bg_image: Optional PIL image used as background; it is resized to the
            configured screen size. A black background is used when omitted.
        settings: Dict with the same keys as ``DEFAULT_SETTINGS``. It is only
            read, never modified.

    Returns:
        A new RGB ``PIL.Image.Image`` with a darkened rectangle, the title
        centred at the top margin, the poem lines centred below it and the
        author right-aligned at the bottom margin.
    """
    size = (settings["SCREEN_WIDTH"], settings["SCREEN_HEIGHT"])
    # Image.alpha_composite only accepts images of identical (alpha) mode, so
    # the background must be RGBA like the overlay; an RGB background raises
    # ValueError.
    if bg_image is None:
        bg_image = Image.new("RGBA", size, "black")
    else:
        bg_image = bg_image.convert("RGBA").resize(size)

    lines = poem["text"].split('\n')
    title = poem["title"]
    author = poem["author"]

    try:
        font = ImageFont.truetype(settings["FONT_PATH"], settings["FONT_SIZE"])
    except OSError:
        # Font file missing or unreadable: fall back to Pillow's built-in font.
        font = ImageFont.load_default()
    # Title and author use the same face and size as the body text.
    italic_font = font

    def get_text_size(text, font):
        """Return (advance width, bounding-box height) of *text*."""
        bbox = font.getbbox(text)
        return font.getlength(text), bbox[3] - bbox[1]

    line_sizes = [get_text_size(line, font) for line in lines]
    title_size = get_text_size(title, italic_font)
    author_size = get_text_size(author, italic_font)

    # Darken the text area with a semi-transparent rectangle.
    overlay = Image.new("RGBA", bg_image.size, (0, 0, 0, 0))
    overlay_draw = ImageDraw.Draw(overlay)
    overlay_draw.rectangle(
        [settings["MARGIN_TEXT"], settings["MARGIN_TOP"],
         settings["SCREEN_WIDTH"] - settings["MARGIN_TEXT"],
         settings["SCREEN_HEIGHT"] - settings["MARGIN_BOTTOM"]],
        fill=settings["RECTANGLE_COLOR"]
    )
    bg_image = Image.alpha_composite(bg_image, overlay)

    # The drawing context is created after compositing because
    # alpha_composite returns a new image.
    draw = ImageDraw.Draw(bg_image)
    x_title = (settings["SCREEN_WIDTH"] - title_size[0]) // 2
    draw.text((x_title, settings["MARGIN_TOP"]), title, font=italic_font, fill=settings["TEXT_COLOR"])

    # Poem lines start below the title, each horizontally centred.
    y_text = settings["MARGIN_TOP"] + title_size[1] + settings["LINE_SPACING"]
    for line, (line_width, line_height) in zip(lines, line_sizes):
        x_text = (settings["SCREEN_WIDTH"] - line_width) // 2
        draw.text((x_text, y_text), line, font=font, fill=settings["TEXT_COLOR"])
        y_text += line_height + settings["LINE_SPACING"]

    x_author = settings["SCREEN_WIDTH"] - settings["MARGIN_TEXT"] - author_size[0]
    y_author = settings["SCREEN_HEIGHT"] - settings["MARGIN_BOTTOM"]
    draw.text((x_author, y_author), author, font=italic_font, fill=settings["TEXT_COLOR"])

    # Drop the alpha channel so the result can be saved as JPEG.
    return bg_image.convert("RGB")

# –°–æ–∑–¥–∞–µ–º –∏–Ω—Ç–µ—Ä—Ñ–µ–π—Å
output = widgets.Output()

# Selection widgets: all titles, and the sorted set of distinct authors
poem_selector = widgets.Dropdown(
    options=[p['title'] for p in poems],
    description='–°—Ç–∏—Ö–æ—Ç–≤–æ—Ä–µ–Ω–∏–µ:',
)
author_selector = widgets.Dropdown(
    options=sorted({p['author'] for p in poems}),
    description='–ê–≤—Ç–æ—Ä:',
)

# Action buttons
generate_btn = widgets.Button(description="–°–≥–µ–Ω–µ—Ä–∏—Ä–æ–≤–∞—Ç—å –æ–±–æ–∏")
random_btn = widgets.Button(description="–°–ª—É—á–∞–π–Ω–æ–µ —Å—Ç–∏—Ö–æ—Ç–≤–æ—Ä–µ–Ω–∏–µ")

# Appearance controls
font_size_slider = widgets.IntSlider(
    value=40,
    min=20,
    max=80,
    description='–†–∞–∑–º–µ—Ä —à—Ä–∏—Ñ—Ç–∞:',
)
text_color_picker = widgets.ColorPicker(
    description='–¶–≤–µ—Ç —Ç–µ–∫—Å—Ç–∞:',
    value='white',
)
opacity_slider = widgets.IntSlider(
    value=180,
    min=0,
    max=255,
    description='–ü—Ä–æ–∑—Ä–∞—á–Ω–æ—Å—Ç—å –ø–æ–¥–ª–æ–∂–∫–∏:',
)

# –û–±—Ä–∞–±–æ—Ç—á–∏–∫–∏ —Å–æ–±—ã—Ç–∏–π
def on_generate_click(b):
    """Render the selected poem and show it with a download link.

    The poem is looked up by the title in ``poem_selector``; when several
    poems share that title, the one by the author in ``author_selector`` is
    preferred. Font size, text colour and rectangle opacity are taken from
    the corresponding widgets.

    Args:
        b: The button that triggered the callback (unused).
    """
    import base64
    import html
    from PIL import ImageColor

    with output:
        clear_output()
        matches = [p for p in poems if p['title'] == poem_selector.value]
        if not matches:
            # Nothing selected (e.g. empty corpus): report instead of raising
            # StopIteration.
            print("No poem is selected.")
            return
        selected_poem = next(
            (p for p in matches if p['author'] == author_selector.value),
            matches[0],
        )

        settings = DEFAULT_SETTINGS.copy()
        settings.update({
            "FONT_SIZE": font_size_slider.value,
            # The colour picker may hold a colour name ('white', its default)
            # as well as '#rgb' / '#rrggbb'; ImageColor.getrgb understands
            # all of these. Only the RGB part is kept.
            "TEXT_COLOR": ImageColor.getrgb(text_color_picker.value)[:3],
            "RECTANGLE_COLOR": (0, 0, 0, opacity_slider.value)
        })

        img = generate_wallpaper(selected_poem, settings=settings)
        plt.figure(figsize=(10, 20))
        plt.imshow(np.array(img))
        plt.axis('off')
        plt.show()

        buf = BytesIO()
        img.save(buf, format="JPEG", quality=95)
        # A "base64" data URI must carry base64 text, not a hex dump.
        encoded = base64.b64encode(buf.getvalue()).decode("ascii")
        # Titles may contain quotes or angle brackets; escape them for HTML.
        download_name = html.escape(f'{selected_poem["title"]}.jpg', quote=True)
        display(widgets.HTML(
            f'<a download="{download_name}" href="data:image/jpeg;base64,{encoded}" target="_blank">'
            '–°–∫–∞—á–∞—Ç—å –æ–±–æ–∏</a>'
        ))

def on_random_click(b):
    """Pick a random poem, reflect it in both dropdowns and render it.

    Args:
        b: The button that triggered the callback; passed on to
            ``on_generate_click``.
    """
    if not poems:
        # random.choice would raise IndexError on an empty corpus.
        with output:
            clear_output()
            print("The poem corpus is empty.")
        return

    poem = random.choice(poems)
    # Select the author first and make sure the poem dropdown offers that
    # author's titles: a Dropdown rejects a value that is not among its
    # current options, which happens once the options have been narrowed to
    # a different author.
    author_selector.value = poem['author']
    poem_selector.options = [p['title'] for p in poems if p['author'] == poem['author']]
    poem_selector.value = poem['title']
    on_generate_click(b)

def on_author_change(change):
    """Limit the poem dropdown to titles by the newly selected author.

    Only changes of the ``value`` trait are handled; any other change
    notification is ignored.
    """
    if change['name'] != 'value':
        return
    poem_selector.options = [
        entry['title']
        for entry in poems
        if entry['author'] == change['new']
    ]

# Connect the callbacks to their widgets
random_btn.on_click(on_random_click)
generate_btn.on_click(on_generate_click)
author_selector.observe(on_author_change)

# Show the interface: controls stacked vertically, output area last
ui_rows = [
    widgets.HBox([random_btn]),
    widgets.HBox([author_selector, poem_selector]),
    font_size_slider,
    text_color_picker,
    opacity_slider,
    generate_btn,
    output,
]
display(widgets.VBox(ui_rows))