### Web Scrapper

In [1]:
import requests
from bs4 import BeautifulSoup

data = requests.get("https://heatherbcooper.substack.com/ ")
soup = BeautifulSoup(data.content, 'html.parser')

holders = soup.select('div', class_='portable-archive-list')
print(holders)

[<div id="entry">
<div class="main typography use-theme-bg" id="main"><div class="pencraft pc-display-contents pc-reset pubTheme-yiXxQA"><div class="main-menu animated with-nav with-wordmark" data-testid="navbar"><div class="main-menu-content"><div class="topbar"><div class="topbar-content"><div class="navbar-logo-container"><a href="/"><img class="navbar-logo" src="https://substackcdn.com/image/fetch/w_96,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcdbe06c1-7a72-4d0e-b45e-484bd860fac6_500x500.png"/></a></div><h1 class="navbar-title left-align loading"><a class="navbar-title-link" href="/"><img alt="Visually AI by Heather Cooper" class="navbar-logo-wordmark" src="https://substackcdn.com/image/fetch/e_trim:10:white/e_trim:10:transparent/h_72,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F70959879-af34-4111-bce8-3840073316e6_672x128.png"/></a>

In [2]:
import re

keywords= ['ai','artificial intelligence','fashion','retail','creative technology', 'fashion innovation', 'ml', 'machine learning', 'generative ai', 'gan ai', 'comfy ui', '2d to 3d', 'text to 3d', 'technology conferences', 'garment', 'clothing', 'technology and design', 'fashion design', 'creative computing' ]
base_url = "https://heatherbcooper.substack.com/ "
headers = {'User-Agent': 'Mozilla/5.0'}

response = requests.get(base_url, headers=headers)
soup = BeautifulSoup(response.content, 'html.parser')

urls = []

for link in soup.find_all('a', href=True):
    href = link['href']

    if href.endswith('/comments'):
        continue

    if any(re.search(rf'\b{re.escape(keyword)}\b', href, re.IGNORECASE) for keyword in keywords):
        if href.startswith('http'):
            urls.append(href)
        else:
            urls.append(base_url.rstrip('/') + '/' + href.lstrip('/'))

urls = list(set(urls))

print(urls)

['https://heatherbcooper.substack.com/p/my-ai-favorites-this-year', 'https://heatherbcooper.substack.com/p/deepseek-shakes-the-world-of-ai', 'https://heatherbcooper.substack.com/p/evolving-tools-redefine-ai-video', 'https://heatherbcooper.substack.com/p/meet-lumas-new-ai-video-tool-ray2', 'https://heatherbcooper.substack.com/p/discover-these-essential-ai-tools', 'https://heatherbcooper.substack.com/p/multi-modal-ai-video-creation-simplified', 'https://heatherbcooper.substack.com/p/this-week-in-ai-3d-from-images-video']


In [141]:
previews = []

for article in tqdm(urls):
    try:
        data = requests.get(article, headers=headers)
        soup = BeautifulSoup(data.content, 'html.parser')

        # Extract title
        title_tag = soup.find('h1')
        title = title_tag.text.strip() if title_tag else "No Title"

        # Extract subtitle
        subtitle_tag = soup.find('h3')
        subtitle = subtitle_tag.text.strip() if subtitle_tag else "No Subtitle"

        # Locate article content area to extract relevant images
        content_divs = soup.find_all(['div', 'article'], class_=re.compile(r'(content|article|post|entry)', re.IGNORECASE))

        image = None
        for div in content_divs:
            img_tags = div.find_all('img', src=True)  # Only images with actual sources
            for img in img_tags:
                if 'srcset' in img.attrs:
                    image = img['srcset'].split(" ")[0]  # Get the highest-quality image
                    break
                elif 'data-src' in img.attrs:  # Some sites use lazy-loaded images
                    image = img['data-src']
                    break
                elif 'src' in img.attrs:
                    image = img['src']
                    break

            if image:  # Stop searching once a valid image is found
                break

        article_preview = {
            'title': title,
            'subtitle': subtitle,
            'image': image if image else "No Relevant Image Found"
        }
        previews.append(article_preview)

    except Exception as e:
        print(f"Error processing {article}: {e}")

100%|██████████| 7/7 [00:01<00:00,  5.45it/s]


In [142]:
print(previews)

[{'title': '', 'subtitle': 'Simplify your video workflow with AI', 'image': 'https://substackcdn.com/image/fetch/w_96,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcdbe06c1-7a72-4d0e-b45e-484bd860fac6_500x500.png'}, {'title': '', 'subtitle': '& Pika 2.1 new update is here.', 'image': 'https://substackcdn.com/image/fetch/w_96,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcdbe06c1-7a72-4d0e-b45e-484bd860fac6_500x500.png'}, {'title': '', 'subtitle': 'From 3D worlds to consistent characters, explore this week’s AI trends', 'image': 'https://substackcdn.com/image/fetch/w_96,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcdbe06c1-7a72-4d0e-b45e-484bd860fac6_500x500.png'}, {'title': '', 'subtitle': 'Making AI work for you daily', 'image': 'https://substackcdn.com/image/fetch/w_9

### Newsletter Template

In [143]:
template = open('email.html')
soup = BeautifulSoup(template.read(), "html.parser")

In [144]:
article_template = soup.find('div', attrs={'class':'columns'})
html_start = str(soup)[:str(soup).find(str(article_template))]
html_end = str(soup)[str(soup).find(str(article_template))+len(str(article_template)):]
html_start = html_start.replace('\n','')
html_end = html_end.replace('\n','')

In [145]:
newsletter_content = ""
for i,article in enumerate(previews):
    
    try:
        img = article_template.img
        img['src'] = article['image']
        article_template.img.replace_with(img)
    except:
        pass
    
    title = article_template.h1
    title.string = article['title'][:300]
    
    subtitle = article_template.p
    subtitle.string = article['subtitle'][:300] + "..."
    
    link = article_template.a
    link['href'] = urls[i]
    link.string = urls[i]
    article_template.a.replace_with(link)
    
    
    newsletter_content += str(article_template).replace('\n','')

email_content = html_start + newsletter_content + html_end

In [167]:
print(BeautifulSoup(email_content).prettify())

soup = BeautifulSoup(email_content, "html.parser")

html_output = soup.prettify()

filename = "newsletter_output.html"

with open(filename, "w", encoding="utf-8") as file:
    file.write(html_output)

print(f"html saved :))))))))))))) {filename}")

<!DOCTYPE html>
<html>
 <head>
  <meta charset="utf-8"/>
  <meta content="width=device-width, initial-scale=1" name="viewport"/>
  <link href="https://cdn.jsdelivr.net/npm/bulma@0.9.1/css/bulma.min.css" rel="stylesheet"/>
 </head>
 <body>
  <section class="section">
   <div class="columns">
    <div class="column is-one-fifth">
     <img alt="article_pic" src="https://substackcdn.com/image/fetch/w_96,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcdbe06c1-7a72-4d0e-b45e-484bd860fac6_500x500.png"/>
    </div>
    <div class="column">
     <h1 class="title">
     </h1>
     <p class="subtitle">
      Simplify your video workflow with AI...
     </p>
     <a href="https://heatherbcooper.substack.com/p/multi-modal-ai-video-creation-simplified">
      https://heatherbcooper.substack.com/p/multi-modal-ai-video-creation-simplified
     </a>
    </div>
   </div>
   <div class="columns">
    <div class="column is-one-fifth"

### Send Email

In [161]:
import smtplib, ssl
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart

In [164]:
sender_email = "fia.newsletter.2025@gmail.com"
receiver_email = "asli.ilhan@arts.ac.uk"
# receiver_emails = ["asli.ilhan@arts.ac.uk", "m.robertsislam@fashion.arts.ac.uk", "c.kazantzis@arts.ac.uk", "e.cies@fashion.arts.ac.uk", "t.ellins@arts.ac.uk", "l.chatterton@fashion.arts.ac.uk"]
password = "kuvx ouol tnem relg"

newsletter_link = "https://fia-newsletter.vercel.app"

message = MIMEMultipart("alternative")
message["Subject"] = "🚀 Our Newsletter is Updated!"
message["From"] = sender_email
message["To"] = receiver_email

text = f"""Hey Team,
Our latest newsletter is now available. Click below to read it:

🔗 {newsletter_link} ➡️

Stay inspired!

The FIA's Newsletter RoBot 🤖
"""

html = f"""
<html>
    <body style="font-family: Arial, sans-serif; color: #333;">
        <p>Hey Team,</p>
        <p>Our latest newsletter is now available. Click below to read it:</p>
        <p>
            <a href="{newsletter_link}" style="font-size: 16px; text-decoration: none; color: #007BFF; font-weight: bold;">
                🔗 Read the Newsletter ➡️
                <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
                    <line x1="5" y1="12" x2="19" y2="12"></line>
                    <polyline points="12 5 19 12 12 19"></polyline>
                </svg>
            </a>
        </p>
        <p>Stay inspired!<br>
        <br>The FIA's Newsletter RoBot 🤖</p>
    </body>
</html>
"""

# part1 = MIMEText(text, "plain")
part2 = MIMEText(html, "html")
# message.attach(part1)
message.attach(part2)

context = ssl.create_default_context()
with smtplib.SMTP_SSL("smtp.gmail.com", 465, context=context) as server:
    server.login(sender_email, password)
    server.sendmail(sender_email, receiver_email, message.as_string())

print("🚀 Notification email sent successfully!")

🚀 Notification email sent successfully!
