In [3]:
pip install transformers beautifulsoup4 requests



In [4]:
import os
import requests
from bs4 import BeautifulSoup
from transformers import pipeline

In [5]:
from huggingface_hub import login

hf_token = os.environ.get('HF_TOKEN')

In [6]:
# Function to extract company information from a URL
def get_company_data(url):
    # Send a GET request to the URL
    response = requests.get(url)

    if response.status_code != 200:
        raise Exception(f"Failed to retrieve webpage: {url}")

    # Parse the HTML content of the page
    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract the title of the company (assuming it's in a <title> tag)
    title = soup.title.string if soup.title else "Unknown Title"

    # Extract the description and other relevant information
    # This depends on the structure of the webpage; here we use generic examples
    # Adjust according to the page's HTML structure

    # Example of extracting description (from meta tags)
    description = soup.find('meta', attrs={'name': 'description'}) or soup.find('meta', attrs={'property': 'og:description'})
    description = description['content'] if description else "No description available."

    # Example of extracting content (using paragraphs or div tags)
    content = " ".join([p.text for p in soup.find_all('p')])

    return {
        'title': title,
        'description': description,
        'content': content
    }


In [28]:
# Function to generate text using GPT-Neo
def generate_article(company_data):
    # Initialize the GPT-Neo model
    generator = pipeline('text-generation', model='EleutherAI/gpt-neo-1.3B')

    # Create the prompt using the company data
    prompt = (
        f"You are a professional content writer helping a sales team understand potential clients. Your task is to create a concise and informative article about a company based on the information below.\n\n"
        f"Company Name: {company_data['title']}\n"
        f"Description: {company_data['description']}\n"
        f"Content: {company_data['content']}\n\n"
        f"Write a professional summary about the company in 3-5 sentences. Focus on their mission, services, and any standout qualities. The tone should be formal yet approachable, suitable for a sales briefing."
    )

    # Generate the text using GPT-Neo
    result = generator(prompt, max_new_tokens=200, num_return_sequences=1, truncation=True)

    # Extract and return the generated text
    generated_text = result[0]['generated_text']
    return generated_text



In [8]:
# Function to save generated article to a .txt file
def save_to_txt(generated_text, filename="generated_article.txt"):
    with open(filename, 'w') as file:
        file.write(generated_text)
    print(f"Article saved to {filename}")



In [29]:
# URL of the company website
url = 'https://www.ug-inc.com/'

# Get the company data from the URL
company_data = get_company_data(url)

# Generate the article
informative_article = generate_article(company_data)

# Print the informative article
print(informative_article)

# Save the informative article to a .txt file
save_to_txt(informative_article)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


You are a professional content writer helping a sales team understand potential clients. Your task is to create a concise and informative article about a company based on the information below.

Company Name: Printing Services in El Paso | Universal Graphics, Inc. | Print Shop
Description: We are a company specializing in commercial printing services. Our print shop services include promotional products, graphic & design, and much more.
Content: Print Shop — Professional Printing Services in El Paso Since 1988, Universal Graphics, El Paso’s go-to print shop, has been at the center of the ever-changing printing industry. It began a generation ago. Founder Gil Lespron grew up watching his father busy at work in the printing press. He soon decided that the art and craft of print was something he was interested in. He too wanted to help people tell their stories through the printed word. Since then, the printing business has changed as the mechanisms and processes for creating print materi