In [1]:
import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI

In [2]:
# Load variables from a local .env file; override=True lets values from .env
# replace ones that are already set in the process environment.
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Sanity-check the key, most specific problem first.
# The whitespace test must run before the prefix test: a key with leading
# spaces or tabs also fails startswith("sk-proj-"), and would otherwise be
# reported as the wrong kind of key instead of a key that needs trimming.
if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook")
else:
    print("API key found and looks good so far!")

API key found and looks good so far!


In [3]:
# Create the client object that the cells below use for every chat-completions call.
openai = OpenAI()

In [4]:
# Smoke test: send one user message and print the text of the first choice.
message = "Hello, GPT! This is my first ever message to you! Hi!"
response = openai.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": message}],
)
print(response.choices[0].message.content)

Hello! Welcome! It's great to hear from you. How can I assist you today?


In [7]:
# A class to represent a Webpage
# If you're not familiar with Classes, check out the "Intermediate Python" notebook

# Some websites need you to use proper headers when fetching them:
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    """
    A fetched web page reduced to its title and its visible text.

    Attributes:
        url: the address that was requested
        title: text of the <title> element, or "No title found"
        text: newline-separated page text with script, style, img and input
            elements removed
    """

    def __init__(self, url, timeout=30):
        """
        Create this Website object from the given url using the BeautifulSoup library

        Args:
            url: address of the page to download
            timeout: seconds to wait for the server; without a timeout
                requests.get can block indefinitely on an unresponsive host

        Raises:
            requests.exceptions.RequestException: if the page cannot be fetched
                (connection failure, timeout, invalid URL, ...)
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')

        # soup.title.string is None when <title> is empty or holds nested
        # markup, so fall back in that case as well as when the tag is missing.
        title = soup.title.string if soup.title else None
        self.title = title if title else "No title found"

        # Documents without a <body> (fragments, some error pages) give
        # soup.body = None; use the whole document rather than raising
        # AttributeError.
        root = soup.body if soup.body is not None else soup
        for irrelevant in root(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = root.get_text(separator="\n", strip=True)

In [8]:
# Try the class on a real page: fetch it once, then show the title followed by
# the extracted text. Swap in another URL to experiment.

ed = Website("https://edwarddonner.com")
print(ed.title, ed.text, sep="\n")

Home - Edward Donner
Home
Connect Four
Outsmart
An arena that pits LLMs against each other in a battle of diplomacy and deviousness
About
Posts
Well, hi there.
I’m Ed. I like writing code and experimenting with LLMs, and hopefully you’re here because you do too. I also enjoy DJing (but I’m badly out of practice), amateur electronic music production (
very
amateur) and losing myself in
Hacker News
, nodding my head sagely to things I only half understand.
I’m the co-founder and CTO of
Nebula.io
. We’re applying AI to a field where it can make a massive, positive impact: helping people discover their potential and pursue their reason for being. Recruiters use our product today to source, understand, engage and manage talent. I’m previously the founder and CEO of AI startup untapt,
acquired in 2021
.
We work with groundbreaking, proprietary LLMs verticalized for talent, we’ve
patented
our matching model, and our award-winning platform has happy customers and tons of press coverage.
Connec

In [9]:
# System prompt sent with every summary request. Adjacent string literals are
# joined into a single string, so the value contains no line breaks.
system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)

In [10]:
def user_prompt_for(website):
    """
    Build the user message for a summary request.

    The prompt names the page title, states the instructions, and ends with
    the page text taken from ``website.text``.
    """
    intro = f"You are looking at a website titled {website.title}"
    instructions = (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    return intro + instructions + website.text

In [11]:
# Show the exact user prompt that is built for the page held in `ed`.
print(user_prompt_for(ed))

You are looking at a website titled Home - Edward Donner
The contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.

Home
Connect Four
Outsmart
An arena that pits LLMs against each other in a battle of diplomacy and deviousness
About
Posts
Well, hi there.
I’m Ed. I like writing code and experimenting with LLMs, and hopefully you’re here because you do too. I also enjoy DJing (but I’m badly out of practice), amateur electronic music production (
very
amateur) and losing myself in
Hacker News
, nodding my head sagely to things I only half understand.
I’m the co-founder and CTO of
Nebula.io
. We’re applying AI to a field where it can make a massive, positive impact: helping people discover their potential and pursue their reason for being. Recruiters use our product today to source, understand, engage and manage talent. I’m previously the founder and CEO of AI startup untapt,
acqui

In [13]:
# A minimal two-message conversation: the system message sets the persona and
# the user message asks the question.
messages = [
    dict(role="system", content="You are a snarky assistant"),
    dict(role="user", content="What is 2 + 2?"),
]

In [14]:
# Send the conversation held in `messages` and print only the reply text.
response = openai.chat.completions.create(
    model="gpt-4o-mini",
    messages=messages,
)
print(response.choices[0].message.content)

Oh, let me grab my calculator for that one... Just kidding! It’s 4. Math wizardry at its finest!


In [15]:
# Dump the whole response object to see every field that comes back.
print(response)

ChatCompletion(id='chatcmpl-C7iMqTYDSdJbXCAq2VFR9w0C7BLbX', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Oh, let me grab my calculator for that one... Just kidding! It’s 4. Math wizardry at its finest!', refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None))], created=1755954880, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier='default', system_fingerprint='fp_560af6e559', usage=CompletionUsage(completion_tokens=26, prompt_tokens=26, total_tokens=52, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))


In [16]:
def messages_for(website):
    """
    Return the two-message chat payload used to summarize ``website``.

    The first entry carries the notebook-level ``system_prompt``; the second
    is the user prompt produced by ``user_prompt_for(website)``.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_for(website)}
    return [system_message, user_message]

In [17]:
# Inspect the message list built for `ed`; as the last expression in the cell
# it is shown as the cell's output.
messages_for(ed)

[{'role': 'system',
  'content': 'You are an assistant that analyzes the contents of a website and provides a short summary, ignoring text that might be navigation related. Respond in markdown.'},
 {'role': 'user',
  'content': 'You are looking at a website titled Home - Edward Donner\nThe contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.\n\nHome\nConnect Four\nOutsmart\nAn arena that pits LLMs against each other in a battle of diplomacy and deviousness\nAbout\nPosts\nWell, hi there.\nI’m Ed. I like writing code and experimenting with LLMs, and hopefully you’re here because you do too. I also enjoy DJing (but I’m badly out of practice), amateur electronic music production (\nvery\namateur) and losing myself in\nHacker News\n, nodding my head sagely to things I only half understand.\nI’m the co-founder and CTO of\nNebula.io\n. We’re applying AI to a field where it can make a

In [19]:

def summarize(url, model="gpt-4o-mini"):
    """
    Fetch a web page and ask the chat model to summarize it.

    Args:
        url: address of the page to summarize
        model: name of the chat model to call; defaults to the model used by
            the other cells in this notebook

    Returns:
        The content string of the first choice in the API response.
    """
    website = Website(url)
    response = openai.chat.completions.create(
        model=model,
        messages=messages_for(website),
    )
    return response.choices[0].message.content

In [20]:
# Run the full fetch-and-summarize pipeline once; the returned string is shown
# as the cell's output.
summarize("https://edwarddonner.com")

'# Summary of Edward Donner\'s Website\n\nEdward Donner\'s website showcases his passion for coding and experimentation with large language models (LLMs). He is the co-founder and CTO of Nebula.io, a company dedicated to utilizing AI to improve talent discovery and management. Previously, he founded and led untapt, an AI startup that was acquired in 2021. The platform and its proprietary LLMs have garnered significant attention and have positive customer feedback.\n\n## News and Announcements\n- **May 28, 2025**: Announcement of courses aimed at helping individuals become experts and leaders in LLMs.\n- **May 18, 2025**: Hosting an AI Executive Briefing event.\n- **April 21, 2025**: Launching "The Complete Agentic AI Engineering Course."\n- **January 23, 2025**: Offering resources for a hands-on LLM workshop focusing on agents. \n\nThe website encourages connections with Edward and provides various channels for engagement.'

In [21]:
def display_summary(url):
    """Summarize the page at ``url`` and render the result as Markdown in the notebook."""
    display(Markdown(summarize(url)))

In [22]:
# Summarize the same page again, this time rendered as formatted Markdown
# rather than shown as a raw string.
display_summary("https://edwarddonner.com")

# Summary of Edward Donner's Website

The website belongs to Ed, a co-founder and CTO of Nebula.io, where he focuses on applying AI to enhance talent discovery and engagement. Ed shares a personal interest in coding, experimenting with large language models (LLMs), and DJing, along with a passion for electronic music.

## Key Features:
- **Nebula.io:** The company specializes in using proprietary LLMs to match talent and has received patents and awards for its platform.
- **Background:** Ed previously founded an AI startup called untapt, which was acquired in 2021.

## Announcements:
- **Courses and Workshops:**
  - **May 28, 2025:** Announcement for connecting courses to become an expert in LLMs.
  - **May 18, 2025:** Information about the 2025 AI Executive Briefing.
  - **April 21, 2025:** Launch of "The Complete Agentic AI Engineering Course."
  - **January 23, 2025:** A workshop focused on hands-on experience with agents and related resources.

Ed encourages visitors to connect with him, indicating his desire to engage with others interested in LLM technology and AI.

In [25]:
# Try the summarizer on a news site.
display_summary("https://cnn.com")

# Summary of CNN Website Content

CNN is a leading news website that provides breaking news, video content, and in-depth analysis across a wide range of topics, including:

- **Current Affairs:** Updates on ongoing global issues such as the Ukraine-Russia War and the Israel-Hamas conflict.
- **US News:** Significant events like the recent bus crash in New York resulting in fatalities, and discussions concerning U.S. immigration with reports on ICE arrests.
- **Health and Science:** Coverage on health issues, including a look into drowning prevention and innovations in technology such as the latest SpaceX Starship test.
- **Business and Economics:** Insights on market trends and U.S. Federal Reserve policies, indicating potential rate cuts.
- **Entertainment:** Celebrity news and cultural discussions, including the popularity of K-Pop and developments in television and film.

## Recent Highlights:
- **Famine in Gaza City:** There are reports confirming a severe famine situation in Gaza City.
- **Ghislaine Maxwell Interview:** Highlights from her recent DOJ interview suggest potentially significant revelations concerning Jeffrey Epstein.
- **Legal Affairs:** The conviction of a UK trans woman for sexual assault and the trial of an individual linked to a contract killing in the U.S.

This platform also emphasizes user interaction, soliciting feedback on ad relevance and user experience. CNN presents news across multiple mediums, including video and audio, catering to diverse preferences for consuming information.

In [None]:
# NOTE(review): this assigns the same value as the system_prompt defined
# earlier in the notebook, and the cell has no execution count; it looks
# redundant and is a candidate for removal.
system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)

In [27]:
# Ask the model a free-form troubleshooting question and print the reply text.
message = "I have ollama on my laptop. It was install yesterday. Now it is giving this error: Error 500 Internal Server Error: model requires more system memory (12.9 GiB) than is available (10.8 GiB)"
response = openai.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": message}],
)
print(response.choices[0].message.content)

The error you're encountering indicates that the model you're trying to run with Ollama requires more memory (RAM) than your system has available. In this case, the model needs approximately 12.9 GiB of RAM, while your laptop has only 10.8 GiB available.

Here are a few possible solutions to resolve this issue:

1. **Close Unnecessary Applications**: Make sure to close any applications or background processes that are consuming memory. This can free up RAM for Ollama.

2. **Use a Smaller Model**: If you have the option to choose between multiple models, consider using a smaller model that requires less memory.

3. **Increase Virtual Memory**: You can increase your system's virtual memory (paging file) which may help in some cases, but it won't be as effective as having physical RAM.

4. **Upgrade RAM**: If possible, consider upgrading your laptop's RAM. This would be the most effective long-term solution if you plan on running memory-intensive applications frequently.

5. **Cloud Solut

In [24]:
# Render the text of the current `response` as formatted Markdown.
# NOTE(review): this cell's execution count (24) is lower than that of the cell
# before it (27), and the output saved with it does not match the question
# asked there - re-run the notebook top to bottom to refresh it.
display(Markdown(response.choices[0].message.content))

Certainly! To create a DataFlow batch job that reads data from Google Cloud Storage (GCS) and writes it into BigQuery, you can use Apache Beam, which is the programming model behind Google Cloud Dataflow.

Here’s a basic outline to get you started along with example code in Python:

### Prerequisites
1. **Google Cloud Account**: Ensure you have a Google Cloud project.
2. **Enable APIs**: Enable the Dataflow API and BigQuery API.
3. **Service Account**: Create a service account with permissions to access GCS and BigQuery, or use the project's default service account with appropriate permissions.
4. **Apache Beam SDK**: Install the Apache Beam SDK for Python (or Java if you prefer).
   ```bash
   pip install apache-beam[gcp]
   ```

### Steps to Create a Dataflow Job

1. **Create a GCS Bucket**: Upload your data files to a Google Cloud Storage bucket.
2. **Define the Apache Beam Pipeline**: Create a pipeline that reads from GCS and writes to BigQuery.

### Example Code

Here’s a simple example using Python. This assumes you're reading a CSV file from GCS and writing it to BigQuery.

```python
import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions, GoogleCloudOptions

def run():
    # Set your options
    options = PipelineOptions()
    gcp_options = options.view_as(GoogleCloudOptions)
    gcp_options.project = 'your-gcp-project-id'  # Replace with your GCP project ID
    gcp_options.job_name = 'example-dataflow-job'  # Replace with a unique job name
    gcp_options.staging_location = 'gs://your-bucket-name/staging'  # Replace with your GCS staging bucket
    gcp_options.temp_location = 'gs://your-bucket-name/temp'  # Replace with your GCS temp location
    options.view_as(BeamOptions).runner = 'DataflowRunner'

    # Define the schema for the BigQuery table
    table_schema = {
        'fields': [
            {'name': 'field1', 'type': 'STRING'},
            {'name': 'field2', 'type': 'INTEGER'},
            # Add additional fields as necessary
        ]
    }

    with beam.Pipeline(options=options) as p:
        (p 
         | 'Read from GCS' >> beam.io.ReadFromText('gs://your-bucket-name/path/to/your/file.csv', skip_leading_rows=1)
         | 'Convert to Dictionary' >> beam.Map(lambda line: dict(zip(['field1', 'field2'], line.split(','))))  # Adjust based on your CSV
         | 'Write to BigQuery' >> beam.io.WriteToBigQuery(
                'your-gcp-project-id:your_dataset.your_table',
                schema=table_schema,
                write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND)
        )

if __name__ == '__main__':
    run()
```

### Notes:
- Replace `your-gcp-project-id`, `your-bucket-name`, and `your_dataset.your_table` with your actual project ID, bucket name, and BigQuery table name.
- The example expects a CSV file, so adjust the `Convert to Dictionary` logic based on the format of your input file.
- The `WriteToBigQuery` function uses `WRITE_APPEND`, meaning it will append data to the existing table.

### Running the Job
You can run your Python script from the command line. Make sure you have authenticated Google Cloud SDK configured to your Google Cloud account.

```bash
python your_script.py
```

### Monitoring
Once the job is submitted, you can monitor the job in the Google Cloud Console under Dataflow.

### Additional Configuration
Depending on the size of your data and the structure, you might want to configure additional options such as autoscaling, worker machine types, etc.

This should give you a good starting point! Let me know if you have any questions or need further assistance.

In [26]:
# Summarize another site with the same helper.
display_summary("https://anthropic.com")

# Anthropic - Overview

Anthropic is a research and product development company focused on creating AI technologies with safety as a core principle. Their flagship product, **Claude**, is designed to deliver advanced AI capabilities while prioritizing human benefit and responsible development.

## Key Features

- **Claude AI Models**: Includes the newest release, **Claude Opus 4.1**, and **Claude Sonnet 4**, which provide enhancements for coding and AI agent functionalities.
- **API Services**: Developers can create custom AI-powered applications using the Claude API.
- **Solutions for Various Sectors**: Tailored offerings for sectors like education, government, financial services, and customer support.

## Recent Announcements

- **ISO 42001 Certification**: Anthropic has achieved ISO 42001 certification, emphasizing its commitment to responsible AI development.
- **Claude Opus 4.1 Launch**: Announced to be the most intelligent model yet, it enhances both coding and AI agent capabilities.
- **Core Views on AI Safety**: Anthropic emphasizes the importance of responsible AI development, advocating for continuous learning and research in the field. 

## Additional Resources

- **Anthropic Academy**: A resource for learning how to effectively build with Claude and implement its capabilities.
- **Research Initiatives**: Ongoing projects and papers that address the societal impacts and safety concerns related to AI technologies. 

By focusing on the long-term well-being of humanity, Anthropic aims to set a standard for what responsible AI development should look like.