## "extract" instead of "summarize"

In [1]:
import openai
import os

from dotenv import load_dotenv, find_dotenv

# Locate the nearest .env file and load its variables into the environment;
# the return value is bound to `_` only so it is not left dangling.
_ = load_dotenv(find_dotenv())

# Hand the key to the openai module (None when OPENAI_API_KEY is unset).
openai.api_key = os.environ.get('OPENAI_API_KEY')

In [2]:
def get_completion(prompt, model="gpt-3.5-turbo", temperature=0):
    """Send one user message to the chat model and return the reply text.

    Args:
        prompt: Text sent as the content of the single "user" message.
        model: Name of the chat model passed to the API.
        temperature: Degree of randomness of the model's output. Defaults
            to 0, the value that was previously hard-coded.

    Returns:
        The "content" entry of the first choice's message.
    """
    # NOTE(review): openai.ChatCompletion is the pre-1.0 interface of the
    # openai package -- confirm the installed version still provides it.
    messages = [{"role": "user", "content": prompt}]
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=temperature,
    )
    # Only the first choice is used; any further choices are ignored.
    return response.choices[0].message["content"]


Course Description: https://areganti.notion.site/Applied-LLMs-Mastery-2024-562ddaa27791463e9a1286199325045c


In [13]:
# Course description text; it is interpolated verbatim into the summarize
# and extract prompts built further down in this notebook.
applied_LLM="""
Welcome to an exciting 10-week journey into the world of large language models!

LLMs are currently experiencing a substantial surge in popularity. Their significance has notably increased in diverse applications, including natural language processing, machine translation, and code and text generation. This rise in prominence is driven by a growing trend among both companies and individuals to leverage LLMs for automating a wide range of tasks. Understanding and learning about LLMs is highly valuable in light of their growing usage and transformative impact across various domains.

If you're eager to dive into this trend, you'll discover plenty of resources on the internet. But here's the catch – many of them are all over the place, missing a step-by-step guide from basics to real-world use. This can be overwhelming, and you might feel a bit lost. 

Imagine this course as your comprehensive guide, exploring every aspect of using LLMs in real-world scenarios. It serves as the crucial link that brings everything together. Each week, we'll delve into the above topics, providing in-depth insights and hands-on experiences. This approach ensures you gain a thorough and well-rounded understanding of every facet within the topic.
"""

## summarize

In [14]:
# Prompt asking for a summary of the course description in at most 50 words.
# The f-string embeds applied_LLM between triple backticks when this cell runs.
# The trailing backslash joins its line with the next inside the string, so no
# newline is sent between "triple" and "backticks" (the indentation remains).
summarize_prompt = f"""
    Your task is to generate a short summary of an online course. 

    Summarize the course description below, delimited by triple \
    backticks in at most 50 words. 

    Description: ```{applied_LLM}```
    """

In [15]:
# Send the summarize prompt to the model and keep the reply for later cells.
summarize_response = get_completion(summarize_prompt)
# Passing "\n" as a second argument prints the reply followed by a space and
# an extra blank line.
print(summarize_response, "\n")

This 10-week course offers a comprehensive guide to large language models (LLMs), exploring their applications in natural language processing, machine translation, and more. Gain in-depth insights and hands-on experience to understand and leverage LLMs effectively in various domains. 



## extract

In [16]:
# Same wording as the summarize prompt, with "summary"/"Summarize" swapped for
# "extract"/"Extract", so the verb is the only thing that differs.
# The f-string embeds applied_LLM between triple backticks when this cell runs.
# The trailing backslash joins its line with the next inside the string, so no
# newline is sent between "triple" and "backticks" (the indentation remains).
extract_prompt = f"""
    Your task is to generate a short extract of an online course. 

    Extract the course description below, delimited by triple \
    backticks in at most 50 words. 

    Description: ```{applied_LLM}```
    """

In [17]:
# Send the extract prompt to the model and keep the reply for later cells.
extract_response = get_completion(extract_prompt)
# Passing "\n" as a second argument prints the reply followed by a space and
# an extra blank line.
print(extract_response, "\n")

Join us for a 10-week journey into the world of large language models! Explore their growing significance in natural language processing, machine translation, and more. This course provides a step-by-step guide to using LLMs in real-world scenarios, ensuring a comprehensive understanding of their transformative impact. 



In [21]:
# Compare the lengths of the summarize and extract responses.
# NOTE: len() here counts characters (assuming the responses are strings),
# not the words that the prompts' "at most 50 words" limit refers to.

f"Summary: {len(summarize_response)}, Extract: {len(extract_response)}"

'Summary: 284, Extract: 320'

## Brief analysis: common words and sentiment using prompt!

In [30]:
from collections import Counter
import re
import json

In [31]:
def count_words(text):
    """Return a Counter of the lower-cased word tokens found in ``text``.

    A token is a run of ``\\w`` characters between word boundaries, so
    punctuation, hyphens and apostrophes act as separators.
    """
    lowered = text.lower()
    tally = Counter()
    for token in re.finditer(r'\b\w+\b', lowered):
        tally[token.group()] += 1
    return tally

In [32]:
# Word-frequency tables for the two model responses, built in one pass.
summarize_count, extract_count = map(
    count_words, (summarize_response, extract_response)
)

### Find common words and their counts

In [35]:
# Words that occur in both responses, in the order they first appear in the
# summarize response, each mapped to its count on either side.
common_words = {
    word: {
        "summarize_count": occurrences,
        "extract_count": extract_count[word],
    }
    for word, occurrences in summarize_count.items()
    if word in extract_count
}

# Serialize with 4-space indentation and show the result.
json_output = json.dumps(common_words, indent=4)
print(json_output)

dict_keys(['this', '10', 'week', 'course', 'a', 'comprehensive', 'guide', 'to', 'large', 'language', 'models', 'llms', 'their', 'in', 'natural', 'processing', 'machine', 'translation', 'and', 'more'])
{
    "this": {
        "summarize_count": 1,
        "extract_count": 1
    },
    "10": {
        "summarize_count": 1,
        "extract_count": 1
    },
    "week": {
        "summarize_count": 1,
        "extract_count": 1
    },
    "course": {
        "summarize_count": 1,
        "extract_count": 1
    },
    "a": {
        "summarize_count": 1,
        "extract_count": 3
    },
    "comprehensive": {
        "summarize_count": 1,
        "extract_count": 1
    },
    "guide": {
        "summarize_count": 1,
        "extract_count": 1
    },
    "to": {
        "summarize_count": 2,
        "extract_count": 1
    },
    "large": {
        "summarize_count": 1,
        "extract_count": 1
    },
    "language": {
        "summarize_count": 2,
        "extract_count": 2
    },
    "mo

### Sentiment: Let's check the sentiment using a prompt

In [36]:
# Ask the model for the sentiment of the summarize response.
# The prompt ends with "Sentiment:" so the model's reply completes that label.
prompt = f"Analyzing the sentiment of the following text:\n{summarize_response}\nSentiment:"
sentiment_response = get_completion(prompt)
print(sentiment_response, "\n")

Positive 



In [37]:
# Ask the model for the sentiment of the extract response.
# NOTE: this rebinds `prompt` and `sentiment_response`, so any values they
# held before this cell runs are overwritten.
prompt = f"Analyzing the sentiment of the following text:\n{extract_response}\nSentiment:"
sentiment_response = get_completion(prompt)
print(sentiment_response, "\n")

Positive 

