### Assessment 3:
	1. Input a prompt from the user
	2. Generate 3 outcomes from a generative AI model:
  - a. A story of 100 words about the prompt 
  - b. This story should be represented in a vector/embeddings format 
  - c. Identify the part of speech (POS) of every word in the generated story 


In [0]:
from openai import AzureOpenAI
import spacy

In [0]:
# Notebook shell escape: download spaCy's small English pipeline (en_core_web_sm).
!python -m spacy download en_core_web_sm
# Load the pipeline once; text_to_embeddings and extract_pos_tags use this shared `nlp` object.
nlp = spacy.load('en_core_web_sm')

Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/12.8 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.3/12.8 MB[0m [31m10.0 MB/s[0m eta [36m0:00:02[0m[2K     [91m━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/12.8 MB[0m [31m48.5 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━[0m [32m8.2/12.8 MB[0m [31m79.1 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m12.8/12.8 MB[0m [31m144.1 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m12.8/12.8 MB[0m [31m144.1 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━

In [0]:
def text_to_embeddings(text: str, word_count: int = 100):
    """
    Converts the first ``word_count`` words of a text into per-token spaCy vectors.

    The text is split on whitespace, truncated to its first ``word_count``
    words, and processed with the module-level ``nlp`` pipeline. spaCy
    tokenizes more finely than whitespace splitting (punctuation and
    contractions become separate tokens), so the result holds one vector per
    spaCy token and can contain more than ``word_count`` entries.

    Args:
        text (str): The input text. It must contain at least ``word_count``
            whitespace-separated words.
        word_count (int): Minimum number of words required, and the number of
            leading words that are embedded. Default is 100.

    Returns:
        list: One ``token.vector`` per spaCy token, in document order. The
            vector dimensionality is whatever the loaded pipeline provides.

    Raises:
        ValueError: If the text has fewer than ``word_count`` words, or if any
            token reports that it has no vector.
    """
    words = text.split()
    if len(words) < word_count:
        raise ValueError(f"Input text must contain at least {word_count} words.")

    # Slicing is a no-op when the text has exactly ``word_count`` words.
    doc = nlp(" ".join(words[:word_count]))

    # Validate per token. Comparing the number of vectors with the number of
    # whitespace words (as was done before) rejects any text that contains
    # punctuation, because spaCy emits punctuation as separate tokens.
    if not all(token.has_vector for token in doc):
        raise ValueError("Not all words have valid embeddings.")

    return [token.vector for token in doc]

In [0]:
def extract_pos_tags(story: str):
    """
    Tags every token of a story with its coarse part-of-speech label.

    Args:
        story (str): The text to analyze.

    Returns:
        list: ``(token text, POS label)`` tuples, one per spaCy token, in
            document order.
    """
    tagged = []
    # Run the text through the shared module-level pipeline and walk its tokens.
    for tok in nlp(story):
        tagged.append((tok.text, tok.pos_))
    return tagged

In [0]:
def generate_stories(input_text: str, num_stories: int = 3, word_limit: int = 100):
    """
    Generates several short stories about a prompt using an Azure OpenAI chat deployment.

    Each story is requested in its own chat-completion call. After a story is
    received, the request and the model's reply are added to the running
    conversation so that later requests see the earlier stories.

    Connection settings are read from the environment variables
    ``AZURE_OPENAI_ENDPOINT``, ``AZURE_OPENAI_API_KEY`` and
    ``AZURE_OPENAI_DEPLOYMENT``; an unset variable falls back to an empty string.

    Args:
        input_text (str): The user-provided prompt for the story.
        num_stories (int): Number of stories to generate. Default is 3.
        word_limit (int): Approximate word count requested for each story.
            Default is 100 words.

    Returns:
        list: The generated stories. The list may be shorter than
            ``num_stories`` (or empty) if a request fails or returns no text.
    """
    import os  # local import keeps this block self-contained

    # Read credentials from the environment instead of hard-coding them.
    azure_oai_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT", "")
    azure_oai_key = os.environ.get("AZURE_OPENAI_API_KEY", "")
    # NOTE(review): the deployment variable name is a convention chosen here.
    azure_oai_deployment = os.environ.get("AZURE_OPENAI_DEPLOYMENT", "")

    # Initialize the Azure OpenAI client
    client = AzureOpenAI(
        azure_endpoint=azure_oai_endpoint,
        api_key=azure_oai_key,
        api_version="2024-02-15-preview",
    )
    stories = []

    # Conversation so far; completed request/reply pairs are appended below.
    messages_array = [
        {"role": "system", "content": "You are a helpful assistant with a lot of humor."},
        {"role": "user", "content": f"Generate a paragraph of text using only {word_limit} words."}
    ]
    story_request = {
        "role": "user",
        "content": f"Write a story of about {word_limit} words based on the prompt: {input_text}.",
    }

    try:
        for _ in range(num_stories):
            response = client.chat.completions.create(
                model=azure_oai_deployment,
                max_tokens=word_limit * 2,  # Adjusting for token overhead
                temperature=0.7,  # Controls creativity
                n=1,  # One story at a time
                # The pending request is added per call, so a failed or empty
                # attempt never leaves a dangling user turn in the history.
                messages=messages_array + [story_request],
            )
            content = response.choices[0].message.content
            if content is None:
                # The API can return a choice without text; skip it instead of
                # failing on ``None.strip()`` and losing the remaining stories.
                print("The model returned no content for this request; skipping it.")
                continue
            story = content.strip()
            stories.append(story)
            # Record the full exchange so the next request sees it.
            messages_array.append(story_request)
            messages_array.append({"role": "assistant", "content": story})
    except Exception as e:
        # Deliberate best-effort: report the failure and return what we have.
        print(f"An error occurred: {e}")
    return stories

In [0]:
# Ask the user what the stories should be about.
user_prompt = input("Enter a prompt for your story: ")

# Request stories for that prompt (generate_stories defaults to 3).
generated_stories = generate_stories(user_prompt)

# First pass: print each story under a numbered heading.
print("\nGenerated Stories:")
for i, story in enumerate(generated_stories, start=1):
    print(f"\nStory {i}:\n{story}")


# Second pass: for every story, report its embeddings and then its POS tags.
for story in generated_stories:
    # Vector/embeddings representation of the story.
    try:
        embeddings = text_to_embeddings(story)
    except ValueError as e:
        print(e)
    else:
        print(f"Generated {len(embeddings)} word embeddings.")

    # Part-of-speech tags, printed one "word: TAG" pair per line.
    try:
        pos_tags = extract_pos_tags(story)
        for word, pos in pos_tags:
            print(f"{word}: {pos}")
    except Exception as e:
        print(e)


Enter a prompt for your story:  pandas

An error occurred: Error code: 401 - {'statusCode': 401, 'message': 'Unauthorized. Access token is missing, invalid, audience is incorrect (https://cognitiveservices.azure.com), or have expired.'}

Generated Stories:
