This is a starter notebook for the project. You'll have to import the libraries you need; a list of the ones available in this workspace can be found in the requirements.txt file.

In [None]:
import os

# Configure OpenAI access through environment variables.
# USERNAME is not guaranteed to exist (it is typically a Windows variable), so
# it is read with a default; indexing os.environ directly would raise KeyError
# and abort the notebook wherever the variable is missing.
if 'A306709' in os.environ.get('USERNAME', ''):
    print("Running on Christoph's Telekom computer: openAI api key is already set in environment")
else:
    # Everywhere else the placeholder below has to be replaced by a real key.
    print("Set openAI api key manually:")
    os.environ["OPENAI_API_KEY"] = "YOUR API KEY"

# Point the OpenAI client at the Vocareum endpoint.
os.environ["OPENAI_API_BASE"] = "https://openai.vocareum.com/v1"

from langchain.llms import OpenAI


In [None]:
###
### Section 1: generate 10 arbitrary home descriptions using chat-gpt
### Saves the descriptions in a file
###
import openai
from pathlib import Path

# Text file the generated listings are written to; its existence is also what
# decides whether the listings have to be generated at all.
house_offerings_filename = "house-offerings.txt"
# User prompt sent to the chat model. It fixes the field layout of every
# listing ("Neighborhood:", "Price:", ..., "Neighborhood Description:") and
# includes one worked example of a listing in that layout.
prompt = """
Create 10 imaginary house offerings.
I'll give you the mandatory structure and you fill the blancs (marked with three dots: "...") with imaginary but realistic values.
Vary with plausible values, but use only exactly 4 different kind of neighbourhoods. Nevertheless, create 10 offerings, so that the neighbourhood values will repeat sometimes.
### mandatory structure:
Neighborhood: ...
Price: ...
Bedrooms: ...
Bathrooms: ...
House Size: ... sqft
Description: ...
Neighborhood Description: ...
### here is an example what I expect the result to be likely (don't use exactly this!):
Neighborhood: Green Oaks
Price: $800,000
Bedrooms: 3
Bathrooms: 2
House Size: 2,000 sqft

Description: Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.

Neighborhood Description: Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bike lanes, commuting is a breeze.
"""

# Generate the listings only when no listings file exists yet; an existing
# file is reused so no tokens are spent on regenerating it.
if not Path(house_offerings_filename).is_file():
    chat_messages = [
        {"role": "system", "content": "You are a writer for house offerings. "},
        {"role": "user", "content": prompt},
    ]
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=chat_messages,
        temperature=0.5,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    # The response object carries more than the generated text; only the
    # content of the first choice's message is kept.
    offerings = response.choices[0].message.content

    with open(house_offerings_filename, "w") as offerings_file:
        offerings_file.write(offerings)

    print(offerings)
else:
    print("House offerings already exist. Don't generate them again. Save tokens instead.")


In [None]:
###
### Section 2: load the house offerings from txt file, create embeddings and save them to a vector database
### hint: see Exercises 3 & 4 from chapters 2.11 and 2.14 for how to use lancedb
###
import numpy as np
from sentence_transformers import SentenceTransformer
import lancedb
from lancedb.pydantic import vector, LanceModel

EMBEDDINGS_MODEL_NAME = 'paraphrase-MiniLM-L6-v2'

# SentenceTransformer instances that were already constructed, keyed by model
# name. Constructing the model is the expensive part, so it is done once and
# reused instead of being repeated for every listing and every query.
_embedding_models = {}


def generate_embeddings(description: str, neighborhood_description: str) -> np.ndarray:
    """Embed a house description together with its neighborhood description.

    The two texts are joined with a newline and encoded as a single input by
    the sentence-transformers model named in ``EMBEDDINGS_MODEL_NAME``.

    Args:
        description: Text describing the house itself.
        neighborhood_description: Text describing the neighborhood.

    Returns:
        The result of ``model.encode`` for the combined text.
    """
    model = _embedding_models.get(EMBEDDINGS_MODEL_NAME)
    if model is None:
        model = SentenceTransformer(EMBEDDINGS_MODEL_NAME)
        _embedding_models[EMBEDDINGS_MODEL_NAME] = model
    return model.encode(description + "\n" + neighborhood_description)


# Find out the dimension of the embeddings (expected: 384); it is needed for
# the table definition.
arbitrary_embeddings = generate_embeddings("a", "b")
embeddings_dim = arbitrary_embeddings.shape[0]
print(f"dimension of embedding: {embeddings_dim}")


class HouseOffer(LanceModel):
    """Schema of one house listing as stored in the vector database table.

    Each instance combines the structured values parsed from a generated
    listing with the embedding computed from its two description texts.
    """
    # Embedding vector; its length is the dimension probed from the embedding model.
    embeddings: vector(embeddings_dim)
    # Name of the neighborhood the house belongs to.
    neighborhood: str
    # Price in dollars (parsed from the "Price: $..." line).
    price: float
    # Number of bedrooms.
    num_bedrooms: float
    # Number of bathrooms.
    num_bathrooms: float
    # House size in square feet (the "sqft" suffix is removed while parsing).
    house_size: float
    # Free-text description of the house itself.
    description: str
    # Free-text description of the surrounding neighborhood.
    neighborhood_description: str

# Recreate the table on every run so that re-executing this cell cannot
# accumulate duplicate rows.
db = lancedb.connect("./house-offerings-db")
table_name = "house_offers"
db.drop_table(table_name, ignore_missing=True)
table = db.create_table(table_name, schema=HouseOffer)

# Do a very primitive parsing of the text file, reading it line by line.
with open(house_offerings_filename, "r") as file:
    house_data_array = file.read().split("\n")


def _text_after(line: str, label: str) -> str:
    """Return the part of ``line`` that follows the first ``label``, stripped.

    Splitting once on the label keeps any further colons that belong to the
    value; taking only the text between the first two colons would cut a
    description such as "Highlights: pool and garage" short.
    """
    return line.split(label, 1)[1].strip()


# HouseOffer fields that must have been read before a listing can be stored.
_REQUIRED_OFFER_FIELDS = (
    "neighborhood",
    "price",
    "num_bedrooms",
    "num_bathrooms",
    "house_size",
    "description",
    "neighborhood_description",
)

# Values collected for the listing that is currently being read. The dict is
# emptied after every stored listing so that a field missing from one listing
# is reported instead of being silently filled with the previous listing's value.
current_offer = {}
for current_line in house_data_array:
    if "Neighborhood:" in current_line:
        current_offer["neighborhood"] = _text_after(current_line, "Neighborhood:")
    if "Price: $" in current_line:
        current_offer["price"] = _text_after(current_line, "$").replace(",", "")
    if "Bedrooms:" in current_line:
        current_offer["num_bedrooms"] = _text_after(current_line, "Bedrooms:").replace(",", "")
    if "Bathrooms:" in current_line:
        current_offer["num_bathrooms"] = _text_after(current_line, "Bathrooms:").replace(",", "")
    if "House Size:" in current_line:
        current_offer["house_size"] = (
            _text_after(current_line, "House Size:").replace(",", "").replace("sqft", "").strip()
        )
    if "Neighborhood Description:" in current_line:
        current_offer["neighborhood_description"] = _text_after(current_line, "Neighborhood Description:")
        # Last row of a house offer: the listing is complete, store it.
        missing_fields = [name for name in _REQUIRED_OFFER_FIELDS if name not in current_offer]
        if missing_fields:
            raise ValueError(f"Incomplete house offering, missing fields: {missing_fields}")
        house_offer = HouseOffer(
            embeddings=generate_embeddings(
                current_offer["description"],
                current_offer["neighborhood_description"],
            ),
            **current_offer,
        )
        table.add([dict(house_offer)])
        current_offer = {}
    elif "Description:" in current_line:
        # Plain "Description:" line (the neighborhood variant is handled above).
        current_offer["description"] = _text_after(current_line, "Description:")

# Assure that we created 10 rows.
assert len(table) == 10, f"expected 10 house offers in the table, found {len(table)}"


In [None]:
# Have a look inside the db: run a search without a query vector and convert
# the returned rows to HouseOffer objects so they show up as the cell output.
table.search().to_pydantic(HouseOffer)

In [64]:
###
### Section 3: Searching Based on Preferences
###
def seek_matching_offers(answers, num_rows):
    """Search the house-offer table for listings matching the given answers.

    The answers are joined with newlines and embedded with an empty house
    description; the resulting vector is used as the search query.

    Args:
        answers: The client's answers as a sequence of strings.
        num_rows: Maximum number of results requested from the search.

    Returns:
        The search results converted to ``HouseOffer`` objects.
    """
    preferences_text = "\n".join(answers)
    search = table.search(generate_embeddings("", preferences_text))
    return search.limit(num_rows).to_pydantic(HouseOffer)


In [99]:
###
### Section 4: Personalizing Listing Descriptions
###
import openai

def create_q_a_pairs_from_arrays(user_questions, user_answers):
    """Render questions and answers as newline-separated "question: answer" rows.

    Questions and answers are paired by position; pairing stops at the end of
    the shorter of the two sequences.

    Args:
        user_questions: The questions, in order.
        user_answers: The answers, in the same order as the questions.

    Returns:
        One string with a "question: answer" row per pair, joined by newlines.
    """
    return "\n".join(
        f"{question}: {answer}"
        for question, answer in zip(user_questions, user_answers)
    )

def create_augmention_prompt(description, neighborhood_description, user_questions, user_answers) -> str:
    """Build the user prompt that asks the model to personalize a listing.

    The prompt contains the house description, the neighborhood description
    and the client's preferences as "question: answer" rows. The number of
    preference pairs stated in the prompt is the number actually supplied
    rather than a fixed count.

    Args:
        description: Description of the house itself.
        neighborhood_description: Description of the neighborhood.
        user_questions: Questions that were put to the client.
        user_answers: The client's answers, in the same order as the questions.

    Returns:
        The complete prompt text.
    """
    # Pair up front so the stated count matches the rows that are rendered
    # (only positions present in both inputs form a pair).
    pairs = list(zip(user_questions, user_answers))
    paired_questions = [question for question, _ in pairs]
    paired_answers = [answer for _, answer in pairs]
    q_a_rows = create_q_a_pairs_from_arrays(paired_questions, paired_answers)
    pair_noun = "question and answer" if len(pairs) == 1 else "questions and answers"
    return f"""
I give you two parts of a house offering: a description of the house itself and a description of its neighborhood.
Furthermore I give you some preferences of my client in the form of questions and the client's answers to these questions.
Your task is to summarize the house description and the neighborhood description in an appealing way that emphasizes the given client's preferences.
###
House description: {description}
###
Neighborhood description: {neighborhood_description}
###
Client's preferences defined by {len(pairs)} {pair_noun}:
{q_a_rows}
"""

def augment_offer_according_preferences(house_offer: HouseOffer, user_questions, user_answers):
    """Ask the chat model for a summary of an offer tailored to the client.

    Args:
        house_offer: The offer whose description and neighborhood description
            are to be summarized.
        user_questions: Questions that were put to the client.
        user_answers: The client's answers to those questions.

    Returns:
        The text content of the first choice in the model's response.
    """
    system_message = {
        "role": "system",
        "content": "Your job is to rewrite house offerings so that they sound better according to the prefernces of my client. ",
    }
    user_message = {
        "role": "user",
        "content": create_augmention_prompt(
            house_offer.description,
            house_offer.neighborhood_description,
            user_questions,
            user_answers,
        ),
    }
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[system_message, user_message],
        temperature=0.5,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    return completion.choices[0].message.content


In [100]:
###
### Section 5: Process the given test data and show the results
###
def find_and_augment_and_print_offers(user_questions, user_answers, num_results):
    """Find offers matching the client's answers and print them with a personalized summary.

    For every offer found, the original offer is printed first, followed by
    the summary generated for the client's preferences.

    Args:
        user_questions: Questions that were put to the client.
        user_answers: The client's answers, in the same order as the questions.
        num_results: Maximum number of offers to retrieve and print.
    """
    # Search with the answers that were passed in. Reading a notebook-level
    # `answers` variable instead would ignore the argument and fail (or use
    # stale data) whenever that variable is missing or different.
    offers = seek_matching_offers(user_answers, num_results)
    for index, offer in enumerate(offers, start=1):
        print(f"********** Offer no. {index} **********")
        print("*** part 1: original offer:")
        print(offer)
        print("*** part 2: emphasize client's prefernces:")
        print(augment_offer_according_preferences(offer, user_questions, user_answers))


In [101]:
###
### Test 1: now let's test it with the test data given in the exercise description:
###
# Questions put to the client; each entry belongs to the answer at the same
# position in `answers`.
questions = [
    "How big do you want your house to be?",
    "What are 3 most important things for you in choosing this property?",
    "Which amenities would you like?",
    "Which transportation options are important to you?",
    "How urban do you want your neighborhood to be?",
]
# The client's answers, in the same order as the questions above.
answers = [
    "A comfortable three-bedroom house with a spacious kitchen and a cozy living room.",
    "A quiet neighborhood, good local schools, and convenient shopping options.",
    "A backyard for gardening, a two-car garage, and a modern, energy-efficient heating system.",
    "Easy access to a reliable bus line, proximity to a major highway, and bike-friendly roads.",
    "A balance between suburban tranquility and access to urban amenities like restaurants and theaters."
]

# Retrieve up to 3 offers for these answers and print each one together with
# its personalized summary.
find_and_augment_and_print_offers(questions, answers, 3)

********** Offer no. 1 **********
*** part 1: original offer:
embeddings=FixedSizeList(dim=384) neighborhood='Maple Grove' price=500000.0 num_bedrooms=3.0 num_bathrooms=2.5 house_size=2000.0 description='Nestled in the charming Maple Grove neighborhood, this 3-bedroom, 2.5-bathroom home exudes warmth and character. The updated kitchen features quartz countertops and a breakfast nook, while the cozy family room offers a fireplace and built-in bookshelves. The backyard oasis includes a deck for outdoor gatherings and a lush garden.' neighborhood_description="Maple Grove is a tight-knit community with a strong sense of pride in homeownership. Residents can enjoy the local farmers' market, community events, and walking trails throughout the neighborhood. Maple Grove offers a peaceful setting with easy access to schools, parks, and shopping centers."
*** part 2: emphasize client's prefernces:
Summary:
This charming 3-bedroom, 2.5-bathroom home in Maple Grove offers a comfortable living spac

In [102]:
###
### Test 2: let's test it with some simple questions and answers created by myself:
###
# A shorter, hand-written preference set: two questions, each belonging to
# the answer at the same position in `answers`.
questions = [
    "Which is your preferred neighborhood?",
    "What is the only absolutely important thing you need in your new house?"
]
answers = [
    "I love the Maple Grove region.",
    "I need to have a fireplace."
]

# Retrieve a single offer for these answers and print it together with its
# personalized summary.
find_and_augment_and_print_offers(questions, answers, 1)


********** Offer no. 1 **********
*** part 1: original offer:
embeddings=FixedSizeList(dim=384) neighborhood='Maple Grove' price=480000.0 num_bedrooms=3.0 num_bathrooms=2.0 house_size=1900.0 description='Nestled in the serene Maple Grove neighborhood, this 3-bedroom, 2-bathroom home offers comfort and style. The home features a cozy family room with a fireplace, a modern kitchen with granite countertops, and a formal dining room for entertaining. The backyard retreat includes a deck for outdoor gatherings and a lush garden.' neighborhood_description='Maple Grove is a close-knit community with a strong sense of community pride and engagement. Residents can participate in local events, visit the nearby parks and playgrounds, and enjoy the convenience of shopping centers and schools within the neighborhood. Maple Grove offers a peaceful environment for families and professionals alike.'
*** part 2: emphasize client's prefernces:
Summary:
This charming 3-bedroom, 2-bathroom home in the sou