In [None]:
from models import GenerationModel, EmbeddingsModel, MilvusClient
import os

# Initialize the models.
# Sentence-embedding model used to embed both the corpus and the queries.
embedding_model = EmbeddingsModel("sentence-transformers/bert-base-nli-mean-tokens")
embedding_model.load_model()
print('Embedding model loaded')

# Locate the local Mistral checkout under the user's home directory.
# os.path.expanduser("~") honours $HOME when it is set and falls back to the
# platform's user database / profile otherwise, so the path never degrades to
# the literal string "None/ext-gits/..." when HOME is missing.
home_dir = os.path.expanduser("~")
generation_model_name = f'{home_dir}/ext-gits/Mistral-7B-Instruct-v0.3'
generation_model = GenerationModel(generation_model_name)
generation_model.load_model()
print('Mistral loaded')

# Initialize the Milvus client and connect to the database.
# The argument is the collection name used by the rest of the notebook.
milvus_client = MilvusClient("wiki_movie_plots")
print('Milvus loaded')


Already connected to Milvus with alias 'default'
Collection 'wiki_movie_plots' loaded
Milvus loaded


In [8]:

# Populate the Milvus collection with embeddings computed from the movie-plot
# CSV. This is slow, so run it only when the collection needs to be filled.
print("Ingesting data")
csv_file_path = "data/wiki_movie_plots_deduped.csv"
milvus_client.get_collection()
milvus_client.ingest_data(
    csv_file_path,
    embedding_model,
    batch_size=128,
)
print("Data ingested")

Ingesting data


Inserting Batches:   0%|          | 0/273 [00:00<?, ?batch/s]

Number of records in the collection: 35142
Data ingested


In [9]:
# Manual maintenance toggles, intentionally left commented out. Uncomment only
# when needed (judging by its name, drop_collection presumably deletes the
# collection -- confirm against MilvusClient before running):
# milvus_client.drop_collection()

# milvus_client.get_collection()

# Sanity check: report how many records the collection currently holds.
milvus_client.count_records()

Number of records in the collection: 35142


In [13]:

# Free-text plot description; used as the retrieval query and quoted in the prompt.
plot_query = "Sexy aliens start reproducing with earthlings to create a hybrid race of interstellar diplomats."

# The embedding model works on batches, so the single query is wrapped in a list.
embedding = embedding_model.get_batch_embeddings([plot_query])
search_results = milvus_client.search(embedding)

# Gather the plot text of every hit. Plots are collected in a list and joined
# with a blank line so that consecutive summaries do not run together in the
# prompt; hits without any plot text are skipped instead of breaking the join.
retrieved_plots = []
for result in search_results:
    for hit in result:
        title = hit.entity.get("title")
        release_year = hit.entity.get("release_year")
        plot_text = hit.entity.get("plot_text")

        # Print what was retrieved so the generated text can be checked
        # against its sources.
        print(title)
        print(release_year)
        print(plot_text)

        if plot_text:
            retrieved_plots.append(plot_text)

context_texts = "\n\n".join(retrieved_plots)

prompt = f"Using the following plot summaries, write a cohesive summary or review of movies similar to '{plot_query}':\n\n{context_texts}"

# Generate a response using the GenerationModel.
# NOTE(review): the sampling arguments are presumably forwarded to the
# underlying text-generation call -- confirm in GenerationModel. With
# do_sample=True the output is expected to vary between runs unless a seed
# is fixed elsewhere.
response = generation_model.generate_response(
    prompt,
    max_new_tokens=250,
    num_return_sequences=1,
    do_sample=True,
    top_p=0.95,
    top_k=50,
)

print("Generated Summary:")
print(response)


Embedding type: <class 'list'>, First element type: <class 'float'>
Mars Needs Women
1967
a u. s. military station in houston, the united states decoding service (u.s.d.s.), nasa wing, has intercepted a message from outer space. after decoding, the message contains only the cryptic statement: "mars ... needs ... women" martians have developed a genetic deficiency that now produces only male children. a mission to earth is launched, consisting of five martian males, led by dop (tommy kirk). once here, their team intends to recruit earth women to come to mars to mate and produce female offspring, saving their civilization from extinction. using their sophisticated transponder, dop attempts to make contact with the u.s. military, which has now tracked the aliens' arrival on earth. the military eventually views the martians as invaders, so the team takes on the guise of earth men, acquiring human clothes, money, maps, and transportation. they finally select their prospective candidates, se