In [24]:
from dotenv import load_dotenv
import os
from openai import OpenAI

# Load environment variables via python-dotenv (typically from a local .env
# file) so the API key does not have to be written into the notebook.
load_dotenv()

# os.environ.get returns None when the variable is absent, exactly like os.getenv.
openai_api_key = os.environ.get('OPENAI_API_KEY')
client = OpenAI(api_key=openai_api_key)

In [25]:
from wikipediaapi import Wikipedia
# Client for English Wikipedia; the first argument is the User-Agent string.
wiki = Wikipedia('RAGBOT/0.0','en')
doc = wiki.page('Hayao_Miyazaki').text

# Fail fast on empty text: otherwise split() below yields [''] and every later
# step (embedding, retrieval, prompting) silently runs on an empty document.
if not doc.strip():
    raise ValueError("Wikipedia returned no text for page 'Hayao_Miyazaki'")

# Split the article on blank lines; each chunk becomes one retrieval unit.
paragraphs = doc.split('\n\n')

In [26]:
import textwrap

In [27]:
# Preview every paragraph with its index, wrapped to 100 columns and framed by
# a rule above and below.
separator = "----------------------------------------------------------"
for index, paragraph in enumerate(paragraphs):
    print(
        separator,
        f'{index}: {textwrap.fill(paragraph, width=100)}',
        separator,
        sep="\n",
    )

----------------------------------------------------------
0: Hayao Miyazaki (宮崎 駿 or 宮﨑 駿, Miyazaki Hayao, Japanese: [mijaꜜzaki hajao]; born January 5, 1941) is
a Japanese animator, filmmaker, and manga artist. A founder of Studio Ghibli, he has attained
international acclaim as a masterful storyteller and creator of Japanese animated feature films, and
is widely regarded as one of the most accomplished filmmakers in the history of animation. Born in
Tokyo City in the Empire of Japan, Miyazaki expressed interest in manga and animation from an early
age, and he joined Toei Animation in 1963. During his early years at Toei Animation he worked as an
in-between artist and later collaborated with director Isao Takahata. Notable films to which
Miyazaki contributed at Toei include Doggie March and Gulliver's Travels Beyond the Moon. He
provided key animation to other films at Toei, such as Puss in Boots and Animal Treasure Island,
before moving to A-Pro in 1971, where he co-directed Lupin th

In [28]:
from sentence_transformers import SentenceTransformer
# Load the sentence-embedding model used for both paragraphs and the query.
# trust_remote_code is left at its default: all-MiniLM-L6-v2 needs no custom
# code, and enabling the flag would permit repository-supplied Python to run
# when the model is loaded.
model = SentenceTransformer('all-MiniLM-L6-v2')

In [29]:
# Embed every paragraph. normalize_embeddings=True requests unit-length vectors,
# so a plain dot product can later be used as the cosine similarity.
docs_embed = model.encode(paragraphs, normalize_embeddings=True)
# Shape check: one row per paragraph, one column per embedding dimension.
docs_embed.shape

(19, 384)

In [30]:
# Inspect the embedding vector of the first paragraph.
docs_embed[0]

array([-2.31464151e-02, -7.16991574e-02, -2.48916633e-02, -6.26568422e-02,
       -1.29291620e-02, -8.27795453e-03,  6.21424243e-02,  4.66236174e-02,
       -1.00533338e-02, -1.49427913e-02,  7.75854513e-02, -2.50172894e-02,
        2.39263736e-02,  8.97218138e-02, -3.55678052e-02,  7.29549453e-02,
       -2.10700650e-03,  2.44259890e-02, -1.12973554e-02, -9.90779474e-02,
        7.37928078e-02, -9.58667025e-02, -3.12669650e-02, -6.91230223e-02,
       -1.72936246e-02, -4.32372317e-02,  2.81401835e-02,  2.20665373e-02,
       -3.01350728e-02,  5.40369237e-03, -7.12047070e-02,  3.92454788e-02,
       -3.31546217e-02, -3.76769714e-02,  1.52829532e-02,  1.50460884e-01,
       -2.27354504e-02,  9.99577623e-03, -3.20613459e-02, -4.74128276e-02,
       -5.17182089e-02,  3.60440686e-02, -2.98575102e-03, -5.00223562e-02,
        8.38011280e-02, -7.13430494e-02,  4.23677377e-02, -2.79667918e-02,
       -9.93525833e-02,  7.59793818e-02, -7.44252056e-02, -1.64125282e-02,
        3.76579314e-02, -

In [31]:
# The question to answer. It is embedded with the same model and the same
# normalization setting as the paragraphs so the vectors are comparable.
query = "What was Studio Ghibli's first film?"
query_embed = model.encode(query, normalize_embeddings=True)

In [32]:
# Shape check: a single query yields a 1-D vector.
query_embed.shape

(384,)

In [33]:
import numpy as np
# Score each paragraph against the query: matrix-vector product of the paragraph
# embeddings with the 1-D query embedding (transposing a 1-D array is a no-op).
similarities = docs_embed.dot(query_embed)


In [34]:
# Shape check: one similarity score per paragraph.
similarities.shape

(19,)

In [35]:
# Show the raw similarity score of every paragraph.
similarities

array([0.40893707, 0.24359593, 0.16747975, 0.29962403, 0.27441964,
       0.4034388 , 0.6136512 , 0.33797494, 0.36417538, 0.29610068,
       0.25368547, 0.27554542, 0.35649553, 0.24063519, 0.33704603,
       0.42061728, 0.3468451 , 0.03550772, 0.6087109 ], dtype=float32)

In [36]:
# Indices of the three highest-scoring paragraphs, best first: sort ascending,
# flip to descending, then keep the leading three.
top_3_idx = np.argsort(similarities)[::-1][:3].tolist()

In [37]:
# Show which paragraph indices were selected, most similar first.
top_3_idx

[6, 18, 15]

In [38]:
# Look up the text of each selected paragraph, preserving the ranking order.
most_similar_documents = list(map(paragraphs.__getitem__, top_3_idx))

In [39]:
# Wrap each retrieved paragraph to 100 columns once, then reuse the wrapped text
# both for the on-screen preview and for the prompt context.
separator = "----------------------------------------------------------"
wrapped_documents = [
    textwrap.fill(document, width=100) for document in most_similar_documents
]

for rank, wrapped in enumerate(wrapped_documents):
    print(separator)
    print(f'{rank}: {wrapped}')
    print(separator)

# Each paragraph is followed by a blank line, including the last one.
CONTEXT = "".join(wrapped + "\n\n" for wrapped in wrapped_documents)

----------------------------------------------------------
0: Studio Ghibli Early films (1985–1996) On June 15, 1985, Miyazaki and Takahata founded the animation
production company Studio Ghibli as a subsidiary of Tokuma Shoten. Studio Ghibli's first film was
Laputa: Castle in the Sky (1986), directed by Miyazaki. Some of the architecture in the film was
also inspired by a Welsh mining town; Miyazaki witnessed the mining strike upon his first visit to
Wales in 1984 and admired the miners' dedication to their work and community. Laputa was released on
August 2, 1986, by the Toei Company. It sold around 775,000 tickets; Miyazaki and Suzuki expressed
their disappointment with the film's box office figures. Miyazaki's following film, My Neighbor
Totoro, was released alongside Takahata's Grave of the Fireflies in April 1988 to ensure Studio
Ghibli's financial status. My Neighbor Totoro features the theme of the relationship between the
environment and humanity, showing that harmony is the r

In [40]:
# NOTE(review): this re-declares the same question that was embedded for
# retrieval earlier in the notebook. If the question is edited, change both
# places, otherwise the retrieved context and the prompt will disagree.
query = "What was Studio Ghibli's first film?"

In [41]:
# Assemble the user prompt: instructions first, then the retrieved CONTEXT,
# then the question. The model is told to admit when it does not know.
prompt = f"""
use the following CONTEXT to answer the QUESTION at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {CONTEXT}
Question: {query}

"""

In [42]:
# Send the assembled prompt as a single user message and display the text of
# the first returned choice.
messages = [{"role": "user", "content": prompt}]
response = client.chat.completions.create(model="gpt-4o", messages=messages)
response.choices[0].message.content

"Studio Ghibli's first film was *Laputa: Castle in the Sky* (1986), directed by Hayao Miyazaki."