In [11]:
%%time
import faiss, pickle, nltk, re, pandas as pd, numpy as np
from sentence_transformers import SentenceTransformer
from nltk import pos_tag
from nltk.corpus import stopwords, wordnet
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize, sent_tokenize
from groq import Groq

stop_words=set(stopwords.words('english'))
lemmatizer=WordNetLemmatizer()

CPU times: user 1.02 ms, sys: 794 Œºs, total: 1.82 ms
Wall time: 1.37 ms


### Importing and Formatting Data

In [2]:
facts = pd.read_csv('disaster_knowledge.csv')
news = pd.read_json('classified_disaster_news.json')

facts.DisasterType_InfoType = facts.DisasterType_InfoType.str.replace('_', ' ', regex=False)
facts.Information = facts.DisasterType_InfoType + ': ' + facts.Information

news.content = news.title + ": " + news.content

knowledge = pd.concat([facts.Information, news.content], axis=0, ignore_index=True)

### Preprocessing

In [3]:
def wn_tagger(nltk_tag):
    if nltk_tag.startswith('J'):
        return wordnet.ADJ
    elif nltk_tag.startswith('V'):
        return wordnet.VERB
    elif nltk_tag.startswith('N'):
        return wordnet.NOUN
    elif nltk_tag.startswith('R'):
        return wordnet.ADV
    else:          
        return wordnet.NOUN

In [4]:
def preprocess(text):
    tokens=nltk.word_tokenize(re.sub('-',' ',text))
    filtered_tokens=[word.lower() for word in tokens if word.lower() not in stop_words and word not in [';','(',')','{','}',',','.']]
    pos_tags=pos_tag(filtered_tokens)
    lemmatized_tokens=[]
    for word,tag in pos_tags:
        lemmatized_tokens.append(lemmatizer.lemmatize(word,wn_tagger(tag)))
    return lemmatized_tokens

In [11]:
%%time
knowledge.tokens = knowledge.apply(preprocess)

CPU times: total: 3.58 s
Wall time: 3.68 s


### Vectorizing All Data

In [12]:
%%time
tfmr = SentenceTransformer("all-MiniLM-L6-v2")

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


CPU times: total: 4.06 s
Wall time: 1min 23s


In [8]:
%%time
knowledge_embds = tfmr.encode(knowledge.tokens)

CPU times: total: 1.66 s
Wall time: 633 ms


In [9]:
knowledge_pickle = open('knowledge_pickle', 'wb')
pickle.dump(knowledge_embds,knowledge_pickle)                    
knowledge_pickle.close()

In [10]:
knowledge_pickle = open('knowledge_pickle', 'rb')    
knowledge_embds = pickle.load(knowledge_pickle)
knowledge_pickle.close()

### Adding Vectorized Data to Vector Storage

In [11]:
d = knowledge_embds.shape[1]
index = faiss.IndexFlatIP(d)
index.add(knowledge_embds)
faiss.write_index(index, 'knowledge')
index = faiss.read_index('knowledge')

In [12]:
query = "I'm in the middle of an earthquake in a building in Myanmar. What is happening? What do I do?"
query_embd = tfmr.encode([query])
D, I = index.search(np.array(query_embd), k=10)

i = 1
for idx in I[0]:
    print(i, ".\n", knowledge[idx], "\n")
    i+=1

1 .
 Myanmar earthquake live updates: Deadly 7.7 magnitude quake collapses skyscrapers: Reports from International Rescue Committee teams working in central Myanmar and Shan State reveal the staggering scale of humanitarian needs in the aftermath of Friday‚Äôs earthquake.

IRC partners have started distributing essential support to the earthquake-hit communities, including water, hygiene kits and critical medical assistance.

One of the IRC staff members working in Mandalay said they have ‚Äúnever experienced anything like this.‚Äù

People take shelter in temporary tents set up outdoors in Mandalay on March 31, 2025. Sai Aung Main/AFP via Getty Images

‚ÄúI have lived through several earthquakes in my life, but I have never experienced anything like this,‚Äù the staff member said. ‚ÄúThe most powerful shaking lasted only a few seconds, and many people immediately ran out of their houses.‚Äù

The staff member said many people are sleeping outside their homes on roads or open fields due 

In [13]:
context = "\n".join([knowledge[i] for i in I[0]])
prompt = f"Use the information below to answer:\n{context}\n\nQ: {query}\nA:"

client = Groq(api_key = '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')

llm_eval = client.chat.completions.create(
    messages=[
        {
            "role" : "user",
            "content" : prompt,
        }
    ],
    model = "llama3-70b-8192",
)

print(llm_eval.choices[0].message.content)

You are experiencing a sudden shaking of the ground caused by the passage of seismic waves through the Earth's rocks, resulting from the release of accumulated stress along geologic faults or by volcanic activity. This is a life-threatening situation. 

To protect yourself, immediately:

1. **DROP** to the ground to avoid being knocked over or falling.
2. **COVER** your head and body by getting under a sturdy piece of furniture, such as a table or desk. Hold onto the furniture to keep yourself sheltered.
3. **HOLD ON** until the shaking stops. This will help prevent you from being injured by falling debris or being knocked over.

**Stay away from:**

* Windows and exterior walls, as they are the most vulnerable to damage.
* Hanging objects, such as ceiling fans or chandeliers, that could fall and cause injury.
* Doors, as they may swing and cause injury.

**If you are outdoors:**

* Move to an open area away from buildings, trees, and power lines.
* Stay away from any potential hazards