## Install Libraries

In [6]:
! pip install --quiet --upgrade chromadb
! pip install --quiet --upgrade pandas
! pip install --quiet --upgrade llama-index
! pip install --quiet --upgrade openai
! pip install --quiet --upgrade llama-index-vector-stores-chroma
! pip install --quiet --upgrade llama-index-embeddings-langchain
! pip install --quiet --upgrade langchain-community
! pip install --quiet --upgrade sentence-transformers


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is availa

## Setup Model
#### You must set an `OPENAI_API_KEY` environment variable containing your OpenAI API key. See https://help.openai.com/en/articles/5112595-best-practices-for-api-key-safety

In [2]:
import os
import openai
# Read the key from the OPENAI_API_KEY environment variable instead of
# hardcoding it in the notebook; this raises KeyError if the variable is unset.
openai.api_key = os.environ["OPENAI_API_KEY"]

## Create Chroma Client

In [3]:
import chromadb
# Chroma client that the collection-creation cell calls into.
chroma_client = chromadb.Client()




## Create Collection

In [4]:
# One Chroma collection per dataset. get_or_create_collection keeps this cell
# re-runnable: create_collection raises when a collection with the same name
# already exists on the client.
eventsCollection = chroma_client.get_or_create_collection(name="Events")
ostemCollection = chroma_client.get_or_create_collection(name="OSTEM")
pathwaysCollection = chroma_client.get_or_create_collection(name="Pathways")
researchCollection = chroma_client.get_or_create_collection(name="Research")

## Add Documents to Collection

#### Create Dataframes

In [5]:
import pandas as pd

# Directory prefix (with trailing slash) that holds the CSV exports.
relativePath = "data/"

# Read the four CSV files in one pass and bind each frame to its own name.
events, ostem, pathways, research = (
    pd.read_csv(csv_path)
    for csv_path in (
        f"{relativePath}events.csv",
        f"{relativePath}ostems.csv",
        f"{relativePath}pathways.csv",
        f"{relativePath}solicitations.csv",
    )
)

#### Events

In [6]:
# Pull each column of interest out of the events frame as its own Series.
idEvents, titleEvents, descriptionEvents, urlEvents, typeEvents = (
    events[column] for column in ("ID", "Title", "Description", "URL", "Type")
)

display(events)

#### OSTEM

In [7]:
# Pull each column of interest out of the OSTEM frame as its own Series.
idOstem, titleOstem, descriptionOstem, urlOstem, typeOstem = (
    ostem[column] for column in ("ID", "Title", "Description", "URL", "Type")
)

display(ostem)

#### Pathways

In [8]:
# Pull each column of interest out of the pathways frame as its own Series.
idPathways, titlePathways, educationLevel, urlPathways, majorsPathways = (
    pathways[column]
    for column in ("ID", "Title", "Education Level", "URL", "Majors")
)

display(pathways)

#### Research

In [9]:
# Keep the row ID and the solicitation ID under separate names so that neither
# assignment overwrites the other; idResearch holds the "ID" column, matching
# idEvents / idOstem / idPathways.
idResearch = research["ID"]
solicitationIdResearch = research["Solicitation ID"]
titleResearch = research["Solicitation Title"]
status = research["Status"]
urlResearch = research["URL"]

display(research)

#### Create Nodes

In [10]:
import pandas as pd
import chromadb
from chromadb.utils import embedding_functions

# Load the OSTEM dataset straight from disk for this cell.
ostem = pd.read_csv("data/ostems.csv")

# Chroma client and Chroma's default embedding function.
client = chromadb.Client()
embedding_function = embedding_functions.DefaultEmbeddingFunction()

# get_or_create=True reuses the "nasa_ostem" collection when it already exists.
collection = client.create_collection(
    name="nasa_ostem",
    embedding_function=embedding_function,
    get_or_create=True,
)

# Descriptions are the documents; IDs (as strings) are the record ids; the
# remaining columns are attached as per-record metadata.
documents = ostem['Description'].tolist()
ids = ostem['ID'].astype(str).tolist()
metadatas = [
    {
        'id': str(row['ID']),
        'title': row['Title'],
        'url': row['URL'],
        'type': row['Type'],
    }
    for _, row in ostem.iterrows()
]

# Store everything in the collection in a single call.
collection.add(documents=documents, metadatas=metadatas, ids=ids)


In [5]:
# Request three results for a sample query and show the raw response dict.
results = collection.query(query_texts=["internship opportunities"], n_results=3)

display(results)


{'ids': [[]],
 'embeddings': None,
 'documents': [[]],
 'uris': None,
 'data': None,
 'metadatas': [[]],
 'distances': [[]],
 'included': [<IncludeEnum.distances: 'distances'>,
  <IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

In [11]:
from llama_index.core import StorageContext
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.chroma import ChromaVectorStore

# Wrap the eventsCollection Chroma collection as a vector store, then build a
# storage context around that vector store for the index cell to use.
vector_store = ChromaVectorStore(chroma_collection=eventsCollection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

In [12]:
# TextNode is imported here because no other visible cell brings it into
# scope; without this import the cell raises NameError on a fresh kernel.
from llama_index.core.schema import TextNode

# NOTE(review): createBasicnodes is not defined or imported anywhere in the
# visible notebook. Confirm where it lives and add its definition/import so
# this cell survives Restart Kernel -> Run All.
eventNodes = createBasicnodes(events)

# A single hand-written example TextNode (text plus id/title/url/type
# metadata); it is not referenced by the other visible cells.
node = [TextNode(
            text="12/4 “FIRST WOMEN Series” Live Q&A: NASA's 2024 Wings of Excellence Awardee Event Date: December 4, 2024, at 4 p.m. ET (Wednesday) Registration Deadline: November 18, 2024, at 11:59 p.m. ET (Monday) This year we are introducing a new series, 'FIRST WOMEN,' Virtual Classroom Connections. Women are making history at NASA every day and now we're giving you the chance to meet them. This series h...",
            metadata={
                "id": 1,
                "title": "12/4 “FIRST WOMEN Series” Live Q&A: NASA's 2024 Wings of Excellence Awardee",
                "url": "https://stemgateway.nasa.gov/s/course-offering/a0B3d0000025iVkEAI/124-first-women-series-live-qa-nasas-2024-wings-of-excellence-awardee",
                "type": "Educator",
            }
)]

In [13]:
from llama_index.embeddings.openai import OpenAIEmbedding

# Embedding model that is passed to VectorStoreIndex when the index is built.
embed_model = OpenAIEmbedding(
    model="text-embedding-3-small",
)

In [14]:
# from langchain.embeddings import HuggingFaceEmbeddings
# from llama_index.embeddings.langchain import LangchainEmbedding

# lc_embed_model = HuggingFaceEmbeddings(
#     model_name="sentence-transformers/all-mpnet-base-v2"
# )
# embed_model = LangchainEmbedding(lc_embed_model)

In [20]:
# Build a VectorStoreIndex from eventNodes, using the storage context and
# embedding model defined in the earlier cells.
index = VectorStoreIndex(eventNodes, storage_context=storage_context, embed_model=embed_model)

In [None]:
from IPython.display import Markdown

# Turn the index into a query engine and ask it a sample question.
query_engine = index.as_query_engine()
response = query_engine.query("Events for woman")
# Render the response as bold text via Markdown.
display(Markdown(f"<b>{response}</b>"))

<b>The event mentioned in the context is part of the "FIRST WOMEN Series" Live Q&A, which features NASA's 2024 Wings of Excellence Awardee. This event provides an opportunity to meet women who are making history at NASA.</b>