In [1]:
from flask import jsonify

In [13]:
from langchain.vectorstores     import Chroma
from langchain.document_loaders import DataFrameLoader
from pandas                     import read_csv
from langchain.embeddings       import HuggingFaceEmbeddings


In [9]:
# Load the course table from document.csv (path is relative to the notebook's
# working directory) and preview its first rows. The preview shows the
# course_name, o_summarized and course_code columns used by the cells below.
temp = read_csv('document.csv')
temp.head()

Unnamed: 0,course_name,o_summarized,course_code
0,Bachelor of Nursing (Honours),The Bachelor's Degree is an undergraduate prog...,H421
1,Graduate Certificate of Intensive Care Nursing,The Bachelor's Degree is an undergraduate prog...,H545
2,Graduate Diploma of Emergency Nursing,The Graduate Diploma of Emergency Nursing is a...,H666
3,Bachelor of Science (Honours),The Bachelor of Science Honours program at Dea...,S400
4,Graduate Diploma of Cardiac Nursing,The Graduate Diploma Cardiac Nursing program a...,H665


In [11]:
# Wrap the DataFrame in a LangChain loader that uses the course_name column as
# each document's text; the search outputs further down show o_summarized and
# course_code carried along as document metadata.
# NOTE(review): only the course name is embedded, not the o_summarized text —
# confirm this is intended.
# NOTE(review): load_and_split() presumably also runs LangChain's default text
# splitter, which should be a no-op for short course names — verify.
loader = DataFrameLoader(temp, page_content_column='course_name') 
document = loader.load_and_split()

In [14]:
# Embed the documents with the sentence-transformers all-MiniLM-L6-v2 model and
# index them in a Chroma vector store; `db` is the handle every search cell
# below relies on.
# NOTE(review): no persist_directory is passed, so the index presumably lives
# only in this kernel session and is rebuilt on every run — confirm.
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
db = Chroma.from_documents(document, embeddings,)

  from .autonotebook import tqdm as notebook_tqdm


In [91]:
def getSimilarCouses(query, k=10, max_distance=0.95):
    """Return the course codes of the indexed courses closest to ``query``.

    Calls ``db.similarity_search_with_score`` and keeps only the hits whose
    score is at or below ``max_distance``. The score reported by the Chroma
    store is a distance, so a smaller value means a closer match.

    Args:
        query (str): Free-text question or course description to search for.
        k (int): Number of candidates requested from the vector store.
            Defaults to 10.
        max_distance (float): Inclusive upper bound on the distance a hit may
            have to be kept. Defaults to 0.95.

    Returns:
        list: The ``course_code`` metadata value of every retained hit, in the
        order the vector store returned them. Empty when no candidate is
        within ``max_distance``.

    Raises:
        KeyError: If a retained document has no ``course_code`` in its
            metadata.
    """
    hits = db.similarity_search_with_score(query, k=k)
    # Scores are distances, so "similar enough" means small enough.
    return [
        doc.metadata['course_code']
        for doc, distance in hits
        if distance <= max_distance
    ]

In [92]:
# Smoke test of the filtered lookup; the recorded output keeps five course
# codes for this query.
getSimilarCouses('Could you tell me about courses related to AI and Data Science?')

['S308', 'S536', 'S770', 'S379', 'S777']

In [93]:
# Probe with a seemingly off-topic query to inspect the raw distances: the
# recorded top hit sits at ~1.74, well above the 0.95 cut-off used in
# getSimilarCouses.
db.similarity_search_with_score('give me information about horses related to the eye and their designs', 10)

[(Document(page_content='Graduate Diploma of Museum Studies', metadata={'o_summarized': "The Graduate Diploma of Museum Studies is designed for individuals who want to enter the museum sector and gain hands-on skills required such as interpretation, collections management, cataloguing/documentation, exhibition planning, budgets & policy development, audience involvement in exhibitions. The program also offers elective units including World Heritage Intangible Cultural Heritage Digital Interpreting Applied Heritage Project Internship Curating digital objects from a collection of the world's most valuable cultural heritage items such as ancient manuscripts and rare books or artworks that are now housed at museums around Australia", 'course_code': 'A668'}),
  1.7396718263626099),
 (Document(page_content='Master of Cultural Heritage and Museum Studies', metadata={'o_summarized': "The Master's program at Deakin University is a comprehensive training that offers students the opportunity to s

In [76]:
# API lookup: per the docstring printed below, the float paired with each
# document is a distance (hence the <= comparison in getSimilarCouses).
help(db.similarity_search_with_score)

Help on method similarity_search_with_score in module langchain.vectorstores.chroma:

similarity_search_with_score(query: 'str', k: 'int' = 4, filter: 'Optional[Dict[str, str]]' = None, **kwargs: 'Any') -> 'List[Tuple[Document, float]]' method of langchain.vectorstores.chroma.Chroma instance
    Run similarity search with Chroma with distance.
    
    Args:
        query (str): Query text to search for.
        k (int): Number of results to return. Defaults to 4.
        filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
    
    Returns:
        List[Tuple[Document, float]]: List of documents most similar to the query
            text with distance in float.



In [104]:
# Query with an exact course title and show only the best match; the recorded
# hit is course S737 at a distance of ~0.13.
db.similarity_search_with_score('Please tell me about Master of Applied Artificial Intelligence (Professional)', 10)[0]

(Document(page_content='Master of Applied Artificial Intelligence (Professional)', metadata={'o_summarized': 'The Master\'s degree program "Master of Applied Artificial Intelligence (Professional)" is designed for individuals with a strong interest or background in artificial intelligence and its applications to software development.', 'course_code': 'S737'}),
 0.13220003247261047)

In [31]:
# Unfiltered lookup: collect the course code of each of the 10 hits returned
# by the plain similarity search (no distance cut-off), then display them.
temp_ = [
    hit.metadata['course_code']
    for hit in db.similarity_search(
        'Could you tell me about courses related to AI and Data Science?', 10
    )
]

temp_

['S308',
 'S536',
 'S770',
 'S379',
 'S777',
 'S737',
 'S677',
 'S479',
 'M553',
 'S306']

In [33]:
from requests import post

In [49]:
# API lookup: per the docstring printed below, Chroma.get returns the stored
# collection as a dict.
help(db.get)

Help on method get in module langchain.vectorstores.chroma:

get(include: 'Optional[List[str]]' = None) -> 'Dict[str, Any]' method of langchain.vectorstores.chroma.Chroma instance
    Gets the collection.
    
    Args:
        include (Optional[List[str]]): List of fields to include from db.
            Defaults to None.



In [61]:
# Fetch the summary text of course S737 straight from the DataFrame
# (first matching row).
temp.loc[temp['course_code'] == 'S737', 'o_summarized'].iloc[0]

'The Master\'s degree program "Master of Applied Artificial Intelligence (Professional)" is designed for individuals with a strong interest or background in artificial intelligence and its applications to software development.'

In [102]:
# Ask the getCourses endpoint of the course-search service for the codes that
# match a question, then display them.
# NOTE(review): the question text is missing its closing ")" — left as-is so
# the recorded output still corresponds to the request.
# NOTE(review): the host is a hard-coded IP (172.17.0.2), presumably a local
# container — confirm / make configurable.
res = post(
    'http://172.17.0.2:8891/getCourses',
    json={'question':'Please tell me about Master of Applied Artificial Intelligence (Professional'},
    timeout=30,  # do not block the kernel forever if the service is unresponsive
)
res.raise_for_status()  # surface HTTP errors instead of failing later on .json()/KeyError
res.json()['course_codes']

['S737',
 'S736',
 'S308',
 'S536',
 'S779',
 'S770',
 'S776',
 'S751',
 'S739',
 'E737']

In [64]:
# Ask the getCourses endpoint of the course-search service for the codes that
# match a topical question, then display them.
# NOTE(review): the host is a hard-coded IP (172.17.0.2), presumably a local
# container — confirm / make configurable.
res = post(
    'http://172.17.0.2:8891/getCourses',
    json={'question':'Could you tell me about courses related to Neural Networks?'},
    timeout=30,  # do not block the kernel forever if the service is unresponsive
)
res.raise_for_status()  # surface HTTP errors instead of failing later on .json()/KeyError
res.json()['course_codes']

['S308',
 'S536',
 'S737',
 'S770',
 'S736',
 'S777',
 'S700',
 'S751',
 'S735',
 'M553']

In [101]:
# Ask the courseInfo endpoint of the course-search service for the summary of
# a single course code, then display it.
# NOTE(review): the host is a hard-coded IP (172.17.0.2), presumably a local
# container — confirm / make configurable.
res = post(
    'http://172.17.0.2:8891/courseInfo',
    json={'course_code':'S308'},
    timeout=30,  # do not block the kernel forever if the service is unresponsive
)
res.raise_for_status()  # surface HTTP errors instead of failing later on .json()/KeyError
res.json()['course_summary']

"The Bachelor's program at Deakin University offers students hands-on experience developing intelligent systems through their exposure to cutting edge research and technology advancements such as AI. The course is designed for individuals with a minimum of 100 hours industry work experience who are looking forward in the field that will enable them develop new skills required by businesses, create innovative solutions using artificial intelligence (AI), design software applications powered by machine learning algorithms or other advanced technologies to solve real-world problems and make significant contributions towards shaping our future."