# Install and load the libraries.

In [1]:
# !pip install -q chromadb==0.4.22
# !pip install -q langchain==0.1.4
# !pip install -q sentence_transformers==2.3.0
# !pip install -q accelerate==0.26.1

# Load the Dataset

In [2]:
import sys
sys.path.append('../data')

from resume_templates import RESUME_TEMPLATES

In [3]:
RESUME_TEMPLATES[0:2]

[{'input': '\n"work": [\n  {\n    "highlights": [\n      "Painted murals for a few local cafes — they gave me free reign on design",\n      "Also made digital illustrations for some small online businesses",\n      "One client asked for revisions last-minute and I had to totally redo the layout"\n    ]\n  }\n]\n',
  'output': '\n"work": [\n  {\n    "highlights": [\n      "Commissioned to design and paint large-scale murals for local businesses, incorporating custom themes and branding",\n      "Produced digital illustrations for e-commerce clients, enhancing their visual identity across web and social media",\n      "Adapted quickly to client feedback by redesigning a complete layout under a tight deadline, meeting final approval within 24 hours"\n    ]\n  }\n]\n'},
 {'input': '\n"work": [\n  {\n    "highlights": [\n      "Made a small game in Unity for a class project with 3 friends — I did most of the level design",\n      "We didn’t use any assets at first but then added some from t

In [4]:
len(RESUME_TEMPLATES)

12

# CREATE THE DOCUMENT FROM THE DATAFRAME

In [5]:
from langchain.schema import Document

In [6]:
# Convert your JSON list to LangChain Documents

import json

documents = [
    Document(
        page_content=json.dumps(item, indent=2) # Store the full object as JSON string
    )
    for item in RESUME_TEMPLATES
]

In [7]:
display(documents[:2])

[Document(page_content='{\n  "input": "\\n\\"work\\": [\\n  {\\n    \\"highlights\\": [\\n      \\"Painted murals for a few local cafes \\u2014 they gave me free reign on design\\",\\n      \\"Also made digital illustrations for some small online businesses\\",\\n      \\"One client asked for revisions last-minute and I had to totally redo the layout\\"\\n    ]\\n  }\\n]\\n",\n  "output": "\\n\\"work\\": [\\n  {\\n    \\"highlights\\": [\\n      \\"Commissioned to design and paint large-scale murals for local businesses, incorporating custom themes and branding\\",\\n      \\"Produced digital illustrations for e-commerce clients, enhancing their visual identity across web and social media\\",\\n      \\"Adapted quickly to client feedback by redesigning a complete layout under a tight deadline, meeting final approval within 24 hours\\"\\n    ]\\n  }\\n]\\n"\n}'),
 Document(page_content='{\n  "input": "\\n\\"work\\": [\\n  {\\n    \\"highlights\\": [\\n      \\"Made a small game in Unity

# Creating the embeddings

We load the library to create the pre trained model from HuggingFace to create the embeddings from sentences.


In [8]:
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings

embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

  from .autonotebook import tqdm as notebook_tqdm


# Creating the Index With Chroma
Here we are creating the index of embeddings. Using the document, and the embedding function created above.

In [9]:
from langchain.vectorstores import Chroma

In [10]:
chroma_db = Chroma.from_documents(
    documents, # using the entire document 
    embedding_function
)

# Query documents from Vector DB

In [11]:
query_1 = "python data analysis visualization"
query_2 = "nlp data analysis"

In [12]:
results_1 = chroma_db.similarity_search(query_1, k=1)
results_2 = chroma_db.similarity_search(query_2, k=1)

In [13]:
print(results_1[0])

page_content='{
  "input": "\n\"work\": [\n  {\n    \"highlights\": [\n      \"Worked on this project at my internship where we had to clean a ton of messy sales data \u2014 I wrote a few scripts in Python to fix formatting and missing stuff\",\n      \"I also helped make some visuals in Tableau to show trends to the marketing team\",\n      \"At one point, we had a bug that was breaking everything \u2014 I figured out it was due to mismatched date formats\"\n    ]\n  }\n]\n",
  "output": "\n\"work\": [\n  {\n    \"highlights\": [\n      \"Developed Python scripts for data cleaning and transformation, reducing manual data wrangling time by 40%\",\n      \"Created interactive Tableau dashboards to visualize sales trends and support marketing decision-making\",\n      \"Identified and resolved critical data pipeline issue related to inconsistent date formats, ensuring report accuracy\"\n    ]\n  }\n]\n"
}'


In [14]:
results_1_json = json.loads(results_1[0].page_content)
results_2_json = json.loads(results_2[0].page_content)
print(json.dumps(results_1_json, indent=2))
print(json.dumps(results_2_json, indent=2))

{
  "input": "\n\"work\": [\n  {\n    \"highlights\": [\n      \"Worked on this project at my internship where we had to clean a ton of messy sales data \u2014 I wrote a few scripts in Python to fix formatting and missing stuff\",\n      \"I also helped make some visuals in Tableau to show trends to the marketing team\",\n      \"At one point, we had a bug that was breaking everything \u2014 I figured out it was due to mismatched date formats\"\n    ]\n  }\n]\n",
  "output": "\n\"work\": [\n  {\n    \"highlights\": [\n      \"Developed Python scripts for data cleaning and transformation, reducing manual data wrangling time by 40%\",\n      \"Created interactive Tableau dashboards to visualize sales trends and support marketing decision-making\",\n      \"Identified and resolved critical data pipeline issue related to inconsistent date formats, ensuring report accuracy\"\n    ]\n  }\n]\n"
}
{
  "input": "\n      \"work\": [\n        {\n          \"highlights\": [\n           \"I messed 

# Payload

In [15]:
# return results_1_json, results_2_json as a list

results = [
    results_1_json,
    results_2_json
]

print(json.dumps(results, indent=2))

[
  {
    "input": "\n\"work\": [\n  {\n    \"highlights\": [\n      \"Worked on this project at my internship where we had to clean a ton of messy sales data \u2014 I wrote a few scripts in Python to fix formatting and missing stuff\",\n      \"I also helped make some visuals in Tableau to show trends to the marketing team\",\n      \"At one point, we had a bug that was breaking everything \u2014 I figured out it was due to mismatched date formats\"\n    ]\n  }\n]\n",
    "output": "\n\"work\": [\n  {\n    \"highlights\": [\n      \"Developed Python scripts for data cleaning and transformation, reducing manual data wrangling time by 40%\",\n      \"Created interactive Tableau dashboards to visualize sales trends and support marketing decision-making\",\n      \"Identified and resolved critical data pipeline issue related to inconsistent date formats, ensuring report accuracy\"\n    ]\n  }\n]\n"
  },
  {
    "input": "\n      \"work\": [\n        {\n          \"highlights\": [\n       