# SJ Chatbot: Save Embeddings

## Set up environment

In [1]:
!pip install numpy | findstr /V /C:"Requirement already satisfied"
!pip install pandas | findstr /V /C:"Requirement already satisfied"
!pip install openai==1.3.7 | findstr /V /C:"Requirement already satisfied"

import getpass
import os

import numpy as np
import openai
import pandas as pd
from openai import OpenAI

# Never store the key in the notebook. Keep a key that is already exported in
# the environment, and only prompt for one (without echoing it) when missing.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

# With no explicit api_key argument, the client picks the key up from OPENAI_API_KEY.
client = OpenAI()

## Read in paragraphs

In [2]:
# Load the paragraph texts that will be embedded.
# The 'unicode_escape' codec decodes the bytes as Latin-1 and also interprets
# backslash escape sequences found in the text.
# NOTE(review): if the file is really UTF-8 or cp1252, naming that encoding
# directly would be safer — confirm against the source file.
paragraphs = pd.read_csv(
    "final_paragraph_text.csv",
    encoding="unicode_escape",
)
# Preview the first six rows.
paragraphs.head(6)

Unnamed: 0,paragraphs
0,About us. Saint James Backpackers is a family ...
1,A new modern communal kitchen. Free Breakfast ...
2,"Rooms. In the hostel, we have a total of 104 b..."
3,New staff members arriving at SJB hostel shoul...
4,The rota changes. The rota detailing staff shi...
5,"At SJB, all volunteers are required to work 5 ..."


## Get embeddings

In [3]:
# First paragraph of the frame, kept as a sample value. In the cells shown here
# it is not read again: the `paragraph` parameter of get_embeddings is a
# separate, local name.
paragraph = paragraphs["paragraphs"][0]

def get_embeddings(paragraph, model="text-embedding-ada-002"):
    """Return the embedding of one piece of text.

    Parameters
    ----------
    paragraph : str
        The text to embed; it is sent to the API as the request's ``input``.
    model : str, optional
        Name of the embedding model to request. Defaults to
        ``"text-embedding-ada-002"``, the model this notebook was written for.

    Returns
    -------
    The ``embedding`` attribute of the first entry in the response's ``data``
    (printed further down in this notebook as a list of floats).

    Notes
    -----
    Uses the module-level ``client`` created in the set-up cell, and performs
    one API request per call.
    """
    response = client.embeddings.create(input=paragraph, model=model)
    # Only the first entry of `data` is used: one input text is passed per call.
    return response.data[0].embedding

# Embed every paragraph in row order (one API request per paragraph).
# Iterating over the column's values, instead of looking rows up by label with
# a 0..n-1 counter, keeps this cell working when the frame's index is not a
# fresh 0..n-1 range (for example when it is re-run after a row was dropped).
embeddings = [get_embeddings(text) for text in paragraphs.paragraphs]

## Add to data frame

In [4]:
# Attach the collected vectors as a new `embeddings` column. assign() returns a
# new frame, which is rebound to the same `paragraphs` name.
paragraphs = paragraphs.assign(embeddings=embeddings)

## View embeddings

In [5]:
# Print every row as a Series (paragraph text plus its embedding), each
# followed by two blank lines.
# NOTE(review): this dumps the whole frame; a `.head()` preview would keep the
# notebook output short.
for position in range(paragraphs.shape[0]):
    row = paragraphs.iloc[position]
    print(row, '', '', sep='\n')

paragraphs    About us. Saint James Backpackers is a family ...
embeddings    [0.02090136520564556, 0.01436216663569212, -0....
Name: 0, dtype: object


paragraphs    A new modern communal kitchen. Free Breakfast ...
embeddings    [-0.003322854870930314, 0.0005134791717864573,...
Name: 1, dtype: object


paragraphs    Rooms. In the hostel, we have a total of 104 b...
embeddings    [0.005651872139424086, 0.010156122036278248, -...
Name: 2, dtype: object


paragraphs    New staff members arriving at SJB hostel shoul...
embeddings    [-0.004849358927458525, -0.015835190191864967,...
Name: 3, dtype: object


paragraphs    The rota changes. The rota detailing staff shi...
embeddings    [0.005587662570178509, -0.03242306038737297, -...
Name: 4, dtype: object


paragraphs    At SJB, all volunteers are required to work 5 ...
embeddings    [-0.008781889453530312, -0.009302792139351368,...
Name: 5, dtype: object


paragraphs    Computer System. There are currently two compu...
embeddings    [-0.

## Drop problematic row

In [6]:
# Remove the row labelled 90 from the frame, in place.
# NOTE(review): the notebook does not record why this row is problematic —
# worth stating the reason next to the magic number.
# NOTE(review): not idempotent. Once the index has been reset in the next cell,
# re-running this cell removes whichever row is labelled 90 at that point.
paragraphs.drop(index=90, inplace=True)

## Check problematic row dropped

In [7]:
# Renumber the index from 0 so the gap left by the dropped row disappears
# (the old index is discarded rather than kept as a column).
paragraphs.reset_index(inplace=True, drop=True)
# Show positions 90-99 to inspect the rows around the removed one.
paragraphs.iloc[90:100]

Unnamed: 0,paragraphs,embeddings
90,The cleaning duties primarily involve cleaning...,"[0.009459886699914932, -0.0028507232200354338,..."
91,Day support resonsibilities: 8am-3pm. Greet gu...,"[0.005165849346667528, -0.006262844428420067, ..."
92,Evening clean responsibilities. Empty the tras...,"[0.012120752595365047, -0.0005316417664289474,..."
93,8am-9am :. Report to reception and ask if ther...,"[0.004247819073498249, 0.01066502369940281, -0..."
94,Reception assistant responsibilities: Cut keys...,"[-0.014371286146342754, -0.0014860449591651559..."
95,"Clean the dishwasher, microwave, toaster. Keep...","[0.01596343331038952, -0.005075294524431229, 0..."
96,Breaks during shifts. Given the reception shif...,"[-0.0027452774811536074, 0.0031304939184337854..."
97,Prices for various items. We charge £1 for ear...,"[0.014128339476883411, 0.006703752558678389, -..."
98,Banning guests. Guests who consistently refuse...,"[0.009622059762477875, -0.003595540300011635, ..."
99,Processing key deposit refunds. To process key...,"[-0.003835563315078616, 0.007750210352241993, ..."


## Save as CSV

In [8]:
# Write the paragraphs and their embeddings to disk, leaving out the index column.
# NOTE(review): each `embeddings` cell holds a Python list (see the printed rows
# earlier in the notebook), so presumably the CSV stores its text form and a
# reader has to parse it back into numbers — confirm in the consuming code.
paragraphs.to_csv('embeddings.csv', index=False)