In [1]:
from sentence_transformers import SentenceTransformer, util
import json

from chunkify import clean_json, chunkify, json_to_transcript
# Load the JSON stored in tmp.json. JSON text is UTF-8, so the encoding is
# pinned instead of depending on the platform's default locale encoding.
# NOTE: this value of `chunks` is only used for the length print below; the
# name is reassigned once the transcript has been chunked.
with open("tmp.json", encoding="utf-8") as f:
    chunks = json.load(f)

print(len(chunks))

# Load the raw interview JSON for the selected file.
file_name = "courtney_nelson"
with open(f"data/{file_name}.json", encoding="utf-8") as f:
    data = json.load(f)

# Clean the raw JSON, then split it into "small" segments.
# chunk_segments is iterated as (start, end) pairs that slice data_clean;
# each slice is rendered with json_to_transcript and whitespace-stripped.
data_clean = clean_json(data)
chunk_segments = chunkify(data_clean, "small")
chunks = [
    json_to_transcript(data_clean[start:end]).strip()
    for start, end in chunk_segments
]
print(len(chunks))

130
23
3
3
4
2
3
3
2
2
2
4
3
2
2
2
3
3
8
18


In [18]:
# Show the current contents of `chunks` as the cell output.
chunks

["INTERVIEWER:\nSo you're not supposed to know and you're just like, no, not only do I know, I'm letting it fuel my ego.",
 "INTERVIEWER:\nOh, because I'm really close with Sasha.",
 'INTERVIEWER:\nSo how do you get a hold of the rankings?',
 "INTERVIEWER:\nNo, no, no, no. It's internal. It's internal.",
 'INTERVIEWER:\nThey rank you in front of each other?',
 "INTERVIEWER:\nI think we were a second to last. I think we were team number 11 and now we were team number four last week. I bet we're much higher this week, but they didn't rank us this week, which is unfortunate.",
 "INTERVIEWER:\nSo they're like, there's some doofuses.",
 "INTERVIEWER:\nOh, you didn't? Okay. So apparently during the team debrief on Monday. So according to the team debrief, so apparently all the partners, basically like Anne, Michael, whatever the fuck, Mike and all those people, they talk about each team. They're on Monday on the Monday meetings. And apparently we're called the Dark Horse team. The literal li

In [6]:

# Rank every transcript chunk against a single question.
query = "What is said about referrals?"
docs = chunks

# Sentence-embedding model, requested on device 'cuda:3'.
model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2', device='cuda:3')

# Embed the question and the candidate passages.
query_emb = model.encode(query)
doc_emb = model.encode(docs)

# Dot-product similarity; the first row is taken and flattened to a plain
# Python list with one score per passage.
similarity = util.dot_score(query_emb, doc_emb)
scores = similarity[0].cpu().tolist()

# Pair each passage with its score and order the pairs best-first.
doc_score_pairs = sorted(
    zip(docs, scores),
    key=lambda pair: pair[1],
    reverse=True,
)

# Print each score followed by its passage.
for doc, score in doc_score_pairs:
    print(score, doc)

0.49378344416618347 INTERVIEWER:
I see. If you don't mind, because I know that you probably don't have the figures off your head, but roughly people that you end up placing or hiring, do you have an idea of what percentage comes from what kind of channels? Like in terms of referral or just cold outreach sourcing in college?

INTERVIEWEE:
Yeah, so I would say about 30 to 35 percent of our hires are referred candidates. And then I, which is not necessarily the normal answer you'll get from a lot of people, because I've worked at other organizations where five to 10 percent have come from referrals, or there's other organizations where all of them come. It depends on how large the company is. But 30 to 35 percent of 09's employees come from referrals. And then LinkedIn is probably the bulk of the remainder. Well, LinkedIn is a portion. I don't have a rough estimate, but I would say we probably find, I'll say we'll probably find 20 percent from colleges and universities through internships

In [10]:
import openai
import dotenv
import os


# Load variables via python-dotenv, then give the OpenAI client the key
# stored under OPENAI_KEY1 (this is None when the variable is not set).
dotenv.load_dotenv()
openai.api_key = os.getenv("OPENAI_KEY1")


In [11]:
# Embedding model used by search() below: all-mpnet-base-v2, requested on device 'cuda:3'.
semantic_model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2', device='cuda:3')


def search(query: str, documents=None, top_k: int = 5) -> str:
    """Answer ``query`` with GPT, using the best-matching documents as context.

    The query and every document are embedded with ``semantic_model``, the
    documents are ranked by dot-product score against the query, and the
    ``top_k`` highest-scoring ones are appended to the prompt sent to the
    OpenAI completion endpoint.

    Args:
        query: Question to answer.
        documents: Sequence of text passages to search. When omitted, the
            notebook-level ``docs`` list is used, so ``search(query)`` keeps
            working exactly as it did before this parameter existed.
        top_k: How many of the top-ranked passages to put in the prompt.

    Returns:
        The completion text with surrounding whitespace stripped.

    Raises:
        ValueError: If ``top_k`` is below 1 or there are no documents.
    """
    if documents is None:
        # Backward-compatible default: fall back to the notebook global.
        documents = docs
    if top_k < 1:
        raise ValueError("top_k must be at least 1")
    if len(documents) == 0:
        raise ValueError("search() needs at least one document to use as context")

    # Encode query and documents.
    # NOTE: the documents are re-encoded on every call; cache the embeddings
    # outside this function if it is called repeatedly on the same corpus.
    query_emb = semantic_model.encode(query)
    doc_emb = semantic_model.encode(documents)

    # Dot score between the query and every document embedding.
    scores = util.dot_score(query_emb, doc_emb)[0].cpu().tolist()

    # Pair documents with their scores, highest score first.
    ranked = sorted(zip(documents, scores), key=lambda pair: pair[1], reverse=True)

    # Build the prompt: the question, then the top_k passages as context.
    prompt = f"Please answer the following question: {query}\n\nHere is the context:\n\n\n"
    for doc, _score in ranked[:top_k]:
        prompt += f"{doc}\n\n"

    # NOTE(review): `openai.Completion` and "text-davinci-003" look like the
    # legacy completions interface; confirm both are still available for the
    # installed openai package before relying on this call.
    completion = openai.Completion.create(
        model="text-davinci-003",
        prompt=prompt,
        temperature=.7,
        max_tokens=256,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
    return completion.choices[0].text.strip()


# Run search() for one question and keep the returned answer text.
out = search("What is said about referrals?")

In [14]:
# Print the answer text stored in `out`.
print(out)

In short, referrals are a common source of candidates for many organizations, with some companies reporting up to 35% of hires coming from referrals. Referrals often happen between recruiters and agencies, where recruiters will share candidate information with other companies. In-house corporate recruiting teams may occasionally share candidate information with other companies in order to connect them with potential roles, but this is less frequent.
