In [None]:
import pandas as pd

# Location of the organization records, served as JSON over HTTP.
ORGANIZATIONS_URL = 'https://storage.googleapis.com/swe-workshop-23/organizations.json'

# Parse the remote JSON into a DataFrame; the bare name on the last line
# makes the notebook render the frame as the cell output.
df = pd.read_json(ORGANIZATIONS_URL)
df

In [None]:
# Keep only the row(s) whose title is exactly 'Nebula Labs' and show them
# as the cell output.
is_nebula = df['title'] == 'Nebula Labs'
nebula = df[is_nebula]
nebula

In [None]:
from io import BytesIO
from PIL import Image
from base64 import b64decode

# Take the 'picture_data' value of the first selected row, base64-decode it
# to raw bytes, and wrap those bytes in an in-memory file so PIL can open
# the image; the resulting Image object is the cell output.
encoded_image = nebula['picture_data'].iloc[0]
image_bytes = b64decode(encoded_image)
Image.open(BytesIO(image_bytes))

In [None]:
# format as documents

def format_as_doc(row):
    """Render one organization record as a small plain-text document.

    Args:
        row: Mapping-like record (a DataFrame row or a dict) providing
            'title', 'categories' and 'description' entries.

    Returns:
        A string of three newline-terminated lines: "Title: ...",
        "Categories: ..." (entries joined with ", ") and "Description: ...".
        A missing 'categories' value produces an empty category list.
    """
    categories = row['categories']
    if categories is None or (isinstance(categories, float) and categories != categories):
        # None, or a float NaN (the only float that differs from itself),
        # means "no categories"; joining it directly would raise TypeError.
        categories = []
    elif isinstance(categories, str):
        # A bare string would otherwise be joined character by character.
        categories = [categories]
    # str() keeps the join from failing on non-string category entries.
    category_text = ', '.join(str(category) for category in categories)
    return f"Title: {row['title']}\nCategories: {category_text}\nDescription: {row['description']}\n"


# Add a 'content' column holding the rendered document for every row
# (the formatter is applied row by row).
df['content'] = df.apply(lambda row: format_as_doc(row), axis='columns')

# Select Nebula Labs again so the selection carries the new column.
nebula = df.loc[df['title'] == 'Nebula Labs']

# Print the rendered document of the first matching row.
print(nebula['content'].iat[0])

In [None]:
from langchain.llms import HuggingFaceTextGenInference

# Client for a remote text-generation-inference server. Sampling is switched
# on (do_sample=True), so expect the completion to vary between runs.
hf = HuggingFaceTextGenInference(
    inference_server_url="https://hvxgjd4o670aom-8080.proxy.runpod.net/",
    max_new_tokens=512,
    do_sample=True,
    top_k=5,
)

# Open-ended opening sentence for the model to continue.
intro_prompt = "Hi! My name is Amrit! I'm a member of Nebula Labs, a student organization on campus dedicated to Open-Source projects built by students, for students. Today I will be talking to you about "

# Go through the Runnable `invoke` entry point, the same one the chains in
# this notebook use, rather than calling the LLM object directly; the direct
# call form is deprecated in newer LangChain releases.
response = hf.invoke(intro_prompt)
print(response)

In [None]:
from langchain.schema import StrOutputParser

from langchain.prompts import PromptTemplate

# Prompt text for the retrieval-free baseline; {interests} is filled in at
# invoke time.
recommendation_template = (
    "You are a student organization recommendation assistant. Given the user's interests, recommend a student organization on campus.\n"
    "\n"
    "User interests: {interests}\n"
    "\n"
    "Recommendation: "
)
prompt = PromptTemplate.from_template(recommendation_template)

# Pipeline: fill the template -> send it to the LLM -> parse the output to a string.
chain = prompt | hf | StrOutputParser()

sample_input = {"interests": "computer science, soccer"}
response = chain.invoke(sample_input)
print(response)

In [None]:
from langchain.embeddings.fastembed import FastEmbedEmbeddings

from langchain.vectorstores import Chroma

from langchain.document_loaders import DataFrameLoader

# Only the title and the rendered document text go into the loader; the
# 'content' column is used as each document's page content.
doc_frame = df[["title", "content"]]
loader = DataFrameLoader(doc_frame, page_content_column="content")
documents = loader.load()

# Embed the documents and index them in a Chroma vector store.
embeddings = FastEmbedEmbeddings(max_length=512)
docsearch = Chroma.from_documents(documents=documents, embedding=embeddings)

In [None]:
# Query the vector store directly, asking for 5 results, to inspect what it
# returns for a sample interest string.
sample_interests = "computer science, soccer"
docsearch.similarity_search(sample_interests, k=5)

In [None]:
from langchain.schema.runnable import RunnablePassthrough

# Retriever view over the vector store, configured with k=5 through
# search_kwargs.
retriever = docsearch.as_retriever(search_kwargs={"k": 5})

# Retrieval-augmented prompt. {interests} receives the user's input and
# {context} the formatted search results. The text ends mid-sentence so the
# model's completion begins with the recommended organization.
prompt = PromptTemplate.from_template(
    """You are a student organization recommendation assistant. Given the user's
    interests and some relevant search results from the campus student
    organization directory, recommend a student organization on campus as plain
    text, and give a short description of the organization.

User Interests: {interests}

Search Results:
=============
{context}
=============

Given the user's interests and some relevant search results from the campus student organization directory, the recommended organization is """
)


def format_docs(docs):
    """Concatenate the page_content of each document into one string.

    Args:
        docs: Iterable of objects exposing a `page_content` attribute.

    Returns:
        The page contents in input order, separated by three newline
        characters; an empty input yields an empty string.
    """
    separator = "\n" * 3
    page_texts = (doc.page_content for doc in docs)
    return separator.join(page_texts)


# First step builds the prompt variables from the single input string:
# "context" sends it through the retriever and formats the hits with
# format_docs, while "interests" passes it along unchanged.
prompt_inputs = {
    "context": retriever | format_docs,
    "interests": RunnablePassthrough(),
}

# Remaining steps: fill the prompt, call the LLM, parse the output to a string.
chain = prompt_inputs | prompt | hf | StrOutputParser()

In [None]:
# Run the retrieval-augmented chain on a sample interest and print the
# model's recommendation.
user_interests = "women in stem"
response = chain.invoke(user_interests)
print(response)