In [1]:
import json
import requests

url = "https://whova.com/xems/apis/event_webpage/agenda/public/get_agendas/?event_id=hNiJdTa6x0fXwYih-GVzq-m8-pq5dthqOzaSMWeJL5g%3D"

# Fetch the public agenda. The timeout keeps this cell from hanging forever on
# a stalled connection, and raise_for_status() surfaces an HTTP error here
# instead of as a confusing JSON/KeyError further down.
response = requests.get(url, timeout=30)
response.raise_for_status()
conference_data = response.json()

# Keep an untouched copy of the payload on disk for inspection.
# NOTE: open(..., 'w') does not create directories, so ./data must already exist.
with open('./data/raw.json', 'w') as json_file:
    json.dump(conference_data, json_file, indent=4)

# Flatten the nested agenda into a single list of raw session dicts. Each
# entry of day['time_ranges'] is indexed at [1] to reach a nested list whose
# innermost items carry a 'sessions' list.
session_data = [
  session
  for day in conference_data['data']['agenda']
  for time_outer in day['time_ranges']
  for time_inner in time_outer[1]
  for time_slot in time_inner
  for session in time_slot['sessions']
]

def get_speaker_info(speaker):
  """Return the names of the people listed in a session's speaker entry.

  The entry is checked for a 'Speakers' key first and a 'Speaker' key second;
  the first one found is iterated and each item's 'name' is collected.
  An empty list is returned when neither key is present.
  """
  for key in ('Speakers', 'Speaker'):
    if key in speaker:
      return [person['name'] for person in speaker[key]]
  return []

def _to_session_record(raw):
  """Map one raw agenda session onto the flat record used downstream.

  'name', 'calendar_stime' and 'calendar_etime' are required keys. The
  optional 'desc', 'place', 'tracks' and 'speaker' keys fall back to an empty
  string / empty list when absent. Only the first track's name is kept.
  """
  record = {"title": raw['name']}
  record['abstract'] = raw.get('desc', '')
  record['room'] = raw.get('place', '')
  record['start_time'] = raw['calendar_stime']
  record['end_time'] = raw['calendar_etime']
  track_list = raw.get('tracks', [])
  record['track'] = track_list[0]['name'] if len(track_list) > 0 else ''
  record['speakers'] = get_speaker_info(raw['speaker']) if 'speaker' in raw else []
  return record

sessions = [_to_session_record(raw) for raw in session_data]

# Only sessions that have a non-empty abstract are exported to the file;
# the in-memory `sessions` list still holds every session.
with open('./data/sessions.json', 'w') as json_file:
    described = [record for record in sessions if len(record['abstract']) > 0]
    json.dump(described, json_file, indent=4)

In [5]:
from langchain.vectorstores import FAISS
from langchain.schema import Document

# Build one Document per session: the abstract is the text content and the
# scheduling details travel along as metadata.
#
# Sessions without an abstract are skipped. They would otherwise be added
# with empty page_content, and the sessions.json export already leaves them
# out, so this keeps the two outputs consistent.
documents = [
    Document(
        page_content = session['abstract'],
        metadata = {
            'title': session['title'],
            'speakers': session['speakers'],
            'room': session['room'],
            'start_time': session['start_time'],
            'end_time': session['end_time']
        }
    )
    for session in sessions
    if len(session['abstract']) > 0
]

In [6]:
from langchain_openai import OpenAIEmbeddings
# Embedding backend choice: OpenAI.
# NOTE(review): the following cell reassigns both `embeddings` and
# `embedding_model`, so on a top-to-bottom run these values are overwritten
# before the vector store is built. Run only the cell for the backend you want.
embeddings = OpenAIEmbeddings()
# Label for this backend; passed to vector_store.save_local() further down.
embedding_model = "openai"

In [10]:
from langchain_huggingface import HuggingFaceEmbeddings
# Embedding backend choice: HuggingFace (constructed with default arguments).
# NOTE(review): this reassigns `embeddings` and `embedding_model` from the
# preceding OpenAI cell, so on a top-to-bottom run this backend is the one used.
embeddings = HuggingFaceEmbeddings()
# Label for this backend; passed to vector_store.save_local() further down.
embedding_model = "huggingface"

In [11]:
# Build the FAISS vector store from `documents`, using whichever `embeddings`
# object was assigned most recently.
vector_store = FAISS.from_documents(documents, embeddings)
# Persist the store under ./data/nebraska-code. `embedding_model` is passed as
# the second positional argument -- presumably the index name, so each backend
# is saved under its own name (TODO confirm against FAISS.save_local).
vector_store.save_local("./data/nebraska-code", embedding_model)