## Load embeddings to database

### IMPORT

In [1]:
import openai
import PyPDF2
import requests
from io import StringIO
import pandas as pd
from supabase import create_client, Client
import re
import os
import json
import asyncio
from langchain.text_splitter import RecursiveCharacterTextSplitter

from functions import extract_and_chunk_pdf, get_embeddings, match_documents, upload_to_database

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


#### OPENAI KEY

In [2]:
# Location of the JSON file that holds the OpenAI credentials
path_to_key = "../secrets/openai_key.json"

# Read the secrets; the `with` block closes the file on exit,
# so no explicit close() call is needed
with open(path_to_key, "r") as f:
    openai_secrets = json.load(f)

# Set the key on the openai module
openai.api_key = openai_secrets['openai_api_key']

#### SUPABASE KEY

In [3]:
# Load the Supabase credentials from the local secrets file
path_to_sup_key = "../secrets/supabase_key.json"

with open(path_to_sup_key, 'r') as secrets_file:
    config = json.load(secrets_file)

# The client is created with the project URL and the service-role key
supabase_url = config["project_url"]
supabase_service_key = config["service_role_key"]

supabase: Client = create_client(supabase_url, supabase_service_key)

#### GET SECTIONS FROM PDF

In [None]:
# Path for text data
path = './chatbot-text.pdf'
# Chunk the PDF with the project helper imported from `functions`.
# NOTE(review): 1500 and 300 are presumably chunk size and chunk overlap — confirm
# against extract_and_chunk_pdf. The upload cell further down passes 1200 and 200,
# so the chunks previewed here may differ from the ones stored in the database.
chunks = extract_and_chunk_pdf(path, 1500, 300)

In [None]:
# Try the embedding helper on the first chunk; as the last expression of the cell,
# its return value is displayed. Needs `chunks` from the previous cell, and
# `chunks[0]` assumes at least one chunk was produced.
get_embeddings(chunks[0])

#### UPLOAD IN SUPABASE DB

In [5]:
# Hand the PDF path and the Supabase client to the project upload helper.
# Per the recorded output below, each chunk results in one OpenAI embeddings request
# and one POST to the `documents2` table.
# NOTE(review): 1200 and 200 are presumably chunk size and chunk overlap — confirm
# against upload_to_database.
# NOTE(review): re-running this cell presumably inserts the same chunks again —
# verify whether the helper or the table guards against duplicates.
path = './chatbot-text.pdf'
upload_to_database(path, 1200, 200, supabase)

2024-02-07 09:10:22,882:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-07 09:10:23,486:INFO - HTTP Request: POST https://anltjabuirxieouelkhe.supabase.co/rest/v1/documents2 "HTTP/1.1 201 Created"


Successfully uploaded chunk 0


2024-02-07 09:10:24,965:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-07 09:10:25,143:INFO - HTTP Request: POST https://anltjabuirxieouelkhe.supabase.co/rest/v1/documents2 "HTTP/1.1 201 Created"


Successfully uploaded chunk 1


2024-02-07 09:10:26,992:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-07 09:10:27,426:INFO - HTTP Request: POST https://anltjabuirxieouelkhe.supabase.co/rest/v1/documents2 "HTTP/1.1 201 Created"


Successfully uploaded chunk 2


2024-02-07 09:10:27,822:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-07 09:10:28,262:INFO - HTTP Request: POST https://anltjabuirxieouelkhe.supabase.co/rest/v1/documents2 "HTTP/1.1 201 Created"


Successfully uploaded chunk 3


2024-02-07 09:10:31,592:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-07 09:10:31,842:INFO - HTTP Request: POST https://anltjabuirxieouelkhe.supabase.co/rest/v1/documents2 "HTTP/1.1 201 Created"


Successfully uploaded chunk 4


2024-02-07 09:10:32,272:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-07 09:10:32,916:INFO - HTTP Request: POST https://anltjabuirxieouelkhe.supabase.co/rest/v1/documents2 "HTTP/1.1 201 Created"


Successfully uploaded chunk 5


2024-02-07 09:10:33,825:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-07 09:10:34,395:INFO - HTTP Request: POST https://anltjabuirxieouelkhe.supabase.co/rest/v1/documents2 "HTTP/1.1 201 Created"


Successfully uploaded chunk 6


2024-02-07 09:10:34,895:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-07 09:10:35,241:INFO - HTTP Request: POST https://anltjabuirxieouelkhe.supabase.co/rest/v1/documents2 "HTTP/1.1 201 Created"


Successfully uploaded chunk 7


2024-02-07 09:10:35,657:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-07 09:10:35,832:INFO - HTTP Request: POST https://anltjabuirxieouelkhe.supabase.co/rest/v1/documents2 "HTTP/1.1 201 Created"


Successfully uploaded chunk 8


2024-02-07 09:10:36,183:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-07 09:10:36,366:INFO - HTTP Request: POST https://anltjabuirxieouelkhe.supabase.co/rest/v1/documents2 "HTTP/1.1 201 Created"


Successfully uploaded chunk 9


2024-02-07 09:10:36,645:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-07 09:10:37,196:INFO - HTTP Request: POST https://anltjabuirxieouelkhe.supabase.co/rest/v1/documents2 "HTTP/1.1 201 Created"


Successfully uploaded chunk 10


2024-02-07 09:10:37,747:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-07 09:10:37,938:INFO - HTTP Request: POST https://anltjabuirxieouelkhe.supabase.co/rest/v1/documents2 "HTTP/1.1 201 Created"


Successfully uploaded chunk 11


2024-02-07 09:10:38,245:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-07 09:10:38,450:INFO - HTTP Request: POST https://anltjabuirxieouelkhe.supabase.co/rest/v1/documents2 "HTTP/1.1 201 Created"


Successfully uploaded chunk 12


2024-02-07 09:10:38,740:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-07 09:10:38,949:INFO - HTTP Request: POST https://anltjabuirxieouelkhe.supabase.co/rest/v1/documents2 "HTTP/1.1 201 Created"


Successfully uploaded chunk 13


2024-02-07 09:10:39,316:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-07 09:10:39,508:INFO - HTTP Request: POST https://anltjabuirxieouelkhe.supabase.co/rest/v1/documents2 "HTTP/1.1 201 Created"


Successfully uploaded chunk 14


2024-02-07 09:10:42,147:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-07 09:10:42,371:INFO - HTTP Request: POST https://anltjabuirxieouelkhe.supabase.co/rest/v1/documents2 "HTTP/1.1 201 Created"


Successfully uploaded chunk 15


2024-02-07 09:10:43,217:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-07 09:10:43,391:INFO - HTTP Request: POST https://anltjabuirxieouelkhe.supabase.co/rest/v1/documents2 "HTTP/1.1 201 Created"


Successfully uploaded chunk 16


2024-02-07 09:10:46,554:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-07 09:10:46,992:INFO - HTTP Request: POST https://anltjabuirxieouelkhe.supabase.co/rest/v1/documents2 "HTTP/1.1 201 Created"


Successfully uploaded chunk 17


2024-02-07 09:10:47,666:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-07 09:10:48,314:INFO - HTTP Request: POST https://anltjabuirxieouelkhe.supabase.co/rest/v1/documents2 "HTTP/1.1 201 Created"


Successfully uploaded chunk 18


2024-02-07 09:10:49,574:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-07 09:10:50,523:INFO - HTTP Request: POST https://anltjabuirxieouelkhe.supabase.co/rest/v1/documents2 "HTTP/1.1 201 Created"


Successfully uploaded chunk 19


2024-02-07 09:10:53,022:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-07 09:10:53,186:INFO - HTTP Request: POST https://anltjabuirxieouelkhe.supabase.co/rest/v1/documents2 "HTTP/1.1 201 Created"


Successfully uploaded chunk 20


2024-02-07 09:10:53,939:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-07 09:10:54,111:INFO - HTTP Request: POST https://anltjabuirxieouelkhe.supabase.co/rest/v1/documents2 "HTTP/1.1 201 Created"


Successfully uploaded chunk 21


2024-02-07 09:10:54,571:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-07 09:10:54,799:INFO - HTTP Request: POST https://anltjabuirxieouelkhe.supabase.co/rest/v1/documents2 "HTTP/1.1 201 Created"


Successfully uploaded chunk 22


2024-02-07 09:10:55,562:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-07 09:10:55,699:INFO - HTTP Request: POST https://anltjabuirxieouelkhe.supabase.co/rest/v1/documents2 "HTTP/1.1 201 Created"


Successfully uploaded chunk 23


2024-02-07 09:10:56,409:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-07 09:10:56,573:INFO - HTTP Request: POST https://anltjabuirxieouelkhe.supabase.co/rest/v1/documents2 "HTTP/1.1 201 Created"


Successfully uploaded chunk 24


2024-02-07 09:10:56,966:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-07 09:10:57,183:INFO - HTTP Request: POST https://anltjabuirxieouelkhe.supabase.co/rest/v1/documents2 "HTTP/1.1 201 Created"


Successfully uploaded chunk 25


2024-02-07 09:10:57,522:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-07 09:10:57,709:INFO - HTTP Request: POST https://anltjabuirxieouelkhe.supabase.co/rest/v1/documents2 "HTTP/1.1 201 Created"


Successfully uploaded chunk 26


2024-02-07 09:10:58,111:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-07 09:10:58,277:INFO - HTTP Request: POST https://anltjabuirxieouelkhe.supabase.co/rest/v1/documents2 "HTTP/1.1 201 Created"


Successfully uploaded chunk 27


2024-02-07 09:10:58,680:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-07 09:10:58,825:INFO - HTTP Request: POST https://anltjabuirxieouelkhe.supabase.co/rest/v1/documents2 "HTTP/1.1 201 Created"


Successfully uploaded chunk 28


2024-02-07 09:10:59,153:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-07 09:10:59,314:INFO - HTTP Request: POST https://anltjabuirxieouelkhe.supabase.co/rest/v1/documents2 "HTTP/1.1 201 Created"


Successfully uploaded chunk 29


2024-02-07 09:10:59,596:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-02-07 09:11:00,100:INFO - HTTP Request: POST https://anltjabuirxieouelkhe.supabase.co/rest/v1/documents2 "HTTP/1.1 201 Created"


Successfully uploaded chunk 30


#### TEST

In [6]:
# Example question (German for "In which field are you doing your Master's?")
# NOTE(review): despite its name, `question` holds the return value of get_embeddings
# (presumably the embedding vector), not the raw text — verify against the helper.
question = get_embeddings("In welchem Bereich machen Sie Ihren Master?")

2024-02-07 09:14:29,984:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 500 Internal Server Error"
2024-02-07 09:14:29,986:INFO - Retrying request to /embeddings in 0.882481 seconds
2024-02-07 09:14:31,181:INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [7]:
# Run the asynchronous function (top-level `await` relies on the notebook's IPython kernel)
# NOTE(review): 0.5 and 1 are presumably the similarity threshold and the number of
# matches to return — confirm against match_documents.
result = await match_documents(question, 0.5, 1)
print(result.data)

2024-02-07 09:14:37,366:INFO - HTTP Request: POST https://anltjabuirxieouelkhe.supabase.co/rest/v1/rpc/match_documents "HTTP/1.1 200 OK"


[{'id': 79, 'content': 'in fast allen Bereichen von Wirtschaft, Wissenschaft und Alltag eine enorme Bedeutung. Der Masterstudiengang Statistik macht Sie zum Profi im Umgang mit Statistiken jeder Art. Sie befassen sich mit den mathematischen Grundlagen und Methoden empirischer Forschung und lernen, wie man Statistiken erstellt und die Aussagen von Statistiken kritisch beurteilen kann. Die Themenbereiche des Studiums umfassen Wahrscheinlichkeitstheorie und mathematische Statistik, Spezialgebiete der Statistik, Gebiet der stochastischen Prozesse oder Zeitreihenanalyse. Der Masterstudiengang Statistik ist ein gemeinsamer Studiengang der Humboldt-Universität zu Berlin, der Freien Universität Berlin und der Technischen Universität Berlin. Master Overview: EDUCATION Humboldt University of Berlin – M.Sc. Statistics, Berlin (GER) • Current GPA: 1.9 (on a scale from 1.0 – 5.0) • Major subjects: Data Science & Machine Learning, Statistical Inference (October 2021 to August 2024) . Aktuelle Notenü

In [8]:
# Show only the text of the first match; assumes `result.data` is non-empty and that
# its rows carry a 'content' key, as in the output printed by the previous cell.
result.data[0]['content']

'in fast allen Bereichen von Wirtschaft, Wissenschaft und Alltag eine enorme Bedeutung. Der Masterstudiengang Statistik macht Sie zum Profi im Umgang mit Statistiken jeder Art. Sie befassen sich mit den mathematischen Grundlagen und Methoden empirischer Forschung und lernen, wie man Statistiken erstellt und die Aussagen von Statistiken kritisch beurteilen kann. Die Themenbereiche des Studiums umfassen Wahrscheinlichkeitstheorie und mathematische Statistik, Spezialgebiete der Statistik, Gebiet der stochastischen Prozesse oder Zeitreihenanalyse. Der Masterstudiengang Statistik ist ein gemeinsamer Studiengang der Humboldt-Universität zu Berlin, der Freien Universität Berlin und der Technischen Universität Berlin. Master Overview: EDUCATION Humboldt University of Berlin – M.Sc. Statistics, Berlin (GER) • Current GPA: 1.9 (on a scale from 1.0 – 5.0) • Major subjects: Data Science & Machine Learning, Statistical Inference (October 2021 to August 2024) . Aktuelle Notenübersicht: Abschluss: [8