In [1]:
def get_wikis(page_size=50, max_offset=1340,
              url="https://graph.everipedia.org/graphql", timeout=30):
    """
    Download wikis from the Everipedia GraphQL endpoint into one DataFrame.

    The endpoint is paged with ``limit``/``offset``; one request is sent per
    page and all pages are concatenated once at the end.

    Parameters
    ----------
    page_size : int
        Value sent as ``limit`` and step between offsets. The default (50)
        was found by manual testing to be the largest accepted limit.
    max_offset : int
        Exclusive upper bound for the offsets requested. The default (1340)
        covers the highest offset found by manual testing (1337).
    url : str
        GraphQL endpoint to query.
    timeout : float
        Seconds to wait for each HTTP request before giving up.

    Returns
    -------
    pandas.DataFrame
        Columns ``id``, ``summary``, ``content`` followed by the fields
        returned by the API (``title``). ``summary`` is not requested from
        the API, so it stays empty. Rows are de-duplicated on ``id``.

    Raises
    ------
    requests.HTTPError
        If a request returns an HTTP error status.
    RuntimeError
        If the GraphQL response carries an ``errors`` entry.
    """
    # The empty seed frame fixes the leading column order of the result.
    frames = [pd.DataFrame(columns=['id', 'summary', 'content'])]

    for offset in range(0, max_offset, page_size):
        # GraphQL payload for one page of wikis
        query = """{
                      wikis(limit:""" + str(page_size) + """,offset:""" + str(offset) + """) {
                      id
                      title
                      content
                    }
                }"""

        response = requests.post(url=url, json={"query": query}, timeout=timeout)
        # Fail loudly instead of crashing later on a missing 'data' key.
        response.raise_for_status()
        payload = response.json()
        if payload.get('errors'):
            raise RuntimeError(
                "GraphQL query failed at offset %d: %s" % (offset, payload['errors'])
            )

        # Collect pages in a list; concatenating inside the loop re-copies
        # every previously fetched row on each iteration.
        frames.append(pd.DataFrame(payload['data']['wikis']))

    dataFrame = pd.concat(frames, axis=0, ignore_index=True)

    # The API may return the same wiki in more than one page; 'id' is the
    # unique identifier, so keep the first occurrence of each.
    dataFrame = dataFrame.drop_duplicates(subset=['id'])

    return dataFrame

In [3]:
!pip install pandas

Collecting pandas
  Downloading pandas-1.5.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.1/12.1 MB[0m [31m272.8 kB/s[0m eta [36m0:00:00[0m00:01[0m00:02[0m
Installing collected packages: pandas
Successfully installed pandas-1.5.3
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0[0m[39;49m -> [0m[32;49m23.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [4]:
import pandas as pd
import requests
import time

In [5]:
df = get_wikis()

In [6]:
df

Unnamed: 0,id,summary,content,title
0,lens-protocol,,"**Lens Protocol** is a composable, decentraliz...",Lens Protocol
1,makerdao,,**MakerDAO** is a decentralized global reserve...,MakerDAO
2,katie-haun,,**Katie Haun** is the Chief Executive Officer ...,Katie Haun
3,balaji-srinivasan,,"**Balaji S. Srinivasan** is an angel investor,...",Balaji Srinivasan
4,sewer-pass,,**Sewer Passes** are [non-fungible tokens (NFT...,Sewer Pass
...,...,...,...,...
1345,daostack,,**DAOstack** (founded 2017) is an open source ...,DAOstack
1346,alyze-sam,,**Alyze Sam** is a blockchain strategist and t...,Alyze Sam
1347,alyse-killeen,,"**Alyse Killeen** is an American entrepreneur,...",Alyse Killeen
1348,allison-lu,,**Allison Lu** is a blockchain expert that is ...,Allison Lu


In [7]:
# Drop the empty 'summary' column by name. The positional form
# (df.columns[1]) removes a different column each time the cell is re-run.
df = df.drop(columns=['summary'], errors='ignore')

In [8]:
df

Unnamed: 0,id,content,title
0,lens-protocol,"**Lens Protocol** is a composable, decentraliz...",Lens Protocol
1,makerdao,**MakerDAO** is a decentralized global reserve...,MakerDAO
2,katie-haun,**Katie Haun** is the Chief Executive Officer ...,Katie Haun
3,balaji-srinivasan,"**Balaji S. Srinivasan** is an angel investor,...",Balaji Srinivasan
4,sewer-pass,**Sewer Passes** are [non-fungible tokens (NFT...,Sewer Pass
...,...,...,...
1345,daostack,**DAOstack** (founded 2017) is an open source ...,DAOstack
1346,alyze-sam,**Alyze Sam** is a blockchain strategist and t...,Alyze Sam
1347,alyse-killeen,"**Alyse Killeen** is an American entrepreneur,...",Alyse Killeen
1348,allison-lu,**Allison Lu** is a blockchain expert that is ...,Allison Lu


In [9]:
# Rename by column name rather than position, and without inplace mutation,
# so re-running the cell is a no-op.
df = df.rename(columns={'id': 'wikiid'})

In [10]:
df

Unnamed: 0,wikiid,content,title
0,lens-protocol,"**Lens Protocol** is a composable, decentraliz...",Lens Protocol
1,makerdao,**MakerDAO** is a decentralized global reserve...,MakerDAO
2,katie-haun,**Katie Haun** is the Chief Executive Officer ...,Katie Haun
3,balaji-srinivasan,"**Balaji S. Srinivasan** is an angel investor,...",Balaji Srinivasan
4,sewer-pass,**Sewer Passes** are [non-fungible tokens (NFT...,Sewer Pass
...,...,...,...
1345,daostack,**DAOstack** (founded 2017) is an open source ...,DAOstack
1346,alyze-sam,**Alyze Sam** is a blockchain strategist and t...,Alyze Sam
1347,alyse-killeen,"**Alyse Killeen** is an American entrepreneur,...",Alyse Killeen
1348,allison-lu,**Allison Lu** is a blockchain expert that is ...,Allison Lu


In [11]:
import re
from typing import List

# All paragraph-level patterns below are anchored with ^...$ and are applied
# with .match() to a stripped paragraph, so they only match when the whole
# paragraph has that shape.

# A Markdown heading: one or more '#', a space, then the rest of the line.
HEADING_REGEX = re.compile(r'^#+ .*$')
# A widget embed of the form $$widget<digit>...)$$ (single digit only).
WIDGET_REGEX = re.compile(r'^\$\$widget\d(.*?\))\$\$$')
# A paragraph that starts with '[' and ends with '](...)', e.g. a lone link.
MARKDOWN_LINK_REGEX = re.compile(r'^\[(.*)\]\(.*\)$')
# A paragraph that starts with '![' and ends with '](...)', e.g. a lone image.
MARKDOWN_IMAGE_REGEX = re.compile(r'^!\[.*\]\(.*\)$')
# An inline citation link such as [\[12\]](#cite-id-abc123); unanchored, so it
# matches anywhere inside a paragraph.
CITATION_REGEX = re.compile(r'\[\\\[\d+\\\]\]\(#cite-id-[a-z0-9]+\)', flags=re.MULTILINE)

# sanitize_content stops after this many non-heading paragraphs.
MAX_PARA_COUNT = 3
# sanitize_content truncates its joined output to this many characters.
MAX_CONTENT_LENGTH = 10000


def sanitize_content(content: str) -> str:
    """
    Reduce a Markdown document to its first few plain-text paragraphs.

    The document is split on blank lines. Paragraphs rejected by
    `is_valid_paragraph` are skipped. Headings are kept verbatim and do not
    count towards the limit; every other paragraph has citation links and
    inline Markdown syntax removed. Collection stops once `MAX_PARA_COUNT`
    non-heading paragraphs have been kept, and the joined result is cut to
    `MAX_CONTENT_LENGTH` characters.

    Non-string input (e.g. a missing value reached through
    `DataFrame.apply`) yields an empty string.
    """
    if not isinstance(content, str):
        return ''

    kept_paragraphs = []
    body_count = 0

    for raw_paragraph in content.split('\n\n'):
        if not is_valid_paragraph(raw_paragraph):
            continue
        if body_count >= MAX_PARA_COUNT:
            break

        stripped = raw_paragraph.strip()

        if HEADING_REGEX.match(stripped):
            # Headings are passed through untouched and are not counted.
            kept_paragraphs.append(stripped)
            continue

        # Citations must be removed BEFORE generic link stripping: a citation
        # is itself a Markdown link, so stripping links first leaves a bare
        # "\[n\]" behind that CITATION_REGEX can no longer match.
        without_citations = CITATION_REGEX.sub('', stripped)
        kept_paragraphs.append(remove_markdown_syntax(without_citations))
        body_count += 1

    return '\n\n'.join(kept_paragraphs)[:MAX_CONTENT_LENGTH]


def is_valid_paragraph(paragraph: str) -> bool:
    """
    Tell whether `paragraph` should be kept as text.

    After stripping surrounding whitespace, a paragraph is rejected when it
    is empty or when it matches, from its start, one of the anchored
    image, link or widget patterns. Everything else is accepted.
    """
    stripped = paragraph.strip()
    if len(stripped) == 0:
        return False

    non_text_patterns = (MARKDOWN_IMAGE_REGEX, MARKDOWN_LINK_REGEX, WIDGET_REGEX)
    for pattern in non_text_patterns:
        if pattern.match(stripped):
            return False
    return True


def remove_markdown_syntax(text: str) -> str:
    """
    Strip inline Markdown markup from `text` and return the plain text.

    Emphasis (`*`, `**`, `__`) and inline-code backticks are removed while
    their inner text is kept, images are removed entirely, and links are
    replaced by their link text.
    """
    # Remove emphasis syntax
    text = re.sub(r'\*\*?(.*?)\*\*?', r'\1', text)
    text = re.sub(r'__(.*?)__', r'\1', text)

    # Remove code syntax
    text = re.sub(r'`(.+?)`', r'\1', text)

    # Remove images. This has to run before link removal: the link pattern
    # also matches the "[alt](url)" part of an image and would leave "!alt".
    text = re.sub(r'!\[(.*?)\]\((.*?)\)', r'', text)

    # Remove links, keeping the link text
    text = re.sub(r'\[(.*?)\]\((.*?)\)', r'\1', text)

    return text

def clean(string):
    """
    Apply a fixed sequence of regex clean-ups to `string` and return the result.

    In order: drop escaped citation markers such as ``\\[12\\]``, drop runs of
    ``#``/``*``, drop the literal YouTube widget placeholder, drop ``>``,
    collapse runs of two or more whitespace characters to one space, and
    turn any remaining non-breaking space into a regular space.
    """
    substitutions = (
        (r'\\\[\d+\\\]', ''),
        (r'[#*]+', ''),
        (r'\$\$widget0 YOUTUBE@VID\$\$', ''),
        (r'\>', ''),
        (r'\s{2,}', ' '),
        (r'\xa0', ' '),
    )

    cleaned = string
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned

In [12]:
# Replace each article's raw Markdown with its sanitized leading paragraphs.
df['content'] = df['content'].apply(sanitize_content)

In [13]:
df.head()

Unnamed: 0,wikiid,content,title
0,lens-protocol,"Lens Protocol is a composable, decentralized s...",Lens Protocol
1,makerdao,MakerDAO is a decentralized global reserve ban...,MakerDAO
2,katie-haun,Katie Haun is the Chief Executive Officer and ...,Katie Haun
3,balaji-srinivasan,"Balaji S. Srinivasan is an angel investor, Ame...",Balaji Srinivasan
4,sewer-pass,Sewer Passes are non-fungible tokens (NFTs) th...,Sewer Pass


In [14]:
# Second pass: strip leftover markers and normalise whitespace.
df['content'] = df['content'].apply(clean)

In [15]:
df.head()

Unnamed: 0,wikiid,content,title
0,lens-protocol,"Lens Protocol is a composable, decentralized s...",Lens Protocol
1,makerdao,MakerDAO is a decentralized global reserve ban...,MakerDAO
2,katie-haun,Katie Haun is the Chief Executive Officer and ...,Katie Haun
3,balaji-srinivasan,"Balaji S. Srinivasan is an angel investor, Ame...",Balaji Srinivasan
4,sewer-pass,Sewer Passes are non-fungible tokens (NFTs) th...,Sewer Pass


In [16]:
df.iloc[0, 1]

'Lens Protocol is a composable, decentralized social graph on the Polygon proof -of-stake (PoS) blockchain allowing creators to control their content and data. It is a decentralized social network with a low carbon footprint and a web3 team. It allows users to retain ownership of their profiles and content, set up DAO accounts, monetize content, and transfer data. History Stani Kulechov, founder and CEO of Aave, created Lens Protocol based on discussions regarding the significance of digital identity control. Kulechov has expressed criticism of the way Web 2.0 social media platforms handle user data, restrict user content, and determine what information they view. Stani Kulechov discussed the evolution and revolution of the social media ecosystem at LisCon in 2021, and the team posted an open letter on the 20th of January, 2022 citing the problem of companies owning and monetizing their users’ content. The letter described Web 2.0 social media platforms as antiquated, centralized syste

In [17]:
!pip install -U spacy

Collecting spacy
  Downloading spacy-3.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.6/6.6 MB[0m [31m40.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting pathy>=0.10.0
  Downloading pathy-0.10.1-py3-none-any.whl (48 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.9/48.9 kB[0m [31m18.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting cymem<2.1.0,>=2.0.2
  Downloading cymem-2.0.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34 kB)
Collecting srsly<3.0.0,>=2.4.3
  Downloading srsly-2.4.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (492 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m492.2/492.2 kB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hCollecting tqdm<5.0.0,>=4.38.0
  Downloading tqdm-4.65.0-py3-none-any.whl (77 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.1/77.1 k

In [18]:
# Download the "en_core_web_sm" pipeline that split_into_chunks loads with
# spacy.load("en_core_web_sm").
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.5.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m38.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: en-core-web-sm
Successfully installed en-core-web-sm-3.5.0
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0[0m[39;49m -> [0m[32;49m23.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


In [19]:
import spacy

# Loaded spaCy pipelines keyed by model name, so each model is loaded once.
_SPACY_PIPELINES = {}


def _get_nlp(model_name="en_core_web_sm"):
    """Return the spaCy pipeline `model_name`, loading it on first use only."""
    if model_name not in _SPACY_PIPELINES:
        _SPACY_PIPELINES[model_name] = spacy.load(model_name)
    return _SPACY_PIPELINES[model_name]


def split_into_chunks(text, max_chars=300):
    """
    Split `text` into chunks made of whole sentences.

    Sentences (as segmented by the spaCy pipeline) are appended to the
    current chunk while the chunk stays within `max_chars` characters; the
    next sentence then starts a new chunk. A single sentence longer than
    `max_chars` becomes a chunk of its own.

    Parameters
    ----------
    text : str
        Text to split.
    max_chars : int
        Soft upper bound on chunk length (default 300).

    Returns
    -------
    list of str
        Non-empty, stripped chunks in document order; empty when `text`
        contains no sentence text.
    """
    # Reuse the cached pipeline: loading the model on every call dominates
    # the runtime when this function is applied to each row of a DataFrame.
    doc = _get_nlp()(text)

    chunks = []
    current_chunk = ""
    for sent in doc.sents:
        sent_text = sent.text.strip()
        if not sent_text:
            # Whitespace-only sentence: nothing to add.
            continue
        if len(current_chunk) + len(sent_text) <= max_chars:
            current_chunk += sent_text + " "
        else:
            # Only flush a chunk that has content; otherwise an over-long
            # first sentence would emit an empty chunk.
            if current_chunk:
                chunks.append(current_chunk.strip())
            current_chunk = sent_text + " "
    if current_chunk:
        chunks.append(current_chunk.strip())
    return chunks

In [20]:
# Use the first cleaned article as the demo input for split_into_chunks
# instead of a pasted copy of its text (the same article shown by
# df.iloc[0, 1] above).
content = df['content'].iloc[0]

In [21]:
split_into_chunks(content)

['Lens Protocol is a composable, decentralized social graph on the Polygon proof -of-stake (PoS) blockchain allowing creators to control their content and data. It is a decentralized social network with a low carbon footprint and a web3 team.',
 'It allows users to retain ownership of their profiles and content, set up DAO accounts, monetize content, and transfer data. History Stani Kulechov, founder and CEO of Aave, created Lens Protocol based on discussions regarding the significance of digital identity control.',
 'Kulechov has expressed criticism of the way Web 2.0 social media platforms handle user data, restrict user content, and determine what information they view.',
 'Stani Kulechov discussed the evolution and revolution of the social media ecosystem at LisCon in 2021, and the team posted an open letter on the 20th of January, 2022 citing the problem of companies owning and monetizing their users’ content.',
 'The letter described Web 2.0 social media platforms as antiquated, 

In [22]:
df['chunks'] = df['content'].apply(split_into_chunks)

In [25]:
# One output row per chunk: keep only the identifying columns plus the chunk
# list, then expand each list so wikiid and title repeat for every chunk.
df_exploded = (
    df.explode('chunks')
    .reset_index(drop=True)
    [['wikiid', 'title', 'chunks']]
)

In [26]:
df_exploded

Unnamed: 0,wikiid,title,chunks
0,lens-protocol,Lens Protocol,"Lens Protocol is a composable, decentralized s..."
1,lens-protocol,Lens Protocol,It allows users to retain ownership of their p...
2,lens-protocol,Lens Protocol,Kulechov has expressed criticism of the way We...
3,lens-protocol,Lens Protocol,Stani Kulechov discussed the evolution and rev...
4,lens-protocol,Lens Protocol,The letter described Web 2.0 social media plat...
...,...,...,...
28573,alison-burger,Alison Burger,"While there, she provided accounting, financia..."
28574,alison-burger,Alison Burger,Conducted review and analysis of financial sta...
28575,alison-burger,Alison Burger,"While working the corporate lifestyle, she fou..."
28576,alison-burger,Alison Burger,While searching for a workout that would provi...


In [31]:
# Exact match on the id: str.contains does a regex substring search and would
# also select any other wiki whose id merely contains 'lens-protocol'.
filtered_df = df_exploded.loc[df_exploded['wikiid'] == 'lens-protocol']

In [32]:
num_rows = len(filtered_df)

In [33]:
num_rows

21

In [37]:
df_exploded = df_exploded.rename(columns={'chunks': 'content'})

In [38]:
df_exploded

Unnamed: 0,wikiid,title,content
0,lens-protocol,Lens Protocol,"Lens Protocol is a composable, decentralized s..."
1,lens-protocol,Lens Protocol,It allows users to retain ownership of their p...
2,lens-protocol,Lens Protocol,Kulechov has expressed criticism of the way We...
3,lens-protocol,Lens Protocol,Stani Kulechov discussed the evolution and rev...
4,lens-protocol,Lens Protocol,The letter described Web 2.0 social media plat...
...,...,...,...
28573,alison-burger,Alison Burger,"While there, she provided accounting, financia..."
28574,alison-burger,Alison Burger,Conducted review and analysis of financial sta...
28575,alison-burger,Alison Burger,"While working the corporate lifestyle, she fou..."
28576,alison-burger,Alison Burger,While searching for a workout that would provi...


In [39]:
!pip install supabase

Collecting supabase
  Downloading supabase-1.0.2-py3-none-any.whl (10 kB)
Collecting storage3<0.6.0,>=0.5.2
  Downloading storage3-0.5.2-py3-none-any.whl (13 kB)
Collecting python-semantic-release==7.33.2
  Downloading python_semantic_release-7.33.2-py3-none-any.whl (55 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.1/55.1 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting supafunc<0.3.0,>=0.2.2
  Downloading supafunc-0.2.2-py3-none-any.whl (2.8 kB)
Collecting realtime<2.0.0,>=1.0.0
  Downloading realtime-1.0.0-py3-none-any.whl (8.0 kB)
Collecting postgrest<0.11.0,>=0.10.6
  Downloading postgrest-0.10.6-py3-none-any.whl (18 kB)
Collecting gotrue<2.0.0,>=1.0.0
  Downloading gotrue-1.0.0-py3-none-any.whl (48 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.3/48.3 kB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting httpx<0.24.0,>=0.23.0
  Downloading httpx-0.23.3-py3-none-any.whl (71 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━

In [40]:
!pip install openai

Collecting openai
  Downloading openai-0.27.2-py3-none-any.whl (70 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.1/70.1 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
Collecting aiohttp
  Downloading aiohttp-3.8.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting async-timeout<5.0,>=4.0.0a3
  Downloading async_timeout-4.0.2-py3-none-any.whl (5.8 kB)
Collecting aiosignal>=1.1.2
  Downloading aiosignal-1.3.1-py3-none-any.whl (7.6 kB)
Collecting multidict<7.0,>=4.5
  Downloading multidict-6.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (114 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m114.5/114.5 kB[0m [31m34.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting frozenlist>=1.1.1
  Downloading frozenlist-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylin

In [41]:
import openai

In [42]:
import numpy as np
import supabase
import os

In [44]:
# Read the Supabase credentials from the environment so the key is never
# stored in the notebook. SUPABASE_KEY must be set; SUPABASE_URL is optional
# and falls back to the project URL used so far.
supabase_url = os.environ.get('SUPABASE_URL', 'https://wpisaiqtaoykjwwszlan.supabase.co')
supabase_key = os.environ['SUPABASE_KEY']
supabase_client = supabase.Client(supabase_url, supabase_key)

In [45]:
# Read the OpenAI key from the environment rather than hard-coding it.
openai.api_key = os.environ['OPENAI_API_KEY']

In [46]:
text = 'Lens Protocol is a composable, decentralized social graph on the Polygon proof -of-stake (PoS) blockchain allowing creators to control their content and data. It is a decentralized social network with a low carbon footprint and a web3 team.'

In [47]:
def get_embedding(text, engine="text-embedding-ada-002"):
    """
    Return the embedding of `text` produced by the OpenAI model `engine`.

    Newlines in `text` are replaced by spaces before the request is made;
    the embedding of the first (and only) input is returned.
    """
    single_line = text.replace("\n", " ")
    response = openai.Embedding.create(input=[single_line], model=engine)
    first_result = response['data'][0]
    return first_result['embedding']

In [55]:
df_exploded.iloc[28577,2]

"Women of Crypto Burger is the creator and founder of the women's organization in the Blockchain and Cryptocurrency space known as Women of Crypto. She is also involved with shEOS as a business advisor."

In [57]:
# Embed every chunk and store it in the 'wiki_embedding' table.
# Iterates the per-chunk frame (df_exploded), not the per-article frame, so
# each request embeds one chunk. The vector goes under the 'embedding' key,
# and .execute() is required: insert() on its own only builds the request.
for row in df_exploded.itertuples(index=False):
    record = {'wikiid': row.wikiid,
              'title': row.title,
              'content': row.content,
              'embedding': get_embedding(row.content)}
    supabase_client.table('wiki_embedding').insert(record).execute()


KeyboardInterrupt



In [58]:
embeddings = get_embedding(df_exploded.iloc[0,2])
     

In [60]:
# Row payload for the first chunk. The vector is stored under 'embedding',
# the key used by the inserts that are actually executed in this notebook.
data = {'wikiid': df_exploded.iloc[0,0],
        'title': df_exploded.iloc[0,1],
        'content': df_exploded.iloc[0,2],
        'embedding': embeddings}

In [62]:
 # NOTE: this only builds the insert request (the cell output is a request
 # builder object); nothing is sent until .execute() is called on it.
 supabase_client.table('wiki_embedding').insert(data)

<postgrest._sync.request_builder.SyncQueryRequestBuilder at 0x7fda33cfa890>

In [66]:
df_exploded.to_csv('chunks.csv')

In [69]:
# Insert the first chunk together with its embedding and keep the response.
data = (
    supabase_client.table("wiki_embedding")
    .insert({
        "wikiid": df_exploded.iloc[0,0],
        "title": df_exploded.iloc[0,1],
        "content": df_exploded.iloc[0,2],
        "embedding": embeddings,
    })
    .execute()
)

In [71]:
delete = supabase_client.table("wiki_embedding").delete().eq('id', 1).execute()

In [138]:
# Embed and insert the rows of df_exploded in the given positional range
# (here just the last row, 28577).
for i in range(28577, 28578):
    embeddings = get_embedding(df_exploded.iloc[i,2])
    supabase_client.table("wiki_embedding").insert({
        "wikiid": df_exploded.iloc[i,0],
        "title": df_exploded.iloc[i,1],
        "content": df_exploded.iloc[i,2],
        "embedding": embeddings,
    }).execute()
    

  

In [139]:
df_exploded.iloc[28577]

wikiid                                         alison-burger
title                                          Alison Burger
content    Women of Crypto Burger is the creator and foun...
Name: 28577, dtype: object

In [102]:
delete = supabase_client.table("wiki_embedding").delete().eq('id', 16576).execute()