In [2]:
# !pip install "modin[all]"
# os.environ["MODIN_ENGINE"] = "ray"  # Modin will use Ray

### imports and util functions

In [77]:
# import modin.pandas as pd
import pandas as pd
from sentence_transformers import SentenceTransformer, CrossEncoder
import chromadb

model = SentenceTransformer("multi-qa-distilbert-cos-v1")

def generate_full_text(row):
    title = ""
    if row['title']:
        title = str(row['title'])
    return title + '\n\n' + row['text']

def structured_metadata(query_metadata):
    structured_metadata = {
        "tags" : [],
        "titles" : [],
        "urls" : []
    }

    for metadata  in  query_metadata[0]:
        structured_metadata['tags'].append(metadata['tags'])
        structured_metadata['titles'].append(metadata['title'])
        structured_metadata['urls'].append(metadata['url'])

    return structured_metadata


def build_result_multiple_queries(results):
    final_result = {
        "ids" : [[]],
        "distances": [[]],
        "metadatas": [[]]
    }
    for result in results:
        final_result['ids'][0].append(result['ids'][0][0])
        final_result['distances'][0].append(result['distances'][0][0])
        final_result['metadatas'][0].append(result['metadatas'][0][0])

    return final_result

def build_df_retrieved_result(result_original_retrieve):
    structured_metadata_dict = structured_metadata(result_original_retrieve['metadatas'])

    df_result_original_retrieve = pd.DataFrame({"ids" : result_original_retrieve['ids'][0],
                                            "distances": result_original_retrieve['distances'][0],
                                            "tags": structured_metadata_dict['tags'],
                                            "titles": structured_metadata_dict['titles'],
                                            "urls": structured_metadata_dict["urls"]
                                            })
    
    return df_result_original_retrieve


In [7]:
df_articles = pd.read_csv('medium_articles.csv')
df_articles.loc[df_articles['text'].isnull()]

Unnamed: 0,title,text,url,authors,timestamp,tags


In [13]:
# replace all nan values with None in dataframe
df_articles = df_articles.where(pd.notnull(df_articles), None)

In [14]:
df_articles.head(2)

Unnamed: 0,title,text,url,authors,timestamp,tags
0,Mental Note Vol. 24,Photo by Josh Riemer on Unsplash\n\nMerry Chri...,https://medium.com/invisible-illness/mental-no...,['Ryan Fan'],2020-12-26 03:38:10.479000+00:00,"['Mental Health', 'Health', 'Psychology', 'Sci..."
1,Your Brain On Coronavirus,Your Brain On Coronavirus\n\nA guide to the cu...,https://medium.com/age-of-awareness/how-the-pa...,['Simon Spichak'],2020-09-23 22:10:17.126000+00:00,"['Mental Health', 'Coronavirus', 'Science', 'P..."


In [6]:
df_articles = df_articles.sample(100000)
df_articles.reset_index(drop=True, inplace=True)

In [7]:
df_articles['full_text'] = df_articles.apply(generate_full_text, axis=1)
all_articles = df_articles['full_text'].tolist()

In [8]:
all_articles_embeddings = model.encode(all_articles, device='cuda', show_progress_bar=True)

Batches: 100%|██████████| 3125/3125 [3:11:09<00:00,  3.67s/it]  


In [9]:
df_articles['sentence_embeddings'] = all_articles_embeddings.tolist()

In [10]:
del all_articles_embeddings, all_articles

In [11]:
df_articles.drop(['text', 'timestamp', 'authors'], axis=1, inplace=True)

In [12]:
df_articles.to_parquet('./df_embeddings.parquet')

In [13]:
df_articles.head(2)

Unnamed: 0,title,url,tags,full_text,sentence_embeddings
0,Warming your lambda functions using the AWS CDK,https://medium.com/@louisjq/warming-your-lambd...,"['AWS', 'AWS Lambda', 'Cdk']",Warming your lambda functions using the AWS CD...,"[0.00647616246715188, 0.0004706464533228427, 0..."
1,Meeting My Teenager Self,https://medium.com/@chembarathi/meeting-my-tee...,"['Books And Authors', 'Personal Essay', 'Refle...",Meeting My Teenager Self\n\nSome days I start ...,"[-0.00991743989288807, -0.029601380228996277, ..."


### Configuring Chroma db

In [2]:
path_vector_db = './db/'

client = chromadb.PersistentClient(path=path_vector_db)

client.heartbeat()

1707052498712584905

In [3]:
# create a collection, run this only once
collection = client.create_collection(
        name="medium_articles",
        metadata={"hnsw:space": "cosine"} 
    )

In [3]:
# run this if you have already created the collection
collection = client.get_collection(name="medium_articles")

In [4]:
df_articles = pd.read_parquet('./df_embeddings.parquet')

In [5]:
df_articles.head(3)

Unnamed: 0,title,url,tags,full_text,sentence_embeddings
0,Warming your lambda functions using the AWS CDK,https://medium.com/@louisjq/warming-your-lambd...,"['AWS', 'AWS Lambda', 'Cdk']",Warming your lambda functions using the AWS CD...,"[0.00647616246715188, 0.0004706464533228427, 0..."
1,Meeting My Teenager Self,https://medium.com/@chembarathi/meeting-my-tee...,"['Books And Authors', 'Personal Essay', 'Refle...",Meeting My Teenager Self\n\nSome days I start ...,"[-0.00991743989288807, -0.029601380228996277, ..."
2,I Give You A Name (& this is my blessing),https://medium.com/@ashrowan/i-give-you-a-name...,"['Faithfullylgbt', 'Nonbinary', 'Latter Day Sa...",I Give You A Name (& this is my blessing)\n\nL...,"[0.06086372956633568, -0.0077235340140759945, ..."


In [7]:
df_articles['sentence_embeddings'] = df_articles['sentence_embeddings'].apply(lambda x: x.tolist())
all_articles_embeddings = df_articles['sentence_embeddings'].tolist()


In [15]:
df_articles.loc[df_articles[['title', 'tags', 'url']].isnull().any(axis=1)]

Unnamed: 0,title,url,tags,full_text,sentence_embeddings,id
21769,,https://medium.com/chris-dialogue/%E0%B8%84%E0...,"['Politics', 'Life']",\n\nI am a product builder who specializes in ...,"[0.10476582497358322, 0.030461367219686508, 0....",21769
34781,,https://medium.com/chris-dialogue/%E0%B9%84%E0...,['Life'],\n\nI am a product builder who specializes in ...,"[0.10476582497358322, 0.030461367219686508, 0....",34781


In [16]:
df_articles.loc[df_articles['title'].isnull(), 'title'] = ''

In [17]:
all_articles_metadata = df_articles[['title', 'tags', 'url']].to_dict(orient='records')

In [13]:
# create a id column
df_articles['id'] = df_articles.index
df_articles['id'] = df_articles['id'].astype(str)

In [19]:
from tqdm import tqdm

batch_size = 100

total_iterations = len(df_articles) // batch_size + (len(df_articles) % batch_size > 0)

In [None]:
# Crie uma barra de progresso
for i in tqdm(range(0, len(df_articles), batch_size), total=total_iterations, desc="Adding to collection"):
    batch_embeddings = df_articles['sentence_embeddings'].iloc[i:i + batch_size].tolist()
    batch_metadatas = all_articles_metadata[i:i + batch_size]
    batch_ids = df_articles['id'].iloc[i:i + batch_size].tolist()

    collection.add(embeddings=batch_embeddings, metadatas=batch_metadatas, ids=batch_ids)

In [6]:
collection.count()

100000

In [34]:
### query with filter

collection.get(
    ids=["1", "2", "3", "4", "5"],	
    where={'title': {'$in': ['An act of fruition']}}
)

{'ids': ['3'],
 'embeddings': None,
 'metadatas': [{'tags': "['Empathy', 'Shakti Shetty', 'Random Musings']",
   'title': 'An act of fruition',
   'url': 'https://medium.com/shaktianspace/an-act-of-fruition-fffc0550e1a0'}],
 'documents': [None],
 'uris': None,
 'data': None}

#### Implementing RAG methods

In [37]:
# query based on search
query = "How to use python to create a web application"

query_embedding = model.encode(query, device='cuda', show_progress_bar=True)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [38]:
result_original_retrieve = collection.query(
    query_embeddings=query_embedding.tolist(),
    n_results=10,
)

In [75]:
df_result_original_retrieve = build_df_retrieved_result(result_original_retrieve)

In [78]:
df_result_original_retrieve

Unnamed: 0,ids,distances,tags,titles,urls
0,86007,0.268111,"['Flask', 'Python', 'HTML']",Web Hosting using Python,https://medium.com/datadriveninvestor/web-host...
1,65351,0.278562,"['Python Project Ideas', 'Python Website', 'Py...",how to create a website in seven lines of pyth...,https://medium.com/@jyothiradityak/how-to-crea...
2,8240,0.337513,"['Software', 'Python', 'Web Development', 'Dja...",Basic application with Django,https://medium.com/datadriveninvestor/basic-ap...
3,64395,0.349885,"['Programming', 'Coding', 'Python', 'Django', ...",Web Development in Python: Lesson 1,https://medium.com/python-in-plain-english/web...
4,62018,0.353386,"['Data Science', 'Python', 'Programming', 'Web...",What exactly can you do with Python? Here are ...,https://towardsdatascience.com/what-can-you-do...
5,38143,0.367358,"['Python Programming', 'Flask Framework', 'Pyt...",Flask Tutorial Python — Basics of The Flask Fr...,https://medium.com/@p4p3rb0y1/flask-tutorial-p...
6,53842,0.376031,"['Website', 'Pyth', 'Python Framework', 'Flask']",Your First website with Flask,https://medium.com/@nitis3211/your-first-websi...
7,21827,0.380803,"['Web Development', 'Flask Framework', 'Gettin...",Getting Started With Flask — Python Framework,https://medium.com/@1303karthick/getting-start...
8,66840,0.384633,"['Python', 'Programming', 'Python3', 'Software...",How to Create a Simple Django Web App With Pyt...,https://medium.com/better-programming/how-to-c...
9,98111,0.384633,"['Programming', 'Django', 'Python3', 'Python',...",How to Create a Simple Django Web App With Pyt...,https://betterprogramming.pub/how-to-create-a-...


### Query Expansion

In [56]:
# !pip install --upgrade openai

In [79]:
from openai import OpenAI

client = OpenAI(
  api_key="",
)

In [80]:
# Function to create related queries
def generate_related_queries(custom_prompt, query, model="gpt-3.5-turbo"):
    messages = [
        {
            "role": "system",
            "content": f"{custom_prompt}"
        },
        {"role": "user", "content": query}
    ]

    original_content = client.chat.completions.create(
            model = model,
            messages = messages
            )
    
    content = original_content.choices[0].message.content
    content = content.split("\n")
    return content, original_content

In [81]:
query = "How to use python to create a web application"

In [82]:
### testing with related topics

custom_prompt_related_topics = """ You are a helpful assistant that helps people to find good articles.
                    Your users are trying to find good articles about certain area. 
                    Suggest up to five additional related search texts to help them find the article they need
                    Suggest only short search texts without compound sentences. Suggest a variety of texts that cover different aspects of the topic.
                    Make sure they are complete search texts, and that they are related to the original text.
                    Output one text per line. Do not number or describe the texts."""

related_queries, original_content = generate_related_queries(custom_prompt_related_topics, query)

In [83]:
related_queries

['Python web application development tutorial',
 'Best practices for Python web development',
 'Introduction to Flask: A Python web framework',
 'Django vs Flask: Choosing the right Python web framework',
 'Python web application deployment guide']

In [None]:
results = []

for query in related_queries:
    query_embedding = model.encode(query, device='cuda', show_progress_bar=True)
    
    result_query = collection.query(
        query_embeddings=query_embedding.tolist(),
        n_results=5,
    )
    
    results.append(result_query)

In [73]:
### all results

# for result in results:
#     for metadata in result['metadatas'][0]:
#         print(f"title: {metadata['title']} || tags: {metadata['tags']}")

In [115]:
final_result_query_expansion = build_result_multiple_queries(results)

In [117]:
final_result_query_expansion

{'ids': [['38143', '64395', '38143', '76575', '64395']],
 'distances': [[0.3045845627784729,
   0.36013704538345337,
   0.1690163016319275,
   0.20691239833831787,
   0.3864551782608032]],
 'metadatas': [[{'tags': "['Python Programming', 'Flask Framework', 'Python3', 'Flask', 'Python']",
    'title': 'Flask Tutorial Python — Basics of The Flask Framework',
    'url': 'https://medium.com/@p4p3rb0y1/flask-tutorial-python-basics-of-the-flask-framework-8a6c6fc55c0d'},
   {'tags': "['Programming', 'Coding', 'Python', 'Django', 'Web Development']",
    'title': 'Web Development in Python: Lesson 1',
    'url': 'https://medium.com/python-in-plain-english/web-development-in-python-lesson-1-d87ef3e439fc'},
   {'tags': "['Python Programming', 'Flask Framework', 'Python3', 'Flask', 'Python']",
    'title': 'Flask Tutorial Python — Basics of The Flask Framework',
    'url': 'https://medium.com/@p4p3rb0y1/flask-tutorial-python-basics-of-the-flask-framework-8a6c6fc55c0d'},
   {'tags': "['Software De

In [120]:
df_query_expansion_related_topics_result = build_df_retrieved_result(final_result_query_expansion)

In [121]:
df_query_expansion_related_topics_result

Unnamed: 0,ids,distances,tags,titles,urls
0,38143,0.304585,"['Python Programming', 'Flask Framework', 'Pyt...",Flask Tutorial Python — Basics of The Flask Fr...,https://medium.com/@p4p3rb0y1/flask-tutorial-p...
1,64395,0.360137,"['Programming', 'Coding', 'Python', 'Django', ...",Web Development in Python: Lesson 1,https://medium.com/python-in-plain-english/web...
2,38143,0.169016,"['Python Programming', 'Flask Framework', 'Pyt...",Flask Tutorial Python — Basics of The Flask Fr...,https://medium.com/@p4p3rb0y1/flask-tutorial-p...
3,76575,0.206912,"['Software Development', 'Programming', 'Pytho...",What Are The Best & Popular Python Frameworks,https://medium.com/python-in-plain-english/wha...
4,64395,0.386455,"['Programming', 'Coding', 'Python', 'Django', ...",Web Development in Python: Lesson 1,https://medium.com/python-in-plain-english/web...


In [122]:
### testing with key words related to query

query = "How to use python to create a web application"

custom_prompt_related_topics = """ You are a helpful assistant that helps people to find good articles.
                    Your users are trying to find good articles about certain area. 
                    Suggest up to 10 related keywords  to the search topic to help them find the article they need.
                    Suggest a variety of keywords that cover different aspects of the topic.
                    Make sure they are complete search texts, and that they are related to the original text.
                    The output MUST be one keyword per line. Do not number or describe the keywords."""

related_keywords, original_content = generate_related_queries(custom_prompt_related_topics, query)

In [123]:
related_keywords

['python web development',
 'web application development with python',
 'python web frameworks',
 'python flask tutorial',
 'django web development',
 'python web server',
 'python web development tutorial',
 'python web application example',
 'python web development libraries',
 'python web application deployment']

In [None]:
results = []

for keyword in related_keywords:
    query_embedding = model.encode(keyword, device='cuda', show_progress_bar=True)
    
    result_query = collection.query(
        query_embeddings=query_embedding.tolist(),
        n_results=5,
    )
    
    results.append(result_query)

In [69]:
### all results

# for result in results:
#     for metadata in result['metadatas'][0]:
#         print(f"title: {metadata['title']} || tags: {metadata['tags']}")

In [125]:
for result in results:
    print(result['metadatas'][0][0])

{'tags': "['Flask', 'Python', 'HTML']", 'title': 'Web Hosting using Python', 'url': 'https://medium.com/datadriveninvestor/web-hosting-using-python-3dbb00abdcba'}
{'tags': "['Flask', 'Python', 'HTML']", 'title': 'Web Hosting using Python', 'url': 'https://medium.com/datadriveninvestor/web-hosting-using-python-3dbb00abdcba'}
{'tags': "['Software Development', 'Programming', 'Python', 'Data Science', 'Web Development']", 'title': 'What Are The Best & Popular Python Frameworks', 'url': 'https://medium.com/python-in-plain-english/what-are-the-best-popular-python-freamworks-4379f4a6e37c'}
{'tags': "['Python Programming', 'Flask Framework', 'Python3', 'Flask', 'Python']", 'title': 'Flask Tutorial Python — Basics of The Flask Framework', 'url': 'https://medium.com/@p4p3rb0y1/flask-tutorial-python-basics-of-the-flask-framework-8a6c6fc55c0d'}
{'tags': "['Portal', 'Web Development', 'Django Framework', 'Django', 'Python']", 'title': 'A Basic Overview of Django, the Python Web Framework', 'url': 

In [126]:
final_result_query_expansion_related_keywords = build_result_multiple_queries(results)

In [127]:
df_query_expansion_related_keywords_result = build_df_retrieved_result(final_result_query_expansion_related_keywords)

In [128]:
df_query_expansion_related_keywords_result

Unnamed: 0,ids,distances,tags,titles,urls
0,86007,0.274412,"['Flask', 'Python', 'HTML']",Web Hosting using Python,https://medium.com/datadriveninvestor/web-host...
1,86007,0.309433,"['Flask', 'Python', 'HTML']",Web Hosting using Python,https://medium.com/datadriveninvestor/web-host...
2,76575,0.237362,"['Software Development', 'Programming', 'Pytho...",What Are The Best & Popular Python Frameworks,https://medium.com/python-in-plain-english/wha...
3,38143,0.278728,"['Python Programming', 'Flask Framework', 'Pyt...",Flask Tutorial Python — Basics of The Flask Fr...,https://medium.com/@p4p3rb0y1/flask-tutorial-p...
4,96567,0.264278,"['Portal', 'Web Development', 'Django Framewor...","A Basic Overview of Django, the Python Web Fra...",https://medium.com/@leofalcone/a-basic-overvie...
5,86007,0.310809,"['Flask', 'Python', 'HTML']",Web Hosting using Python,https://medium.com/datadriveninvestor/web-host...
6,33669,0.32469,"['Data Science', 'Web Scraping', 'Python', 'An...",Web scraping com Python,https://medium.com/dados/web-scraping-com-pyth...
7,86007,0.347834,"['Flask', 'Python', 'HTML']",Web Hosting using Python,https://medium.com/datadriveninvestor/web-host...
8,76575,0.367306,"['Software Development', 'Programming', 'Pytho...",What Are The Best & Popular Python Frameworks,https://medium.com/python-in-plain-english/wha...
9,64395,0.372018,"['Programming', 'Coding', 'Python', 'Django', ...",Web Development in Python: Lesson 1,https://medium.com/python-in-plain-english/web...


### Cross Encoder re-ranking

Consiste na técnica de obter "candidatos" através da busca por similaridade, porém após obter esses candidatos, cada candidato e a query original servem de entrada para a rede Transformer e a saída é dada por um score entre 0 e 1 indicando o quanto o candidato é relevante para a query

In [129]:
cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

In [130]:
df_articles.head(1)

Unnamed: 0,title,url,tags,full_text,sentence_embeddings,id
0,Warming your lambda functions using the AWS CDK,https://medium.com/@louisjq/warming-your-lambd...,"['AWS', 'AWS Lambda', 'Cdk']",Warming your lambda functions using the AWS CD...,"[0.00647616246715188, 0.0004706464533228427, 0...",0


In [131]:
retrieved_ids = result_original_retrieve['ids'][0]

In [132]:
retrieved_ids

['86007',
 '65351',
 '8240',
 '64395',
 '62018',
 '38143',
 '53842',
 '21827',
 '66840',
 '98111']

In [133]:
def get_article_sentences(data, ids):
    return data.loc[data['id'].isin(ids)]

In [134]:
df_retrieved_data = get_article_sentences(df_articles, retrieved_ids)

In [135]:
retrieved_articles_text = df_retrieved_data['full_text'].tolist()

In [136]:
retrieved_articles_text

['Basic application with Django\n\nFinally we got to the awesome Python framework Django which allows you to use python for developing web applications.\n\nPrerequisites\n\nFirst ensure that you have python and pip installed. Then let’s install Django:\n\npython -m pip install Django\n\nAnd check if it’s installed and ready to use:\n\npython -m django — version\n\nIf you can see a version of the Django library then you are ready to go.\n\nProject Setup\n\nNow, let’s create new Django project:\n\ndjango-admin startproject myCoolProject\n\nThis command will create a myCoolProject directory in the directory you recently located. And inside it will create this files:\n\nNext we can cd into myCoolProject directory and start the server:\n\npython manage.py runserver\n\nIf everything is correct you should see something like this in your console:\n\nAnd if you open http://127.0.0.1:8000 in web browser you should see this:\n\nNew app\n\nNow we know that Django and the project environment is rea

In [137]:
model_inputs = [[query, text] for text in retrieved_articles_text]

In [31]:
# model_inputs

In [138]:
scores = cross_encoder.predict(model_inputs)

In [139]:
# Sort the scores in decreasing order
results = [{"input": inp, "score": score} for inp, score in zip(model_inputs, scores)]
results = sorted(results, key=lambda x: x["score"], reverse=True)

In [146]:
results_re_ranking = [{"query": inp[0], "text": inp[1], "score": score} for inp, score in zip(model_inputs, scores)]

In [148]:
results_re_ranking_df = pd.DataFrame.from_dict(results_re_ranking)

In [161]:
results_re_ranking_df.columns

Index(['query', 'text', 'score'], dtype='object')

In [164]:
results_re_ranking_df.rename(columns={"text": "full_text"}, inplace=True)

In [166]:
df_retrieved_data_merged = df_retrieved_data.merge(results_re_ranking_df, on='full_text')

In [170]:
df_retrieved_data_merged

Unnamed: 0,title,url,tags,full_text,sentence_embeddings,id,query,score
0,Basic application with Django,https://medium.com/datadriveninvestor/basic-ap...,"['Software', 'Python', 'Web Development', 'Dja...",Basic application with Django\n\nFinally we go...,"[0.055158913135528564, -0.004764148499816656, ...",8240,How to use python to create a web application,5.973814
1,Getting Started With Flask — Python Framework,https://medium.com/@1303karthick/getting-start...,"['Web Development', 'Flask Framework', 'Gettin...",Getting Started With Flask — Python Framework\...,"[0.05524623394012451, 0.011870672926306725, 0....",21827,How to use python to create a web application,1.133836
2,Flask Tutorial Python — Basics of The Flask Fr...,https://medium.com/@p4p3rb0y1/flask-tutorial-p...,"['Python Programming', 'Flask Framework', 'Pyt...",Flask Tutorial Python — Basics of The Flask Fr...,"[0.05366446077823639, 0.005391155369579792, 0....",38143,How to use python to create a web application,3.802685
3,Your First website with Flask,https://medium.com/@nitis3211/your-first-websi...,"['Website', 'Pyth', 'Python Framework', 'Flask']",Your First website with Flask\n\nFlask is one ...,"[0.09390280395746231, -0.023226771503686905, 0...",53842,How to use python to create a web application,1.786698
4,What exactly can you do with Python? Here are ...,https://towardsdatascience.com/what-can-you-do...,"['Data Science', 'Python', 'Programming', 'Web...",What exactly can you do with Python? Here are ...,"[0.03987741097807884, 0.03799786791205406, 0.0...",62018,How to use python to create a web application,5.110482
5,Web Development in Python: Lesson 1,https://medium.com/python-in-plain-english/web...,"['Programming', 'Coding', 'Python', 'Django', ...",Web Development in Python: Lesson 1\n\nA quick...,"[0.03741590306162834, -0.007534318137913942, 0...",64395,How to use python to create a web application,6.223163
6,how to create a website in seven lines of pyth...,https://medium.com/@jyothiradityak/how-to-crea...,"['Python Project Ideas', 'Python Website', 'Py...",how to create a website in seven lines of pyth...,"[0.07365156710147858, -0.029248660430312157, 0...",65351,How to use python to create a web application,7.766557
7,How to Create a Simple Django Web App With Pyt...,https://medium.com/better-programming/how-to-c...,"['Python', 'Programming', 'Python3', 'Software...",How to Create a Simple Django Web App With Pyt...,"[0.023062171414494514, 0.01495151873677969, 0....",66840,How to use python to create a web application,6.463696
8,How to Create a Simple Django Web App With Pyt...,https://medium.com/better-programming/how-to-c...,"['Python', 'Programming', 'Python3', 'Software...",How to Create a Simple Django Web App With Pyt...,"[0.023062171414494514, 0.01495151873677969, 0....",66840,How to use python to create a web application,6.463696
9,How to Create a Simple Django Web App With Pyt...,https://betterprogramming.pub/how-to-create-a-...,"['Programming', 'Django', 'Python3', 'Python',...",How to Create a Simple Django Web App With Pyt...,"[0.023062171414494514, 0.01495151873677969, 0....",98111,How to use python to create a web application,6.463696


In [175]:
df_retrieved_data_merged.drop(columns=['sentence_embeddings', 'query', 'full_text'], axis=1, inplace=True)

In [176]:
df_retrieved_data_merged

Unnamed: 0,title,url,tags,id,score
0,Basic application with Django,https://medium.com/datadriveninvestor/basic-ap...,"['Software', 'Python', 'Web Development', 'Dja...",8240,5.973814
1,Getting Started With Flask — Python Framework,https://medium.com/@1303karthick/getting-start...,"['Web Development', 'Flask Framework', 'Gettin...",21827,1.133836
2,Flask Tutorial Python — Basics of The Flask Fr...,https://medium.com/@p4p3rb0y1/flask-tutorial-p...,"['Python Programming', 'Flask Framework', 'Pyt...",38143,3.802685
3,Your First website with Flask,https://medium.com/@nitis3211/your-first-websi...,"['Website', 'Pyth', 'Python Framework', 'Flask']",53842,1.786698
4,What exactly can you do with Python? Here are ...,https://towardsdatascience.com/what-can-you-do...,"['Data Science', 'Python', 'Programming', 'Web...",62018,5.110482
5,Web Development in Python: Lesson 1,https://medium.com/python-in-plain-english/web...,"['Programming', 'Coding', 'Python', 'Django', ...",64395,6.223163
6,how to create a website in seven lines of pyth...,https://medium.com/@jyothiradityak/how-to-crea...,"['Python Project Ideas', 'Python Website', 'Py...",65351,7.766557
7,How to Create a Simple Django Web App With Pyt...,https://medium.com/better-programming/how-to-c...,"['Python', 'Programming', 'Python3', 'Software...",66840,6.463696
8,How to Create a Simple Django Web App With Pyt...,https://medium.com/better-programming/how-to-c...,"['Python', 'Programming', 'Python3', 'Software...",66840,6.463696
9,How to Create a Simple Django Web App With Pyt...,https://betterprogramming.pub/how-to-create-a-...,"['Programming', 'Django', 'Python3', 'Python',...",98111,6.463696


#### Comparing results obtained

In [178]:
##### Orinal retrieve
df_result_original_retrieve

Unnamed: 0,ids,distances,tags,titles,urls
0,86007,0.268111,"['Flask', 'Python', 'HTML']",Web Hosting using Python,https://medium.com/datadriveninvestor/web-host...
1,65351,0.278562,"['Python Project Ideas', 'Python Website', 'Py...",how to create a website in seven lines of pyth...,https://medium.com/@jyothiradityak/how-to-crea...
2,8240,0.337513,"['Software', 'Python', 'Web Development', 'Dja...",Basic application with Django,https://medium.com/datadriveninvestor/basic-ap...
3,64395,0.349885,"['Programming', 'Coding', 'Python', 'Django', ...",Web Development in Python: Lesson 1,https://medium.com/python-in-plain-english/web...
4,62018,0.353386,"['Data Science', 'Python', 'Programming', 'Web...",What exactly can you do with Python? Here are ...,https://towardsdatascience.com/what-can-you-do...
5,38143,0.367358,"['Python Programming', 'Flask Framework', 'Pyt...",Flask Tutorial Python — Basics of The Flask Fr...,https://medium.com/@p4p3rb0y1/flask-tutorial-p...
6,53842,0.376031,"['Website', 'Pyth', 'Python Framework', 'Flask']",Your First website with Flask,https://medium.com/@nitis3211/your-first-websi...
7,21827,0.380803,"['Web Development', 'Flask Framework', 'Gettin...",Getting Started With Flask — Python Framework,https://medium.com/@1303karthick/getting-start...
8,66840,0.384633,"['Python', 'Programming', 'Python3', 'Software...",How to Create a Simple Django Web App With Pyt...,https://medium.com/better-programming/how-to-c...
9,98111,0.384633,"['Programming', 'Django', 'Python3', 'Python',...",How to Create a Simple Django Web App With Pyt...,https://betterprogramming.pub/how-to-create-a-...


In [179]:
for idx, row in df_result_original_retrieve.iterrows():
    print(f"title: {row['titles']} || url {row['urls']} || tags: {row['tags']}")

title: Web Hosting using Python || url https://medium.com/datadriveninvestor/web-hosting-using-python-3dbb00abdcba || tags: ['Flask', 'Python', 'HTML']
title: how to create a website in seven lines of python code. || url https://medium.com/@jyothiradityak/how-to-create-a-website-in-seven-lines-of-python-code-90192668dd58 || tags: ['Python Project Ideas', 'Python Website', 'Python Beginner', 'Flask Framework', 'Python Programming']
title: Basic application with Django || url https://medium.com/datadriveninvestor/basic-application-with-django-3afab115bb9a || tags: ['Software', 'Python', 'Web Development', 'Django', 'Learning To Code']
title: Web Development in Python: Lesson 1 || url https://medium.com/python-in-plain-english/web-development-in-python-lesson-1-d87ef3e439fc || tags: ['Programming', 'Coding', 'Python', 'Django', 'Web Development']
title: What exactly can you do with Python? Here are Python's 3 main applications. || url https://towardsdatascience.com/what-can-you-do-with-py

In [183]:
#### query expansion with related topics
df_query_expansion_related_topics_result = df_query_expansion_related_topics_result.sort_values(by='distances', ascending=True)

In [184]:
df_query_expansion_related_topics_result

Unnamed: 0,ids,distances,tags,titles,urls
2,38143,0.169016,"['Python Programming', 'Flask Framework', 'Pyt...",Flask Tutorial Python — Basics of The Flask Fr...,https://medium.com/@p4p3rb0y1/flask-tutorial-p...
3,76575,0.206912,"['Software Development', 'Programming', 'Pytho...",What Are The Best & Popular Python Frameworks,https://medium.com/python-in-plain-english/wha...
0,38143,0.304585,"['Python Programming', 'Flask Framework', 'Pyt...",Flask Tutorial Python — Basics of The Flask Fr...,https://medium.com/@p4p3rb0y1/flask-tutorial-p...
1,64395,0.360137,"['Programming', 'Coding', 'Python', 'Django', ...",Web Development in Python: Lesson 1,https://medium.com/python-in-plain-english/web...
4,64395,0.386455,"['Programming', 'Coding', 'Python', 'Django', ...",Web Development in Python: Lesson 1,https://medium.com/python-in-plain-english/web...


In [185]:
for idx, row in df_query_expansion_related_topics_result.iterrows():
    print(f"title: {row['titles']} || url {row['urls']} || tags: {row['tags']}")

title: Flask Tutorial Python — Basics of The Flask Framework || url https://medium.com/@p4p3rb0y1/flask-tutorial-python-basics-of-the-flask-framework-8a6c6fc55c0d || tags: ['Python Programming', 'Flask Framework', 'Python3', 'Flask', 'Python']
title: What Are The Best & Popular Python Frameworks || url https://medium.com/python-in-plain-english/what-are-the-best-popular-python-freamworks-4379f4a6e37c || tags: ['Software Development', 'Programming', 'Python', 'Data Science', 'Web Development']
title: Flask Tutorial Python — Basics of The Flask Framework || url https://medium.com/@p4p3rb0y1/flask-tutorial-python-basics-of-the-flask-framework-8a6c6fc55c0d || tags: ['Python Programming', 'Flask Framework', 'Python3', 'Flask', 'Python']
title: Web Development in Python: Lesson 1 || url https://medium.com/python-in-plain-english/web-development-in-python-lesson-1-d87ef3e439fc || tags: ['Programming', 'Coding', 'Python', 'Django', 'Web Development']
title: Web Development in Python: Lesson 1 

In [186]:
### query expansion with related keywords
df_query_expansion_related_keywords_result = df_query_expansion_related_keywords_result.sort_values(by='distances', ascending=True)

In [187]:
df_query_expansion_related_keywords_result

Unnamed: 0,ids,distances,tags,titles,urls
2,76575,0.237362,"['Software Development', 'Programming', 'Pytho...",What Are The Best & Popular Python Frameworks,https://medium.com/python-in-plain-english/wha...
4,96567,0.264278,"['Portal', 'Web Development', 'Django Framewor...","A Basic Overview of Django, the Python Web Fra...",https://medium.com/@leofalcone/a-basic-overvie...
0,86007,0.274412,"['Flask', 'Python', 'HTML']",Web Hosting using Python,https://medium.com/datadriveninvestor/web-host...
3,38143,0.278728,"['Python Programming', 'Flask Framework', 'Pyt...",Flask Tutorial Python — Basics of The Flask Fr...,https://medium.com/@p4p3rb0y1/flask-tutorial-p...
1,86007,0.309433,"['Flask', 'Python', 'HTML']",Web Hosting using Python,https://medium.com/datadriveninvestor/web-host...
5,86007,0.310809,"['Flask', 'Python', 'HTML']",Web Hosting using Python,https://medium.com/datadriveninvestor/web-host...
6,33669,0.32469,"['Data Science', 'Web Scraping', 'Python', 'An...",Web scraping com Python,https://medium.com/dados/web-scraping-com-pyth...
7,86007,0.347834,"['Flask', 'Python', 'HTML']",Web Hosting using Python,https://medium.com/datadriveninvestor/web-host...
8,76575,0.367306,"['Software Development', 'Programming', 'Pytho...",What Are The Best & Popular Python Frameworks,https://medium.com/python-in-plain-english/wha...
9,64395,0.372018,"['Programming', 'Coding', 'Python', 'Django', ...",Web Development in Python: Lesson 1,https://medium.com/python-in-plain-english/web...


In [188]:
for idx, row in df_query_expansion_related_keywords_result.iterrows():
    print(f"title: {row['titles']} || url {row['urls']} || tags: {row['tags']}")

title: What Are The Best & Popular Python Frameworks || url https://medium.com/python-in-plain-english/what-are-the-best-popular-python-freamworks-4379f4a6e37c || tags: ['Software Development', 'Programming', 'Python', 'Data Science', 'Web Development']
title: A Basic Overview of Django, the Python Web Framework || url https://medium.com/@leofalcone/a-basic-overview-of-django-the-python-web-framework-14f5d1623cc0 || tags: ['Portal', 'Web Development', 'Django Framework', 'Django', 'Python']
title: Web Hosting using Python || url https://medium.com/datadriveninvestor/web-hosting-using-python-3dbb00abdcba || tags: ['Flask', 'Python', 'HTML']
title: Flask Tutorial Python — Basics of The Flask Framework || url https://medium.com/@p4p3rb0y1/flask-tutorial-python-basics-of-the-flask-framework-8a6c6fc55c0d || tags: ['Python Programming', 'Flask Framework', 'Python3', 'Flask', 'Python']
title: Web Hosting using Python || url https://medium.com/datadriveninvestor/web-hosting-using-python-3dbb00

In [190]:
### results with re-ranking
df_retrieved_re_ranking = df_retrieved_data_merged.sort_values(by='score', ascending=False)

In [193]:
df_retrieved_re_ranking = df_retrieved_re_ranking.drop_duplicates("id")

In [195]:
df_retrieved_re_ranking

Unnamed: 0,title,url,tags,id,score
6,how to create a website in seven lines of pyth...,https://medium.com/@jyothiradityak/how-to-crea...,"['Python Project Ideas', 'Python Website', 'Py...",65351,7.766557
7,How to Create a Simple Django Web App With Pyt...,https://medium.com/better-programming/how-to-c...,"['Python', 'Programming', 'Python3', 'Software...",66840,6.463696
9,How to Create a Simple Django Web App With Pyt...,https://betterprogramming.pub/how-to-create-a-...,"['Programming', 'Django', 'Python3', 'Python',...",98111,6.463696
5,Web Development in Python: Lesson 1,https://medium.com/python-in-plain-english/web...,"['Programming', 'Coding', 'Python', 'Django', ...",64395,6.223163
0,Basic application with Django,https://medium.com/datadriveninvestor/basic-ap...,"['Software', 'Python', 'Web Development', 'Dja...",8240,5.973814
11,Web Hosting using Python,https://medium.com/datadriveninvestor/web-host...,"['Flask', 'Python', 'HTML']",86007,5.588793
4,What exactly can you do with Python? Here are ...,https://towardsdatascience.com/what-can-you-do...,"['Data Science', 'Python', 'Programming', 'Web...",62018,5.110482
2,Flask Tutorial Python — Basics of The Flask Fr...,https://medium.com/@p4p3rb0y1/flask-tutorial-p...,"['Python Programming', 'Flask Framework', 'Pyt...",38143,3.802685
3,Your First website with Flask,https://medium.com/@nitis3211/your-first-websi...,"['Website', 'Pyth', 'Python Framework', 'Flask']",53842,1.786698
1,Getting Started With Flask — Python Framework,https://medium.com/@1303karthick/getting-start...,"['Web Development', 'Flask Framework', 'Gettin...",21827,1.133836


In [196]:
for idx, row in df_retrieved_re_ranking.iterrows():
    print(f"title: {row['title']} || url {row['url']} || tags: {row['tags']}")

title: how to create a website in seven lines of python code. || url https://medium.com/@jyothiradityak/how-to-create-a-website-in-seven-lines-of-python-code-90192668dd58 || tags: ['Python Project Ideas', 'Python Website', 'Python Beginner', 'Flask Framework', 'Python Programming']
title: How to Create a Simple Django Web App With Python. || url https://medium.com/better-programming/how-to-create-a-simple-django-web-app-with-python-7ba75b4e34a6 || tags: ['Python', 'Programming', 'Python3', 'Software Development', 'Django']
title: How to Create a Simple Django Web App With Python. || url https://betterprogramming.pub/how-to-create-a-simple-django-web-app-with-python-7ba75b4e34a6 || tags: ['Programming', 'Django', 'Python3', 'Python', 'Software Development']
title: Web Development in Python: Lesson 1 || url https://medium.com/python-in-plain-english/web-development-in-python-lesson-1-d87ef3e439fc || tags: ['Programming', 'Coding', 'Python', 'Django', 'Web Development']
title: Basic appli