## IMPORT REQUIRED LIBRARIES

In [1]:
import os
import re
import csv
import json
import time
import numpy as np
import pandas as pd
from pymilvus import (connections, utility, Collection, CollectionSchema, FieldSchema, DataType)
from langchain.docstore.document import Document
from langchain.document_loaders import CSVLoader
from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceInstructEmbeddings
from langchain.vectorstores.milvus import Milvus
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

  from .autonotebook import tqdm as notebook_tqdm


## EXPLORING DATA

In [2]:
df = pd.read_excel("Automotive_Industry_Scraped_2022-24.xlsx")
df.head(5)

Unnamed: 0,Doc Identifier,Level 1,Assignee,Inventor,Title,Abstract,First Claim
0,CN114954187B,Patent Set,Ningbo Congyue Electronic Equipment Co ltd,黄韬,Vehicle-mounted cup stand and combined vehicle...,The invention discloses a vehicle-mounted cup ...,1. The vehicle-mounted cup stand is characteri...
1,CN116022087A,Patent Set,Faurecia Interieur Industrie SAS,"皮埃尔·莱格朗德 , 金-劳伊斯·托雷西拉斯",Vehicle ornament and production process of same,The invention relates to a vehicle ornament (2...,"1. A vehicle trim piece (2), the trim piece co..."
2,CA3137179A1,Patent Set,Manac Inc,"Guy Veilleux , Guillaume Samson",Light-weight beam assembly and truck trailer u...,Light-weight beam assembly and truck trailer u...,1. A light-weight truck trailer comprising:two...
3,CN112172932B,Patent Set,Toyota Motor Corp,"各务绫加 , 村田宗志朗 , 神野光雄 , 松井初美",Vehicle body lower structure,The vehicle body lower structure of the presen...,1. A vehicle body lower structure characterize...
4,CN114901231B,Patent Set,Mazda Motor Corp,"佐佐木刚史 , 高安宏明 , 德田大辅 , 栗栖邦彦 , 斋藤浩章 , 梅津贵则",Vehicle get-on/off auxiliary device,Provided is a vehicle entrance/exit assist dev...,"1. An entry/exit support device for a vehicle,..."


In [3]:
df.shape

(2742, 7)

In [4]:
df.columns

Index(['Doc Identifier', 'Level 1', 'Assignee', 'Inventor', 'Title',
       'Abstract', 'First Claim'],
      dtype='object')

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2742 entries, 0 to 2741
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Doc Identifier  2742 non-null   object
 1   Level 1         2742 non-null   object
 2   Assignee        2482 non-null   object
 3   Inventor        2484 non-null   object
 4   Title           2484 non-null   object
 5   Abstract        2742 non-null   object
 6   First Claim     2740 non-null   object
dtypes: object(7)
memory usage: 150.1+ KB


In [6]:
df.rename(columns = {"Doc Identifier": "Doc_Identifier", "Level 1": "Level", "First Claim": "First_Claim"}, inplace=True)

In [7]:
df.isnull().sum()

Doc_Identifier      0
Level               0
Assignee          260
Inventor          258
Title             258
Abstract            0
First_Claim         2
dtype: int64

In [8]:
df.dropna(inplace=True)

print("Shape", df.shape)
print(df.isnull().sum())

Shape (2481, 7)
Doc_Identifier    0
Level             0
Assignee          0
Inventor          0
Title             0
Abstract          0
First_Claim       0
dtype: int64


In [9]:
df["Patent_Text"] = df.apply(lambda x: "Title: "+x["Title"]+"\nAbstract: "+x["Abstract"], axis=1)

In [11]:
df.to_csv("cleaned_data.csv", index = False)

In [12]:
records = df.to_dict(orient='records')

In [13]:
records[0]

{'Doc_Identifier': 'CN114954187B',
 'Level': 'Patent Set',
 'Assignee': 'Ningbo Congyue Electronic Equipment Co ltd',
 'Inventor': '黄韬',
 'Title': 'Vehicle-mounted cup stand and combined vehicle-mounted appliance comprising same',
 'Abstract': 'The invention discloses a vehicle-mounted cup stand and a combined vehicle-mounted appliance containing the same, wherein the combined vehicle-mounted appliance comprises a cup stand shell, a cup stand foot support and a knob component are sequentially arranged in the cup stand shell from bottom to top, and the opening and the retraction of the cup stand foot support are adjusted under the action of the knob component; the knob component comprises a knob lower shell, a knob upper shell, a knob cover plate and a knob mechanism, wherein the knob upper shell is positioned above the knob lower shell, the knob mechanism is installed on the knob upper shell, and the knob cover plate is installed on the inner side wall of the cup stand shell and is in 

## CREATA DOCUMENTS

In [14]:
metadata_cols = df.columns.to_list()
metadata_cols.remove("Patent_Text")

loader = CSVLoader("./cleaned_data.csv", metadata_columns = metadata_cols)
documents = loader.load()
len(documents)

2481

In [15]:
documents[0]

Document(page_content='Patent_Text: Title: Vehicle-mounted cup stand and combined vehicle-mounted appliance comprising same\nAbstract: The invention discloses a vehicle-mounted cup stand and a combined vehicle-mounted appliance containing the same, wherein the combined vehicle-mounted appliance comprises a cup stand shell, a cup stand foot support and a knob component are sequentially arranged in the cup stand shell from bottom to top, and the opening and the retraction of the cup stand foot support are adjusted under the action of the knob component; the knob component comprises a knob lower shell, a knob upper shell, a knob cover plate and a knob mechanism, wherein the knob upper shell is positioned above the knob lower shell, the knob mechanism is installed on the knob upper shell, and the knob cover plate is installed on the inner side wall of the cup stand shell and is in contact with the knob mechanism. According to the vehicle-mounted cup stand, the first saw teeth and the secon

In [18]:
print(documents[0].page_content)

Patent_Text: Title: Vehicle-mounted cup stand and combined vehicle-mounted appliance comprising same
Abstract: The invention discloses a vehicle-mounted cup stand and a combined vehicle-mounted appliance containing the same, wherein the combined vehicle-mounted appliance comprises a cup stand shell, a cup stand foot support and a knob component are sequentially arranged in the cup stand shell from bottom to top, and the opening and the retraction of the cup stand foot support are adjusted under the action of the knob component; the knob component comprises a knob lower shell, a knob upper shell, a knob cover plate and a knob mechanism, wherein the knob upper shell is positioned above the knob lower shell, the knob mechanism is installed on the knob upper shell, and the knob cover plate is installed on the inner side wall of the cup stand shell and is in contact with the knob mechanism. According to the vehicle-mounted cup stand, the first saw teeth and the second saw teeth are ingeniou

In [19]:
documents[0].metadata

{'source': './cleaned_data.csv',
 'row': 0,
 'Doc_Identifier': 'CN114954187B',
 'Level': 'Patent Set',
 'Assignee': 'Ningbo Congyue Electronic Equipment Co ltd',
 'Inventor': '黄韬',
 'Title': 'Vehicle-mounted cup stand and combined vehicle-mounted appliance comprising same',
 'Abstract': 'The invention discloses a vehicle-mounted cup stand and a combined vehicle-mounted appliance containing the same, wherein the combined vehicle-mounted appliance comprises a cup stand shell, a cup stand foot support and a knob component are sequentially arranged in the cup stand shell from bottom to top, and the opening and the retraction of the cup stand foot support are adjusted under the action of the knob component; the knob component comprises a knob lower shell, a knob upper shell, a knob cover plate and a knob mechanism, wherein the knob upper shell is positioned above the knob lower shell, the knob mechanism is installed on the knob upper shell, and the knob cover plate is installed on the inner

## EMBEDDING MODEL

In [2]:
embeddings_minilm = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

  warn_deprecated(


## MILVUS

In [3]:
_HOST = "localhost"
_PORT = "19530"


def create_milvus_client():
    try:
        connection_args = {
            "host": _HOST,
            "port": _PORT
        }
        connections.connect(**connection_args)
        print("Successfully created Milvus Client")
        
    except Exception as e:
        print(f"Failed to create Milvus client: {e}")
        return False, {}
    return True, connection_args


def list_milvus_collections():
    collection_names = utility.list_collections()
    return collection_names


def check_milvus_collection(collection_name):
    collection_exists = utility.has_collection(collection_name)
    print(f"Does collection {collection_name} exist in Milvus: {collection_exists}")


def drop_milvus_collection(collection_name):
    try:
        utility.drop_collection(collection_name)
        print(f"Collection named - {collection_name} dropped from Milvus")
    except Exception as e:
        print("An error occured:", str(e))

In [4]:
_, connection_args = create_milvus_client()

Successfully created Milvus Client


In [5]:
# List all Milvus Collections
milvus_collections = list_milvus_collections()
print("List of collections:", milvus_collections)

List of collections: ['Demo_Index_V1']


In [25]:
INDEX_NAME = "Demo_Index_V1"

In [26]:
# Return VectorStore initialized from documents and embeddings

Milvus.from_documents(
    documents,
    embeddings_minilm,
    connection_args = connection_args,
    collection_name = INDEX_NAME
)
print("Milvus collection created successfully")

Milvus collection created successfully


In [26]:
# Check for created Collection
check_milvus_collection(INDEX_NAME)

Does collection Demo_Index_V1 exist in Milvus: True


## SEARCHING

In [33]:
def dict_to_tuple(d):
    if isinstance(d, dict):
        return tuple((k, dict_to_tuple(v)) for k, v in d.items())
    else:
        return d

In [34]:
query = "Tell me about Vehicle body lower structure"

In [35]:
milvus = Milvus(embeddings_minilm, connection_args = connection_args, collection_name = INDEX_NAME)

ret_docs_mmr = milvus.max_marginal_relevance_search(query, k=10)
ret_docs_sim = milvus.similarity_search_with_score(query, k=10)

ret_docs_mmr_tuples = [(doc.page_content, dict_to_tuple(doc.metadata)) for doc in ret_docs_mmr]
ret_docs_sim_tuples = [(doc[0].page_content, dict_to_tuple(doc[0].metadata)) for doc in ret_docs_sim]

combined_tuples = ret_docs_mmr_tuples + ret_docs_sim_tuples
unique_tuples = set(combined_tuples)

merged_results = [Document(page_content=content, metadata=dict(metadata)) for content, metadata in unique_tuples][:3]
merged_results

  warn_deprecated(


[Document(page_content='Patent_Text: Title: Vehicle body lower structure\nAbstract: A vehicle body lower structure may include: a hollow rocker arranged at a lower lateral part of a vehicle body and extending along a front-rear direction of the vehicle body; a power source arranged adjacent to the rocker; an energy absorbing member connected to the power source and arranged under the rocker; and a collar arranged between the rocker and the energy absorbing member. The rocker may be provided with a flange extending downward from a bottom plate of the rocker. A height of the collar above the energy absorbing member may be greater than a height of the flange.', metadata={'source': './cleaned_data.csv', 'row': 2027, 'Doc_Identifier': 'US11485415B2', 'Level': 'Patent Set', 'Assignee': 'Subaru Corp , Toyota Motor Corp', 'Inventor': 'Naoki Takahashi , Shunji Shibata', 'Title': 'Vehicle body lower structure', 'Abstract': 'A vehicle body lower structure may include: a hollow rocker arranged at 

In [36]:
def process_query(query, INDEX_NAME):
    connection_args = {
        "host": _HOST,
        "port": _PORT
    }
    
    milvus = Milvus(embeddings_minilm, connection_args = connection_args, collection_name = INDEX_NAME)
    ret_docs_mmr = milvus.max_marginal_relevance_search(query, k=10)
    ret_docs_sim = milvus.similarity_search_with_score(query, k=10)

    ret_docs_mmr_tuples = [(doc.page_content, dict_to_tuple(doc.metadata)) for doc in ret_docs_mmr]
    ret_docs_sim_tuples = [(doc[0].page_content, dict_to_tuple(doc[0].metadata)) for doc in ret_docs_sim]

    combined_tuples = ret_docs_mmr_tuples + ret_docs_sim_tuples
    unique_tuples = set(combined_tuples)

    merged_results = [Document(page_content=content, metadata=dict(metadata)) for content, metadata in unique_tuples]
    return merged_results

In [37]:
# Initialize the model and tokenizer
tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-reranker-large')
model = AutoModelForSequenceClassification.from_pretrained('BAAI/bge-reranker-large')
model.eval()

def rerank_pairs(query, candidate_responses, top_k=3):
    pairs = [(query, res.page_content) for res in candidate_responses]
    
    with torch.no_grad():
        inputs = tokenizer(pairs, padding=True, truncation=True, return_tensors='pt')
        scores = model(**inputs, return_dict=True).logits.view(-1).float()
    
    scores_list = scores.tolist()
    paired_scores = list(zip(candidate_responses, scores_list))
    reranked_results = sorted(paired_scores, key=lambda x: -x[1])
    return reranked_results[:top_k]

In [38]:
def get_results(query, collection_name):
    query_results = process_query(query, collection_name)
    print("\nRetrieved Chunks from Milvus:\n{}".format("\n\n".join(res.page_content for res in query_results)))
    reranked_results = rerank_pairs(query=query, candidate_responses=query_results)
    print("\nReranked Chunks:\n{}".format("\n\n".join(res.page_content for res in query_results)))

    reranked_results_json = []

    for result, _ in reranked_results:
        result_json = {
            "Patent_Text": result.page_content,
            "Doc_Identifier": result.metadata["Doc_Identifier"],
            "Level": result.metadata["Level"],
            "Assignee": result.metadata["Assignee"],
            "Inventor": result.metadata["Inventor"],
            "Title": result.metadata["Title"],
            "Abstract": result.metadata["Abstract"],
            "First_Claim": result.metadata["First_Claim"]
        }
        reranked_results_json.append(result_json)
    return reranked_results_json

In [39]:
results = get_results(query, INDEX_NAME)


Retrieved Chunks from Milvus:
Patent_Text: Title: Vehicle body lower structure
Abstract: A vehicle body lower structure may include: a hollow rocker arranged at a lower lateral part of a vehicle body and extending along a front-rear direction of the vehicle body; a power source arranged adjacent to the rocker; an energy absorbing member connected to the power source and arranged under the rocker; and a collar arranged between the rocker and the energy absorbing member. The rocker may be provided with a flange extending downward from a bottom plate of the rocker. A height of the collar above the energy absorbing member may be greater than a height of the flange.

Patent_Text: Title: Vehicle body lower structure
Abstract: The disclosure provides a vehicle body lower structure. The vehicle body lower structure includes: a deflector, disposed on a vehicle body and movable between a stowed position covering a lower part of the vehicle body and a deployed position protruding downward; and a

In [40]:
len(results)

3

In [41]:
results

[{'Patent_Text': 'Patent_Text: Title: Lower vehicle-body structure of electric vehicle\nAbstract: A lower vehicle-body structure of an electric vehicle to increase resistance to lateral collision and also improve front-end collision load transfer performance of a tunnel. The lower vehicle-body structure of an electric vehicle includes a floor having a tunnel formed in a front part at a middle in a vehicle-width direction; and a floor cross member spanning, above the floor, between side sills disposed on opposite sides of the floor. Also, a rear end of the tunnel is located directly in front of the floor cross member, and reinforcing brackets are provided extending from the tunnel toward an upper end of the floor cross member and connected to the upper end of the floor cross member.',
  'Doc_Identifier': 'US11299211B2',
  'Level': 'Patent Set',
  'Assignee': 'Mazda Motor Corp',
  'Inventor': 'Yamato MATSUDA , Nobuyuki Nakayama , Morihide Yamada , Katsuhito Kouno , Toshihiro Takebayashi'