In [2]:
import os
from dotenv import load_dotenv, get_key
load_dotenv()

True

In [3]:
prompt_keywords = r"""
You are a technical expert in generating keywords for searching Google Paptent Database for a given idea.
Return the list of comma seperated keywords for the following idea

**IDEA:** {topic}

**Note:**
1. Keywords should be comma seperated.
2. Do not add any comments except the keywords.
3. You can include most applicable synonymous technical terms for keywords.
"""

prompt_query = r"""
You are a technical expert in generating a effective query string for Google Patent Database Search 
for a given idea and also a set of keywords.
Return the list of comma seperated query string that can effectively return the list of best matched patents
from the patents.google.com patents database.

**IDEA:** {topic}

**Note:**
1. Query string should be comma seperated.
2. Do not add any comments except the query string.
3. At least three query string should be returned.
4. All keywords should be distributed in the complex queries.
5. You can use AND, OR, XOR, SAME, ADJ, NEAR, ), ( and more advance operator for generating query string.

**Output Format:**
((Wearable) AND (Noise Cancellation)),
((Wearable OR Mobile) AND (Noise (Cancellation OR Suppression))
"""

In [12]:
import time
import pandas as pd
from duckduckgo_search import DDGS
from googlesearch import search
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import PromptTemplate

llm = ChatGoogleGenerativeAI(
    model="gemini-pro",
    google_api_key=os.getenv('GEMINI_API_KEY'),
    temperature=0.1
)

def generate_keywords(topic):
    prompt = PromptTemplate.from_template(prompt_keywords)
    chain = prompt | llm
    result = chain.invoke(
        {
            "topic": topic
        }
    )
    print(result.content)
    keywords = result.content.split(',')
    keywords = [keyword.strip() for keyword in keywords]
    return keywords

def generate_queries(topic, keywords):
    prompt = PromptTemplate.from_template(prompt_query)
    chain = prompt | llm
    result = chain.invoke(
        {
            "topic": topic,
            "keywords": keywords,
        }
    )
    print(result.content)
    queries = result.content.split(',')
    queries = [q.strip() for q in queries]
    return queries

def search_duckduckgo(query):
    with DDGS() as ddgs:
        results = ddgs.text(query, max_results=10)
    return results

def parse_results_to_dataframe(results):
    data = []
    for result in results:
        data.append({
            'Title': result.get('title'),
            'URL': result.get('href'),
            'Description': result.get('body')
        })
    df = pd.DataFrame(data)
    return df

def search_prior_art(queries):
    full_result = []
    for query in queries:
        full_query = f"https://patents.google.com?q={query}"
        results = search_duckduckgo(full_query)
        full_result.extend(results)
        time.sleep(2)
    return parse_results_to_dataframe(full_result)

topic = "A new method for efficient solar energy conversion using nanotechnology."

keywords = generate_keywords(topic)
queries = generate_queries(topic, keywords)
prior_arts = search_prior_art(queries)
prior_arts

nanotechnology, solar energy conversion, solar cell efficiency, photovoltaic, renewable energy, thin film solar cells, quantum dots, plasmonics, light trapping, energy harvesting
((Nanotechnology OR Nano) AND (Solar Energy OR Photovoltaic) AND (Conversion OR Efficiency)),
((Nanotechnology OR Nano) AND (Solar Energy OR Photovoltaic) AND (Conversion OR Efficiency) AND (Quantum Dots OR Perovskite)),
((Nanotechnology OR Nano) AND (Solar Energy OR Photovoltaic) AND (Conversion OR Efficiency) AND (Thin Film OR Multijunction))


Unnamed: 0,Title,URL,Description
0,Google Patents,https://patents.google.com/,"Search within the title, abstract, claims, or ..."
1,Search for patents | USPTO - United States Pat...,https://www.uspto.gov/patents/search,Patent Public Search. The Patent Public Search...
2,Patent Public Search | USPTO,https://www.uspto.gov/patents/search/patent-pu...,The https:// ensures that you are connecting t...
3,US5307162A - Cloaking system using optoelectro...,https://patents.google.com/patent/US5307162A/en,The Cloaking System is designed to operate in ...
4,"Google Patents - Wikipedia, the free encyclopedia",https://en.wikipedia.org/wiki/Google_Patents,Wikipedia entry for Google Patents.Google Pate...
5,Patent Public Search Basic (PPUBS Basic) - Uni...,https://ppubs.uspto.gov/pubwebapp/static/pages...,"To start a quick lookup, enter a single patent..."
6,US6362718B1 - Motionless electromagnetic gener...,https://patents.google.com/patent/US6362718B1/en,Because the electromagnetic generator 10 is se...
7,"Justia Patents Search - US Patent, Patent Appl...",https://patents.justia.com/,Search and research millions of US patents for...
8,WIPO - Search International and National Paten...,https://patentscope.wipo.int/search/,This patent search tool allows you not only to...
9,Patent Center - United States Patent and Trade...,https://patentcenter.uspto.gov/search,Patent Center allows users to search and view ...


In [10]:
from googlesearch import search
import pandas as pd
import time

def search_google(query, max_results=10):
    results = []
    for result in search(query, num_results=max_results, advanced=True):
        results.append({
            'Title': result.title,
            'URL': result.url,
            'Description': result.description
        })
    return results

def parse_results_to_dataframe(results):
    return pd.DataFrame(results)

def search_prior_art(queries):
    full_result = []
    for query in queries:
        full_query = f"https://patents.google.com?q={query}"
        print(full_query)
        results = search_google(full_query, max_results=10)
        print(results)
        full_result.extend(results)
        time.sleep(2)  # Rate limit to avoid being blocked
    return parse_results_to_dataframe(full_result)

# Example usage
if __name__ == "__main__":
    queries = ["machine learning"]
    df = search_prior_art(queries)
df


https://patents.google.com?q=machine learning
[{'Title': 'Google Patents Advanced Search', 'URL': 'https://www.google.com/advanced_patent_search', 'Description': 'Search and read the full text of patents from around the world with Google Patents, and find prior art in our index of non-patent literature.'}, {'Title': 'Patent Research and Analysis Google Patents', 'URL': 'https://ipo.org/wp-content/uploads/2019/11/2019-10-Patent-Searching-Google-Patents.pdf', 'Description': 'To make prior art searching easier, Google Patents includes a copy of the technical documents and books indexed in Google. Scholar and Google Books. These\xa0...'}, {'Title': '<https patents google com patent CN110245436B en q= ...', 'URL': 'https://web.open-source-silicon.dev/t/22878645/https-patents-google-com-patent-cn110245436b-en-q-machine-le', 'Description': 'Aug 5, 2024 — https://patents.google.com/patent/CN110245436B/en?q=(machine+learning+and+circuit+design)&amp;oq=machine+learning+and+circuit+design · Open 

Unnamed: 0,Title,URL,Description
0,Google Patents Advanced Search,https://www.google.com/advanced_patent_search,Search and read the full text of patents from ...
1,Patent Research and Analysis Google Patents,https://ipo.org/wp-content/uploads/2019/11/201...,"To make prior art searching easier, Google Pat..."
2,<https patents google com patent CN110245436B ...,https://web.open-source-silicon.dev/t/22878645...,"Aug 5, 2024 — https://patents.google.com/paten..."
3,Measuring patent claim breadth using ...,https://cloud.google.com/blog/products/ai-mach...,"Jul 10, 2018 — In this post, we demonstrate a ..."
4,About Google Patents,https://support.google.com/faqs/answer/6390996...,Search and read the full text of patents from ...
5,Why Google's new patent applications are alarming,https://www.reddit.com/r/MachineLearning/comme...,Google recently submitted at least four patent...
6,Scraping Google Patents with requests only ret...,https://stackoverflow.com/questions/44419565/s...,I'm trying to scrape Google Patents using the ...
7,How AI improves patent analysis | Google Cloud...,https://cloud.google.com/blog/products/ai-mach...,"Nov 21, 2020 — In recent years the patent indu..."
8,(PDF) Google Patents: The global patent search...,https://www.researchgate.net/publication/28030...,"Oct 22, 2024 — This study begins with an overv..."
9,"US Patent, Patent Application and Patent Searc...",https://patents.justia.com/,Search and research millions of US patents for...
