In [1]:
!py -m pip install serpapi pandas python-dotenv


Collecting serpapi
  Downloading serpapi-0.1.5-py2.py3-none-any.whl.metadata (10 kB)
Downloading serpapi-0.1.5-py2.py3-none-any.whl (10 kB)
Installing collected packages: serpapi
Successfully installed serpapi-0.1.5


In [2]:
# Import required libraries
import serpapi
from dotenv import load_dotenv
import pandas as pd
import os

# Load environment variables from the .env file
load_dotenv()

True

In [3]:
all_extracted_data = []

In [11]:


# Your API Key can be found at https://serpapi.com/manage-api-key
client = serpapi.Client(api_key=os.getenv("SERPAPI_KEY"))   

 # Master data list used to create .csv file later on
page_number = 1         # Assign initial page_number value as 1

for _ in range(30):
    results = client.search({
            "engine": "google_patents", # Define engine
            "q": "(Automotive)",            # Your search query
            "page": page_number         # Page number, defined before
    })

    organic_results = results["organic_results"] 

    # Extract data from each result
    extracted_data = []
    for result in organic_results:
        data = {
            "position":result.get("position"),
            "rank": result.get("rank"),
            "patent_id": result.get("patent_id"),
            "serpapi_link": result.get("serpapi_link"),
            "title": result.get("title"),
            "priority_date": result.get("priority_date"),
            "filing_date": result.get("filing_date"),
            "grant_date": result.get("grant_date"),
            "publication_date": result.get("publication_date"),
            "inventor": result.get("inventor"),
            "assignee": result.get("assignee"),
            "publication_number": result.get("publication_number"),
            "language": result.get("language"),
            "thumbnail":result.get("thumbnail"),
            "pdf": result.get("pdf"),
            "page": page_number
        }

        extracted_data.append(data)

    # Add the extracted data to the master data list
    all_extracted_data.extend(extracted_data)   
    
    # Increment page number value by 1 or end the loop
    if "next" in results["serpapi_pagination"]:
        page_number +=1 
    else:
        break           


In [12]:
all_extracted_data

[{'position': 1,
  'rank': 0,
  'patent_id': 'patent/US10427604B2/en',
  'serpapi_link': 'https://serpapi.com/search.json?engine=google_patents_details&patent_id=patent%2FUS10427604B2%2Fen',
  'title': 'Vision system for a vehicle',
  'priority_date': '2000-03-02',
  'filing_date': '2018-08-27',
  'grant_date': '2019-10-01',
  'publication_date': '2019-10-01',
  'inventor': 'Kenneth Schofield',
  'assignee': 'Magna Electronics Inc.',
  'publication_number': 'US10427604B2',
  'language': 'en',
  'thumbnail': 'https://patentimages.storage.googleapis.com/24/7d/d7/f5c6b523a64b07/US10427604-20191001-D00000.png',
  'pdf': 'https://patentimages.storage.googleapis.com/57/a2/97/f38685612beffe/US10427604.pdf'},
 {'position': 2,
  'rank': 1,
  'patent_id': 'patent/US20210234767A1/en',
  'serpapi_link': 'https://serpapi.com/search.json?engine=google_patents_details&patent_id=patent%2FUS20210234767A1%2Fen',
  'title': 'Vehicle middleware',
  'priority_date': '2011-11-16',
  'filing_date': '2021-04-

In [27]:
csv_file = "extracted_data.csv" # Assign .csv file name to a variable
csv_columns = [                 # Define list of columns for your .csv file
            "position",
            "rank",
            "patent_id",
            "serpapi_link",
            "title",
            "priority_date",
            "filing_date",
            "grant_date",
            "publication_date",
            "inventor",
            "assignee",
            "publication_number",
            "language",
            "thumbnail",
            "pdf",
            "page"
    ]   


# Save all extracted data to a CSV file
df = pd.DataFrame(data=all_extracted_data)
df = df.drop_duplicates(subset='patent_id', keep="last")
df.to_csv(
    csv_file, 
    columns=csv_columns, 
    encoding="utf-8", 
    index=False
    )

In [28]:
df.head()

Unnamed: 0,position,rank,patent_id,serpapi_link,title,priority_date,filing_date,grant_date,publication_date,inventor,assignee,publication_number,language,thumbnail,pdf,page
10,1,0,patent/US10427604B2/en,https://serpapi.com/search.json?engine=google_...,Vision system for a vehicle,2000-03-02,2018-08-27,2019-10-01,2019-10-01,Kenneth Schofield,Magna Electronics Inc.,US10427604B2,en,https://patentimages.storage.googleapis.com/24...,https://patentimages.storage.googleapis.com/57...,1.0
11,2,1,patent/US20210234767A1/en,https://serpapi.com/search.json?engine=google_...,Vehicle middleware,2011-11-16,2021-04-16,,2021-07-29,Christopher P. Ricci,Autoconnect Holdings Llc,US20210234767A1,en,https://patentimages.storage.googleapis.com/aa...,https://patentimages.storage.googleapis.com/42...,1.0
12,3,2,patent/US10979875B2/en,https://serpapi.com/search.json?engine=google_...,System and method for wireless interface selec...,2011-01-14,2018-09-11,2021-04-13,2021-04-13,Lillian Lei Dai,"Cisco Technology, Inc.",US10979875B2,en,https://patentimages.storage.googleapis.com/46...,https://patentimages.storage.googleapis.com/31...,1.0
13,4,3,patent/US10320836B2/en,https://serpapi.com/search.json?engine=google_...,Automotive ECU controller and data network hav...,2017-01-03,2018-07-16,2019-06-11,2019-06-11,Tal Efraim Ben David,Karamba Security Ltd.,US10320836B2,en,https://patentimages.storage.googleapis.com/50...,https://patentimages.storage.googleapis.com/40...,1.0
14,5,4,patent/US11790420B2/en,https://serpapi.com/search.json?engine=google_...,Visual discovery tool for automotive manufactu...,2016-10-18,2021-01-04,2023-10-17,2023-10-17,Jeffrey Stuart Cotton,"Autoalert, Llc",US11790420B2,en,https://patentimages.storage.googleapis.com/38...,https://patentimages.storage.googleapis.com/f7...,1.0


In [29]:
df.shape

(300, 16)