## Lens API - Patent

In [1]:
import json
import requests
search_url = 'https://api.lens.org/patent/search'

In [2]:
## Get API authorisation code
api_auth = open("api_auth.json", "r")
lens_auth = json.load(api_auth)['lens']

api_auth.close()

In [14]:
def get_jurisdictions(countries):
    query_start = '''
                {
                    "bool": {
                        "should": [
            '''

    query_end = '''
                        ]
                    }
                },
            '''

    ## iterate through list of countries and builk query for each country it
    query_builder = []
    for c in countries:
        juridiction =   '''
                        {
                            "term" : {
                                "jurisdiction": "%s"
                            }
                        }
                        ''' % (c)
        
        query_builder.append(juridiction)

    ##
    jurisdictions = (",".join(query_builder))
    query = query_start + jurisdictions + query_end


    return query

In [18]:
query = get_jurisdictions(['US','AU'])
print(query)


                {
                    "bool": {
                        "should": [
            
                        {
                            "term" : {
                                "jurisdiction": "US"
                            }
                        }
                        ,
                        {
                            "term" : {
                                "jurisdiction": "AU"
                            }
                        }
                        
                        ]
                    }
                },
            


In [23]:
def term_builder(term, term_list):
    query_start = '''
                {
                    "bool": {
                        "should": [
            '''

    query_end = '''
                        ]
                    }
                },
            '''

    ## iterate through list of countries and builk query for each country it
    query_builder = []
    for i in term_list:
        term_string =   '''
                            {
                                "term" : {
                                    "%s": "%s"
                                }
                            }
                        ''' % (term, i)
        
        query_builder.append(term_string)

    ## join country list into one string and append to query for return
    terms = (",".join(query_builder))
    query = query_start + terms + query_end

    return query

In [24]:
query = term_builder('juridictions', ['US','AU'])
print(query)


                {
                    "bool": {
                        "should": [
            
                            {
                                "term" : {
                                    "juridictions": "US"
                                }
                            }
                        ,
                            {
                                "term" : {
                                    "juridictions": "AU"
                                }
                            }
                        
                        ]
                    }
                },
            


In [None]:
start_date = "2023-01-01"
end_date = "2023-12-31"
start_from = 0
size = 100      ### For paid licences change this number to 1,000 - 10,000

data = '''{
    "query": {
        "bool": {
            "must": [
                {
                    "bool": {
                        "should": [
                            {
                                "term" : {
                                    "publication_type": "GRANTED_PATENT"
                                }
                            },
                            {
                                "term" : {
                                    "publication_type": "GRANTED_PATENT"
                                }
                            }
                        ]
                    }
                },
                {
                    "bool": {
                        "should": [
                            {
                                "term" : {
                                  "jurisdiction": "US"
                                }
                            },
                            {
                                "term" : {
                                  "jurisdiction": "AU"
                                }
                            }
                        ]
                    }
                },
                {
                    "range" : {
                        "date_published": {
                            "gte": "%s",
                            "lte": "%s"
                        }
                    }
                
                }
            ]
        }
    },
    "sort": [{"date_published": "asc"}],
    "include": ["lens_id",  
                "abstract.text", 
                "date_published", 
                "publication_type",
                "biblio.application_reference.doc_number",
                "biblio.invention_title.text", 
                "biblio.classifications_cpc.classifications.symbol", 
                "biblio.parties.inventors.residence",
                "biblio.parties.applicants.residence"
                ],
    "from" : %d,
    "size" : %d
}''' % (start_date, end_date, start_from, size)

### Request query terms: https://docs.api.lens.org/request-patent.html
### Response column structure: https://docs.api.lens.org/response-patent.html

headers = {'Authorization': lens_auth, 'Content-Type': 'application/json'}
response = requests.post(search_url, data=data, headers=headers)
if response.status_code != requests.codes.ok:
  print(response.status_code)
else:
  print(response.text)

In [None]:
response.json()

In [24]:
f = open("../data/raw/" + f"{start_date}_to_{end_date}.json", "w", encoding='utf-8')
f.write(response.text)
f.close()

## Journal

In [1]:
import json
import requests

search_url = 'https://api.lens.org/scholarly/search'
auth_json = '../../api_auth.json' 
q_countries = ['United States', 'Australia']     ## set the countries to retrieve, see https://docs.api.lens.org/request-scholar.html
q_type = 'Journal'                               ## set the publication types to retrieve, see https://docs.api.lens.org/response-scholar.html
q_date = '2020'
q_size = 100                                     ## set the number of journals to return each query. For paid licences change this number to 1,000 - 10,000
max_limit = 300                                  ## set the limit on the number of results to query for. This will override the max results if lower.


In [19]:
# Define the filters for match
filters_dict = {
    'source.type': q_type,
    'source.country':  q_countries,                 
    'is_open_access': True,
    'has_abstract': True,
    'year_published': q_date
}

def build_query(filters_dict, start_date, end_date):
    # Initialize the query conditions list
    query_conditions = []

    # Iterate through the dictionary and build query conditions
    for key, value in filters_dict.items():
        if isinstance(value, list):
            # For list values (e.g., 'source.country'), use 'terms' query
            query_conditions.append({
                'terms': {key: value}
            })
        else:
            # For single values (e.g., 'source.type'), use 'match' query
            query_conditions.append({
                'match': {key: value}
            })
    
    date_range = {
        "range": {
            "date_published": {
                "gte": start_date,
                "lte": end_date
            }
        }
    }

    query_conditions.append(date_range)

    # Build the 'must' clause of the query
    query_must = {
        "bool": {
            "must": query_conditions
        }
    }

    # Build the final query
    query = {
        "query": query_must,
        "sort": [{"date_published": "asc"}], # sort with date published
        "size": q_size  # Number of results per page (adjust as needed)
    }
    
    return query


In [3]:
                                                  
###
# Get API authorisation code from file.
###
def get_auth():
    global authkey

    api_auth = open(auth_json, "r")
    authkey = json.load(api_auth)['lens']
    api_auth.close()

    return authkey


def get_response(query, start_from = 0):
    size = q_size      

    headers = {'Authorization': get_auth(), 'Content-Type': 'application/json'}
    response = requests.post(search_url, data=json.dumps(query), headers=headers)

    return response

In [20]:
start_date = "2023-01-01"
end_date = "2023-12-01"
query = build_query(filters_dict, start_date, end_date)
print(query)
response = get_response(query)
print(response)

{'query': {'bool': {'must': [{'match': {'source.type': 'Journal'}}, {'terms': {'source.country': ['United States', 'Australia']}}, {'match': {'is_open_access': True}}, {'match': {'has_abstract': True}}, {'match': {'year_published': '2020'}}, {'range': {'date_published': {'gte': '2023-01-01', 'lte': '2023-12-01'}}}]}}, 'sort': [{'date_published': 'asc'}], 'size': 100}
<Response [200]>


In [None]:
response_json = response.json()
filename = "../data/raw/journals/" + f"journals_{pub_year}_from_{start_from}.json"
f = open(filename, "w", encoding='utf-8')
f.write(response.text)
f.close()

In [17]:
query = {
    "query": {
        "bool": {
            "must": [
                {"match": {"source.type": "Journal"}},
                {"terms": {"source.country": ['United States', 'Australia']}},
                {"match": {"is_open_access": True}},
                {"match": {"has_abstract": True}},
                {
          "range": {
            "date_published": {
              "gte": "2023-01-01",
              "lte": "2023-12-01"
              }
            }
          }
              ]
        }  
    },
    "size": 10
}

In [None]:
headers = {"Authorization": get_auth(), 'Content-Type': 'application/json'}
response = requests.post(search_url, headers=headers, data=json.dumps(query))
if response.status_code != requests.codes.ok:
    print(response.status_code)
else:
    print(response.text)