In [1]:
from gliner2 import GLiNER2

# Instantiate the extractor from the pretrained checkpoint named below.
extractor = GLiNER2.from_pretrained("fastino/gliner2-base-v1")

# Sample sentence to tag. Note that "Apple Inc." occurs twice in it.
text = "Apple Inc. CEO Tim Cook announced the new iPhone 15 by Apple Inc. in Cupertino, California on September 12, 2023."

# Ask for five entity labels, using the call's default options.
results = extractor.extract_entities(
    text,
    [
        "company",
        "person",
        "product",
        "location",
        "date",
    ],
)
print(results)

  from .autonotebook import tqdm as notebook_tqdm


🧠 Model Configuration
Encoder model      : microsoft/deberta-v3-base
Counting layer     : count_lstm_v2
Token pooling      : first


You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'entities': {'company': ['Apple Inc.'], 'person': ['Tim Cook'], 'product': ['iPhone 15'], 'location': ['Cupertino', 'California'], 'date': ['September 12, 2023']}}


In [2]:
# Same text and labels as the first call, with confidence scores explicitly
# switched off; `results` is rebound to the new return value.
results = extractor.extract_entities(
    text,
    ["company", "person", "product", "location", "date"],
    include_confidence=False,
)

# Bare expression so the notebook displays the returned value.
results

{'entities': {'company': ['Apple Inc.'],
  'person': ['Tim Cook'],
  'product': ['iPhone 15'],
  'location': ['Cupertino', 'California'],
  'date': ['September 12, 2023']}}

In [3]:
# Request confidence scores and turn result formatting off. In the run
# recorded below, each match is displayed as a (text, score) tuple and the
# repeated "Apple Inc." mention appears twice.
results = extractor.extract_entities(
    text,
    ["company", "person", "product", "location", "date"],
    include_confidence=True,
    format_results=False,
)

# Bare expression so the notebook displays the returned value.
results

{'entities': [OrderedDict([('company',
                [('Apple Inc.', 1.0), ('Apple Inc.', 1.0)]),
               ('person', [('Tim Cook', 0.9999802112579346)]),
               ('product', [('iPhone 15', 0.999994158744812)]),
               ('location',
                [('Cupertino', 0.9939866065979004),
                 ('California', 0.9805236458778381)]),
               ('date', [('September 12, 2023', 0.9998268485069275)])])]}

In [4]:
# Print one line per entity label: the label followed by its list of matches.
# NOTE(review): indexes results['entities'][0], so this expects `results` to
# hold a list under 'entities' (as produced with format_results=False) - confirm
# the preceding cell was the last one to bind `results`.
for label, matches in results['entities'][0].items():
    print(label, matches)

company [('Apple Inc.', 1.0), ('Apple Inc.', 1.0)]
person [('Tim Cook', 0.9999802112579346)]
product [('iPhone 15', 0.999994158744812)]
location [('Cupertino', 0.9939866065979004), ('California', 0.9805236458778381)]
date [('September 12, 2023', 0.9998268485069275)]


In [5]:
# Build a multi-task schema step by step with create_schema().
# `schema` is rebound to the return value of every builder call, so the final
# binding is the same object the single fluent chain would produce.
schema = extractor.create_schema()

# Entities to extract: label -> natural-language description.
schema = schema.entities({
    "person": "Names of people, executives, or individuals",
    "company": "Organization, corporation, or business names",
    "product": "Products, services, or offerings mentioned",
})

# Classification tasks: task name followed by its candidate labels.
schema = schema.classification("sentiment", ["positive", "negative", "neutral"])
schema = schema.classification("category", ["technology", "business", "finance", "healthcare"])

# Structured record "product_info" with four declared fields.
schema = (
    schema.structure("product_info")
    .field("name", dtype="str")
    .field("price", dtype="str")
    .field("features", dtype="list")
    .field("availability", dtype="str", choices=["in_stock", "pre_order", "sold_out"])
)

# Input for the combined extraction; this rebinds `text` from the earlier cells.
text = "Apple CEO Tim Cook unveiled the revolutionary iPhone 15 Pro for $999. The device features an A17 Pro chip and titanium design."

In [6]:
# Run the schema against the text, asking for confidence scores and leaving
# result formatting off.
results = extractor.extract(
    text,
    schema,
    include_confidence=True,
    format_results=False,
)
# Bare expression so the notebook displays the returned value.
results

{'product_info': [OrderedDict([('name', 'iPhone 15 Pro'),
               ('price', '$999'),
               ('features', [('A17 Pro chip', 1.0), ('titanium design', 1.0)]),
               ('availability', 'in_stock')])],
 'entities': [OrderedDict([('person', [('Tim Cook', 0.9999864101409912)]),
               ('company', [('Apple', 0.9999414682388306)]),
               ('product', [('iPhone 15 Pro', 0.999923586845398)])])],
 'sentiment': ('positive', 1.0),
 'category': ('technology', 0.99991774559021)}

In [7]:
# Run the same schema again with result formatting switched on and no
# confidence argument passed.
results = extractor.extract(
    text,
    schema,
    format_results=True,
)
# Bare expression so the notebook displays the returned value.
results

{'product_info': [{'name': 'iPhone 15 Pro',
   'price': '$999',
   'features': ['A17 Pro chip', 'titanium design'],
   'availability': 'in_stock'}],
 'entities': {'person': ['Tim Cook'],
  'company': ['Apple'],
  'product': ['iPhone 15 Pro']},
 'sentiment': 'positive',
 'category': 'technology'}