# Important Functions

In [21]:
def word2features(sent, i):
    """Build the CRF feature dictionary for the token at position ``i``.

    Args:
        sent: Sequence of token strings forming one sentence.
        i: Index into ``sent`` of the token to describe.

    Returns:
        dict with the token itself, its position flags (``is_first``,
        ``is_last``), three case flags, prefixes/suffixes of length 1-3,
        and the neighbouring tokens ('' at the sentence boundaries).

    Raises:
        IndexError: if ``i`` is outside ``sent``.

    Note:
        For text without letter case (such as the Arabic-script sample used
        in this notebook) ``upper()``/``lower()`` leave the token unchanged,
        so all three case flags evaluate to True.
        The feature names and values presumably have to match the ones used
        when the model was trained -- verify before changing any of them.
    """
    word = sent[i]
    last_index = len(sent) - 1

    # Slices (word[:1], word[-1:]) are used instead of word[0] / word[-1] so
    # that an empty token yields '' rather than raising IndexError; for any
    # non-empty token the values are identical to plain indexing.
    first_char = word[:1]

    features = {
        'word': word,
        'is_first': i == 0,
        'is_last': i == last_index,
        'is_capitalized': first_char.upper() == first_char,
        'is_all_caps': word.upper() == word,
        'is_all_lower': word.lower() == word,
        'prefix-1': first_char,
        'prefix-2': word[:2],
        'prefix-3': word[:3],
        'suffix-1': word[-1:],
        'suffix-2': word[-2:],
        'suffix-3': word[-3:],
        'prev_word': '' if i == 0 else sent[i - 1],
        'next_word': '' if i == last_index else sent[i + 1],
    }
    return features

def sent2features(sent):
    """Return one feature dict per token of ``sent``, in token order."""
    per_token = []
    for position in range(len(sent)):
        per_token.append(word2features(sent, position))
    return per_token

def extract_features(tokens):
    """Compute the per-token feature dicts for ``tokens``.

    Thin wrapper that forwards to ``sent2features`` and returns its result
    unchanged.
    """
    token_features = sent2features(tokens)
    return token_features

# STEP 1: Load the Model

In [23]:
import joblib
import nltk

# Load the CRF model from 'ckner.pkl' (a relative path, resolved against the
# current working directory).
# joblib.load unpickles the file, which can execute arbitrary code -- only
# load model files that come from a trusted source.
crf = joblib.load('ckner.pkl')

# STEP 2: Define the Entity Prediction Function

In [24]:
def predict_entities(sentence):
    """Tokenize ``sentence`` and label every token with the loaded CRF model.

    Args:
        sentence: Raw text to tag.

    Returns:
        list of ``(token, predicted_label)`` tuples in token order.
    """
    words = nltk.word_tokenize(sentence)
    # The features are wrapped in a one-element list for crf.predict and the
    # first (only) result is taken back out.
    labels = crf.predict([sent2features(words)])[0]
    return [pair for pair in zip(words, labels)]

# STEP 3: Define the Entity Prediction Function (repeats the STEP 2 cell)

In [25]:
def predict_entities(sentence):
    """Return ``(token, label)`` pairs for ``sentence`` using the loaded CRF.

    The sentence is split with ``nltk.word_tokenize``, converted to per-token
    feature dicts, and passed to ``crf.predict`` as a single-sentence batch.
    """
    token_list = nltk.word_tokenize(sentence)
    batch = [sent2features(token_list)]
    predicted = crf.predict(batch)
    first_result = predicted[0]  # only one sentence was submitted
    return list(zip(token_list, first_result))

# STEP 4: Test

In [26]:
new_sentence = "مەهدی ئۆزدەمیر لە بارۆی ئامەد رایگەیاند"
entities = predict_entities(new_sentence)

for word, tag in entities:
    print(f"{word} -> {tag}")

مەهدی -> B-PER
ئۆزدەمیر -> I-PER
لە -> O
بارۆی -> B-ORG
ئامەد -> I-ORG
رایگەیاند -> O


# Application Part

# STEP 1: Prepare Your Jupyter Notebook

In [1]:
import nltk
import joblib
import ipywidgets as widgets
from IPython.display import display
from nltk.tokenize import word_tokenize

# Load your trained CRF model from 'ckner.pkl' (a relative path, resolved
# against the current working directory).
# joblib.load unpickles the file, so only load model files from a trusted source.
crf = joblib.load('ckner.pkl')

# Ensure nltk's word tokenizer is ready: fetch the 'punkt' data package
# (the recorded output shows it is skipped when already up to date).
# NOTE(review): recent NLTK releases look up 'punkt_tab' for word_tokenize --
# confirm which package the installed version needs.
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\bakht\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

# STEP 2: Define Feature Extraction Functions

In [2]:
def word2features(sent, i):
    """Build the CRF feature dictionary for the token at position ``i``.

    Args:
        sent: Sequence of token strings forming one sentence.
        i: Index into ``sent`` of the token to describe.

    Returns:
        dict with the token itself, its position flags (``is_first``,
        ``is_last``), three case flags, prefixes/suffixes of length 1-3,
        and the neighbouring tokens ('' at the sentence boundaries).

    Raises:
        IndexError: if ``i`` is outside ``sent``.

    Note:
        For text without letter case ``upper()``/``lower()`` leave the token
        unchanged, so all three case flags evaluate to True.
        The feature names and values presumably have to match the ones used
        when the model was trained -- verify before changing any of them.
    """
    word = sent[i]
    last_index = len(sent) - 1

    # Slices (word[:1], word[-1:]) are used instead of word[0] / word[-1] so
    # that an empty token yields '' rather than raising IndexError; for any
    # non-empty token the values are identical to plain indexing.
    first_char = word[:1]

    features = {
        'word': word,
        'is_first': i == 0,
        'is_last': i == last_index,
        'is_capitalized': first_char.upper() == first_char,
        'is_all_caps': word.upper() == word,
        'is_all_lower': word.lower() == word,
        'prefix-1': first_char,
        'prefix-2': word[:2],
        'prefix-3': word[:3],
        'suffix-1': word[-1:],
        'suffix-2': word[-2:],
        'suffix-3': word[-3:],
        'prev_word': '' if i == 0 else sent[i - 1],
        'next_word': '' if i == last_index else sent[i + 1],
    }
    return features

def sent2features(sent):
    """Map every position of ``sent`` to its ``word2features`` dict.

    The returned list has the same length and order as ``sent``.
    """
    collected = []
    for idx in range(len(sent)):
        collected.append(word2features(sent, idx))
    return collected

# STEP 3: Define the Prediction Function

In [3]:
def predict_entities(sentence):
    """Tokenize ``sentence`` and pair each token with its predicted label.

    Args:
        sentence: Raw text to tag.

    Returns:
        list of ``(token, predicted_label)`` tuples in token order.
    """
    words = word_tokenize(sentence)
    # crf.predict is given a one-sentence batch; index 0 extracts its result.
    labels = crf.predict([sent2features(words)])[0]
    return [pair for pair in zip(words, labels)]

# STEP 4: Create Interactive Widgets

In [4]:
# Text area for user input.
# NOTE: `value` pre-fills the box, and the click handler reads
# `text_input.value`, so pressing the button without editing tags this
# instruction text itself.
text_input = widgets.Textarea(
    value='Enter your text here',
    placeholder='Type something',
    description='Text:',
    disabled=False
)

# Output area to display the results; the click handler prints into it.
output = widgets.Output()

def on_button_click(b):
    """Click handler: tag the current text and show the result in ``output``.

    Args:
        b: Argument supplied by the button's click callback; not used here.
    """
    with output:
        # Drop the previous run's lines before printing the new ones.
        output.clear_output()
        for token, label in predict_entities(text_input.value):
            print(f"{token}: {label}")

# Button to trigger the prediction: each click calls on_button_click.
button = widgets.Button(description="Recognize Entities")
button.on_click(on_button_click)

# Display the interface: input box, button, then the results area.
display(text_input, button, output)

Textarea(value='Enter your text here', description='Text:', placeholder='Type something')

Button(description='Recognize Entities', style=ButtonStyle())

Output()

# STEP 5: Run Voila