### Changing to the main directory

In [None]:
# Move the working directory up one level (the "main directory" named in the heading above).
# Presumably needed so the local `utilities` module imported below resolves — TODO confirm.
# NOTE(review): not idempotent — re-running this cell climbs another directory level.
%cd ..

### Importing Necessary Libraries

In [2]:
from transformers import pipeline
from transformers import AutoTokenizer
from transformers import AutoModelForTokenClassification

import pandas as pd 

from utilities import clean_and_group_entities

### Load Fine-tuned Model

In [3]:
# Model identifier passed to `from_pretrained` for both the model and the tokenizer.
model_id = "disham993/electrical-ner-bert-base"

In [4]:
# Load the fine-tuned token-classification model and its matching tokenizer.
model_fine_tuned = AutoModelForTokenClassification.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Wrap both in a NER pipeline. With the "simple" aggregation strategy the
# pipeline returns `entity_group` records rather than per-token labels.
nlp = pipeline(
    "ner",
    model=model_fine_tuned,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)

Device set to use cuda:0


### Running Inference with the Model

In [5]:
# Short sample sentence containing a vendor, a part number and a component.
example = "Texas Instruments LM358 op-amp requires dual power supply."

# Run the NER pipeline on the sample.
ner_results = nlp(example)

# Print each raw prediction dict on its own line so that the word-piece
# fragments produced by the pipeline stay visible.
for result in ner_results:
    print(result)

{'entity_group': 'VENDOR', 'score': 0.9095243, 'word': 'texas instruments', 'start': 0, 'end': 17}
{'entity_group': 'PRODUCT', 'score': 0.97422373, 'word': 'l', 'start': 18, 'end': 19}
{'entity_group': 'PRODUCT', 'score': 0.96979773, 'word': '##m', 'start': 19, 'end': 20}
{'entity_group': 'PRODUCT', 'score': 0.9726049, 'word': '##35', 'start': 20, 'end': 22}
{'entity_group': 'PRODUCT', 'score': 0.96891654, 'word': '##8', 'start': 22, 'end': 23}
{'entity_group': 'COMPONENT', 'score': 0.87857795, 'word': 'op', 'start': 24, 'end': 26}
{'entity_group': 'COMPONENT', 'score': 0.60933036, 'word': 'amp', 'start': 27, 'end': 30}


- To avoid the occasional tagging of individual subword tokens, the `clean_and_group_entities` function will be used.
- We will use a `pandas` `DataFrame` to visualize the entities more clearly.

In [6]:
# Clean and group entities
cleaned_results = clean_and_group_entities(ner_results)

# Convert to DataFrame for better visualization
df = pd.DataFrame(cleaned_results)
df

Unnamed: 0,entity_group,word,start,end,score
0,VENDOR,texas instruments,0,17,0.909524
1,PRODUCT,lm358,18,23,0.968917
2,COMPONENT,op,24,26,0.878578
3,COMPONENT,amp,27,30,0.60933


In [7]:
# Second sample sentence (an instrument plus a frequency value), fed to the pipeline in the next cell.
example_2 = "Use the oscilloscope to measure the 2.4GHz signal."

In [8]:
# Run the pipeline on the second sentence and tabulate the raw, uncleaned
# predictions; word-piece fragments such as `##ci` still appear as separate rows.
ner_results = nlp(example_2)
df = pd.DataFrame(ner_results)
df

Unnamed: 0,entity_group,score,word,start,end
0,EQUIPMENT,0.995673,os,8,10
1,EQUIPMENT,0.995672,##ci,10,12
2,EQUIPMENT,0.995279,##llo,12,15
3,EQUIPMENT,0.995483,##scope,15,20
4,DESIGN_PARAM,0.994882,2,36,37
5,DESIGN_PARAM,0.995131,.,37,38
6,DESIGN_PARAM,0.994779,4,38,39
7,DESIGN_PARAM,0.995078,##gh,39,41
8,DESIGN_PARAM,0.994922,##z,41,42


In [9]:
# Clean and group entities
cleaned_results = clean_and_group_entities(ner_results)

# Convert to DataFrame for better visualization
df = pd.DataFrame(cleaned_results)
df

Unnamed: 0,entity_group,word,start,end,score
0,EQUIPMENT,oscilloscope,8,20,0.995279
1,DESIGN_PARAM,2.4ghz,36,42,0.994779


In [10]:
# Five longer sentences used in the cells below, one per cell, indexed 0-4.
test_samples = [
    "During the testing phase, the Tektronix oscilloscope was used to analyze the 2.4GHz signal from the ESP32 microcontroller through a high-bandwidth SMA connector.",
    "The engineering team used MATLAB and LTSpice software to simulate the behavior of a complex RF circuit containing multiple BC547 transistors and 0.1μF ceramic capacitors operating at 915MHz.",
    "The PCB manufacturing process at Texas Instruments follows strict IPC-A-610 and RoHS standards, using high-grade FR4 substrate and lead-free solder for mounting surface-mount devices including 0402 resistors.",
    "Using the Keysight spectrum analyzer connected to a 50Ω impedance matched antenna, we measured the signal strength of the nRF52840 Bluetooth module transmitting at -20dBm through a quarter-wave monopole antenna.",
    "The Xilinx Vivado development suite was used to program the Artix-7 FPGA on a custom development board, which included a 100MHz crystal oscillator and multiple ADC channels for sensor interfacing via I2C protocol.",
]

In [11]:
# Sample 1 of 5 (index 0): run NER, clean/group the predictions, and display them as a table.
ner_results = nlp(test_samples[0])
cleaned_results = clean_and_group_entities(ner_results)
pd.DataFrame(cleaned_results)

Unnamed: 0,entity_group,word,start,end,score
0,VENDOR,tektronix,30,39,0.996626
1,EQUIPMENT,oscilloscope,40,52,0.991559
2,DESIGN_PARAM,2.4ghz,77,83,0.995254
3,PRODUCT,esp32,100,105,0.537074
4,TECHNOLOGY,microcontroller,106,121,0.948696
5,COMPONENT,sma connector,147,160,0.589309


In [12]:
# Sample 2 of 5 (index 1): run NER, clean/group the predictions, and display them as a table.
# NOTE(review): in the recorded output the generic word "software" is also
# tagged SOFTWARE, with a low score (~0.50).
ner_results = nlp(test_samples[1])
cleaned_results = clean_and_group_entities(ner_results)
pd.DataFrame(cleaned_results)

Unnamed: 0,entity_group,word,start,end,score
0,SOFTWARE,matlab,26,32,0.966773
1,SOFTWARE,ltspice,37,44,0.934563
2,SOFTWARE,software,45,53,0.497983
3,TECHNOLOGY,rf,92,94,0.957223
4,COMPONENT,bc547 transistors,123,140,0.59534
5,DESIGN_PARAM,0.1μf,145,150,0.977733
6,MATERIAL,ceramic,151,158,0.943008
7,COMPONENT,capacitors,159,169,0.712325
8,DESIGN_PARAM,915mhz,183,189,0.991642


In [13]:
# Sample 3 of 5 (index 2): run NER, clean/group the predictions, and display them as a table.
# NOTE(review): in the recorded output "FR4" is truncated to "fr", "lead-free"
# to "lead", "RoHS" is not detected, and "IPC-A-610" is rendered with spaces
# around the hyphens ("ipc - a - 610") although start/end cover the original 9 characters.
ner_results = nlp(test_samples[2])
cleaned_results = clean_and_group_entities(ner_results)
pd.DataFrame(cleaned_results)

Unnamed: 0,entity_group,word,start,end,score
0,TECHNOLOGY,pcb,4,7,0.909188
1,VENDOR,texas instruments,33,50,0.936554
2,STANDARD,ipc - a - 610,66,75,0.4313
3,MATERIAL,fr,113,115,0.484411
4,MATERIAL,lead,131,135,0.680823
5,COMPONENT,solder,141,147,0.509299
6,COMPONENT,0402 resistors,193,207,0.791619


In [14]:
# Sample 4 of 5 (index 3): run NER, clean/group the predictions, and display them as a table.
# NOTE(review): in the recorded output "quarter-wave monopole antenna" yields only
# the partial spans "quarter" (DESIGN_PARAM) and "mono" (COMPONENT).
ner_results = nlp(test_samples[3])
cleaned_results = clean_and_group_entities(ner_results)
pd.DataFrame(cleaned_results)

Unnamed: 0,entity_group,word,start,end,score
0,VENDOR,keysight,10,18,0.986368
1,EQUIPMENT,spectrum analyzer,19,36,0.970206
2,DESIGN_PARAM,50ω,52,55,0.991599
3,PRODUCT,nrf52840,122,130,0.906665
4,DESIGN_PARAM,-20dbm,164,170,0.650221
5,DESIGN_PARAM,quarter,181,188,0.78622
6,COMPONENT,mono,194,198,0.496793


In [15]:
# Sample 5 of 5 (index 4): run NER, clean/group the predictions, and display them as a table.
# NOTE(review): in the recorded output "Artix-7" is split into "art" (VENDOR)
# and "-7" (TECHNOLOGY), both with scores around 0.42.
ner_results = nlp(test_samples[4])
cleaned_results = clean_and_group_entities(ner_results)
pd.DataFrame(cleaned_results)

Unnamed: 0,entity_group,word,start,end,score
0,VENDOR,xilinx,4,10,0.959474
1,VENDOR,art,60,63,0.422258
2,TECHNOLOGY,-7,65,67,0.423875
3,TECHNOLOGY,fpga,68,72,0.941977
4,DESIGN_PARAM,100mhz,121,127,0.989521
5,COMPONENT,crystal,128,135,0.694763
6,COMPONENT,oscillator,136,146,0.6396
7,COMPONENT,adc,160,163,0.874121
