In [1]:
cd ../src

/workspace/Script/NLP/PII/src


In [2]:
import os
import gc
import sys
import json
import time
import torch
import joblib
import random
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from pathlib import Path
import plotly.express as px
import matplotlib.pyplot as plt

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
pd.set_option('display.float_format', lambda x: '%.4f' % x)

In [3]:
from data.data_utils import get_offset_mapping
from data.dataset import FeedbackDataset

In [4]:
from transformers import DataCollatorWithPadding
from transformers import AutoTokenizer, AutoModel, AutoConfig

%env TOKENIZERS_PARALLELISM = true

env: TOKENIZERS_PARALLELISM=true


# Params

In [5]:
data_path = Path(r"/database/kaggle/PII/data")
os.listdir(data_path)

['train.json',
 'dubai-ar.zip',
 'pii_dataset_fixed.csv',
 'mixtral-8x7b-v1.json',
 '.~lock.lecture2.pptx#',
 'openaddr-collected-global.zip',
 'lecture2.pptx',
 'openaddr-collected-us_west-sa.zip',
 'test.json',
 'moredata_dataset_fixed.csv',
 'sample_submission.csv']

In [6]:
sample_df = pd.read_csv(data_path/'sample_submission.csv')
sample_df.shape

(26, 4)

In [7]:
sample_df.head(5)

Unnamed: 0,row_id,document,token,label
0,0,7,9,B-NAME_STUDENT
1,1,7,10,I-NAME_STUDENT
2,2,7,482,B-NAME_STUDENT
3,3,7,483,I-NAME_STUDENT
4,4,7,741,B-NAME_STUDENT


In [8]:
sample_df.label.unique()

array(['B-NAME_STUDENT', 'I-NAME_STUDENT'], dtype=object)

In [9]:
df = pd.read_json(data_path/'train.json')
df.shape

(6807, 5)

In [10]:
df.head(2)

Unnamed: 0,document,full_text,tokens,trailing_whitespace,labels
0,7,Design Thinking for innovation reflexion-Avril...,"[Design, Thinking, for, innovation, reflexion,...","[True, True, True, True, False, False, True, F...","[O, O, O, O, O, O, O, O, O, B-NAME_STUDENT, I-..."
1,10,Diego Estrada\n\nDesign Thinking Assignment\n\...,"[Diego, Estrada, \n\n, Design, Thinking, Assig...","[True, False, False, True, True, False, False,...","[B-NAME_STUDENT, I-NAME_STUDENT, O, O, O, O, O..."


In [11]:
LABEL2TYPE = ('NAME_STUDENT','EMAIL','USERNAME','ID_NUM', 'PHONE_NUM','URL_PERSONAL','STREET_ADDRESS','O')
len(LABEL2TYPE)

8

In [12]:
for name in LABEL2TYPE[:-1]:
    df[name] = ((df['labels'].transform(lambda x:len([i for i in x if i.split('-')[-1]==name ])))>0)*1

In [13]:
df['nb_labels'] = df['labels'].transform(lambda x:len([i for i in x if i!="O" ]))

In [14]:
df['nb_labels'].value_counts() 

0     5862
2      599
4      108
1       86
3       52
6       46
8       14
5       10
12       6
10       5
11       3
9        3
15       2
14       2
21       2
7        1
23       1
18       1
17       1
26       1
34       1
22       1
Name: nb_labels, dtype: int64

In [15]:
df[list(LABEL2TYPE)[:-1]+['nb_labels']].sum()

NAME_STUDENT       891
EMAIL               24
USERNAME             5
ID_NUM              33
PHONE_NUM            4
URL_PERSONAL        72
STREET_ADDRESS       2
nb_labels         2739
dtype: int64

In [16]:
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

In [17]:
seeds = [42]
folds_names = []
for K in [5]:  
    for seed in seeds:
        mskf = MultilabelStratifiedKFold(n_splits=K,shuffle=True,random_state=seed)
        name = f"fold_msk_{K}_seed_{seed}"
        df[name] = -1
        for fold, (trn_, val_) in enumerate(mskf.split(df,df[list(LABEL2TYPE)[:-1]])):
            df.loc[val_, name] = fold

In [18]:
df.groupby(name)[list(LABEL2TYPE)[:-1]].sum()

Unnamed: 0_level_0,NAME_STUDENT,EMAIL,USERNAME,ID_NUM,PHONE_NUM,URL_PERSONAL,STREET_ADDRESS
fold_msk_5_seed_42,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,178,5,1,7,1,14,0
1,178,5,1,7,1,14,0
2,179,5,1,6,1,15,1
3,178,4,1,6,0,15,1
4,178,5,1,7,1,14,0


In [19]:
model_name = 'microsoft/deberta-v3-large'
tokenizer = AutoTokenizer.from_pretrained(model_name)



In [20]:
ds = FeedbackDataset(df.copy(),tokenizer)

Loaded 6807 samples.


In [21]:
# idx = random.choice(ds.df[ds.df.STREET_ADDRESS>0].index)
doc = 204
idx = ds.df[ds.df.document==doc].index[0]
# Example usage:
# idx = 80
full_text_ds = ds.df.iloc[idx]['full_text']
tokens_ds = ds.df.iloc[idx]['tokens']
labels_ds = ds.df.iloc[idx]['labels']
idx,ds.df.iloc[idx]['nb_labels']

(12, 1)

In [22]:
len(labels_ds),len(tokens_ds)

(912, 912)

In [23]:
tokens_ds[:15]

['Reflection',
 '–',
 'Visualization',
 '|',
 'Deiby',
 '|',
 'Challenge',
 '|',
 'For',
 'some',
 'time',
 'I',
 'frequented',
 'a',
 'public']

In [24]:
txt = "Reflection – Visualization Deiby | Challenge For some time I frequented a public"

In [25]:
tokenizer.tokenize(txt)

['▁Reflection',
 '▁–',
 '▁Visualization',
 '▁Dei',
 'by',
 '▁|',
 '▁Challenge',
 '▁For',
 '▁some',
 '▁time',
 '▁I',
 '▁frequented',
 '▁a',
 '▁public']

In [26]:
ds[12]

{'text': "Reflection|–|Visualization|Deiby||||Challenge|For|some|time|I|frequented|a|public|gym|and|showered|there|before|my|work|day.|Although|the|showers|may|have|been|cleaned|daily,|they|were|not|cleaned|between|each|use|-|sometimes|the|floor|was,|well|“icky”|and|traditional|flip-flops|just|didn’t|cut|it.|I|searched|and|searched,|for|an|existing|product|that|would|provide|the|protection|I|needed,|without|any|success.|I|then|realized|a|need|for|a|solution|for|increased|sanitation|when|using|any|facility|that|has|communal|showers|-|gyms,|dorms,|camps,|athletic|training|facilities,|etc.|I|came|up|with|an|idea|that|would|provide|antimicrobial|protection|for|the|feet|while|using|these|facilities’|showers.|The|challenge|was|how|best|to|explain|my|idea,|prove|that|there|was|a|need,|offer|a|solution,|and|propose|it|to|potential|sponsors|for|development.|I|spoke|with|fellow|gym|rats,|college|students,|moms|of|summer|campers,|professional|athletes,|etc.|to|see|if|they|agreed|that|a|product|li

In [39]:
ds[12]['word_boxes'][:15,:]

tensor([[   1.,    0.,    2.,    1.],
        [   2.,    0.,    3.,    1.],
        [   3.,    0.,    4.,    1.],
        [-100.,    0.,  -99.,    1.],
        [   4.,    0.,    6.,    1.],
        [   6.,    0.,    7.,    1.],
        [   7.,    0.,    8.,    1.],
        [-100.,    0.,  -99.,    1.],
        [   8.,    0.,    9.,    1.],
        [   9.,    0.,   10.,    1.],
        [  10.,    0.,   11.,    1.],
        [  11.,    0.,   12.,    1.],
        [  12.,    0.,   13.,    1.],
        [  13.,    0.,   14.,    1.],
        [  14.,    0.,   15.,    1.]])

In [24]:
ds[12]['word_boxes'][:15,:]

tensor([[ 1.,  0.,  2.,  1.],
        [ 2.,  0.,  3.,  1.],
        [ 3.,  0.,  4.,  1.],
        [ 4.,  0.,  5.,  1.],
        [ 5.,  0.,  7.,  1.],
        [ 7.,  0.,  8.,  1.],
        [ 8.,  0.,  9.,  1.],
        [ 9.,  0., 10.,  1.],
        [10.,  0., 11.,  1.],
        [11.,  0., 12.,  1.],
        [12.,  0., 13.,  1.],
        [13.,  0., 14.,  1.],
        [14.,  0., 15.,  1.],
        [15.,  0., 16.,  1.],
        [16.,  0., 17.,  1.]])

In [27]:
full_text = df.iloc[idx]['full_text']
tokens = df.iloc[idx]['tokens']
labels = df.iloc[idx]['labels']
idx,df.iloc[idx]['nb_labels']

(12, 1)

In [45]:
import re

def replace_multiple_spaces(text):
    return re.sub(r'\s{2,}', ' | ', text)

# Examples
text1 = "amed      is a genius"
text2 = "amed  is a genius"
text3 = "amed                        is             a genius"

result1 = replace_multiple_spaces(text1)
result2 = replace_multiple_spaces(text2)
result3 = replace_multiple_spaces(text3)

print(result1)  # Output: amed | is a genius
print(result2)  # Output: amed | is a genius
print(result3)  # Output: amed | is a genius


amed | is a genius
amed | is a genius
amed | is | a genius


In [42]:
import re

def replace_multiple_spaces(text):
    return re.sub(r'\s+', '|', text)

# Examples
text1 = "amed      is a genius"
text2 = "amed  is a genius"
text3 = "amed                        is             a genius"

result1 = replace_multiple_spaces(text1)
result2 = replace_multiple_spaces(text2)
result3 = replace_multiple_spaces(text3)

print(result1)  # Output: amed | is a genius
print(result2)  # Output: amed | is a genius
print(result3)  # Output: amed | is | a | genius


amed|is|a|genius
amed|is|a|genius
amed|is|a|genius


In [28]:
offset_mapping = get_offset_mapping(full_text, tokens)
for token, offset,l in zip(tokens, offset_mapping,labels):
    if l!="O":
        print(f"{token}: {offset} : {l}")

Deiby: (29, 34) : B-NAME_STUDENT


In [36]:
import re
def remove_double_spaces(text):
    # Use a regular expression to replace consecutive spaces with a single space
    cleaned_text = re.sub(r'  ', '|', text)
    return cleaned_text

In [37]:
# text = "Reflection – Visualization   Deiby"
print(remove_double_spaces(full_text))

Reflection – Visualization| Deiby

Challenge||For some time I frequented a public gym and showered there before my work day.|Although the|showers may have been cleaned daily, they were not cleaned between each use - sometimes|the floor was, well “icky” and traditional flip-flops just didn’t cut it.|I searched and searched, for|an existing product that would provide the protection I needed, without any success.|I then|realized a need for a solution for increased sanitation when using any facility that has communal|showers - gyms, dorms, camps, athletic training facilities, etc.|I came up with an idea that would|provide antimicrobial protection for the feet while using these facilities’ showers.|The challenge|was how best to explain my idea, prove that there was a need, offer a solution, and propose it to|potential sponsors for development.||I spoke with fellow gym rats, college students, moms of summer campers, professional athletes,|etc. to see if they agreed that a product like mine c

In [30]:
full_text

"Reflection – Visualization   Deiby\n\nChallenge    For some time I frequented a public gym and showered there before my work day.  Although the  showers may have been cleaned daily, they were not cleaned between each use - sometimes  the floor was, well “icky” and traditional flip-flops just didn’t cut it.  I searched and searched, for  an existing product that would provide the protection I needed, without any success.  I then  realized a need for a solution for increased sanitation when using any facility that has communal  showers - gyms, dorms, camps, athletic training facilities, etc.  I came up with an idea that would  provide antimicrobial protection for the feet while using these facilities’ showers.  The challenge  was how best to explain my idea, prove that there was a need, offer a solution, and propose it to  potential sponsors for development.    I spoke with fellow gym rats, college students, moms of summer campers, professional athletes,  etc. to see if they agreed that

In [25]:
print(full_text)

Reflection – Visualization   Deiby

Challenge    For some time I frequented a public gym and showered there before my work day.  Although the  showers may have been cleaned daily, they were not cleaned between each use - sometimes  the floor was, well “icky” and traditional flip-flops just didn’t cut it.  I searched and searched, for  an existing product that would provide the protection I needed, without any success.  I then  realized a need for a solution for increased sanitation when using any facility that has communal  showers - gyms, dorms, camps, athletic training facilities, etc.  I came up with an idea that would  provide antimicrobial protection for the feet while using these facilities’ showers.  The challenge  was how best to explain my idea, prove that there was a need, offer a solution, and propose it to  potential sponsors for development.    I spoke with fellow gym rats, college students, moms of summer campers, professional athletes,  etc. to see if they agreed that a 

In [26]:
print(full_text_ds)

Reflection – Visualization Deiby | Challenge For some time I frequented a public gym and showered there before my work day. Although the showers may have been cleaned daily, they were not cleaned between each use - sometimes the floor was, well “icky” and traditional flip-flops just didn’t cut it. I searched and searched, for an existing product that would provide the protection I needed, without any success. I then realized a need for a solution for increased sanitation when using any facility that has communal showers - gyms, dorms, camps, athletic training facilities, etc. I came up with an idea that would provide antimicrobial protection for the feet while using these facilities’ showers. The challenge was how best to explain my idea, prove that there was a need, offer a solution, and propose it to potential sponsors for development. I spoke with fellow gym rats, college students, moms of summer campers, professional athletes, etc. to see if they agreed that a product like mine cou

In [159]:
import spacy
from spacy import displacy
from pylab import cm, matplotlib
import os

colors = {
            'NAME_STUDENT': '#8000ff',
            'EMAIL': '#2b7ff6',
            'USERNAME': '#2adddd',
            'ID_NUM': '#80ffb4',
            'PHONE_NUM': 'd4dd80',
            'URL_PERSONAL': '#ff8042',
            'STREET_ADDRESS': '#ff0000'
         }


def visualize(full_text,offset_mapping,labels):
    
    ents = []
    for offset,lab in zip(offset_mapping,labels):
        ents.append({
                        'start': int(offset[0]), 
                         'end': int(offset[1]), 
                         'label': str(lab.split('-')[1]) #+ ' - ' + str(row['discourse_effectiveness'])
                    })

    doc2 = {
        "text": full_text,
        "ents": ents,
#         "title": "idx"
    }

    options = {"ents": list(colors.keys()), "colors": colors}
    displacy.render(doc2, style="ent", options=options, manual=True, jupyter=True)

In [160]:
offset_mapping = get_offset_mapping(full_text, tokens)
offset_mapping_ = [x for (x,y) in zip(offset_mapping,labels) if y!="O"]
labels_ = [x for x in labels if x!="O"]

In [161]:
visualize(full_text,offset_mapping_,labels_)

In [162]:
offset_mapping = get_offset_mapping(full_text_ds, tokens_ds)
offset_mapping_ = [x for (x,y) in zip(offset_mapping,labels_ds) if y!="O"]
labels_ = [x for x in labels if x!="O"]

In [163]:
visualize(full_text_ds,offset_mapping_,labels_)

In [164]:
pip install postal

Defaulting to user installation because normal site-packages is not writeable
Collecting postal
  Downloading postal-1.1.10.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: postal
  Building wheel for postal (setup.py) ... [?25lerror
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mpython setup.py bdist_wheel[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m [31m[39 lines of output][0m
  [31m   [0m running bdist_wheel
  [31m   [0m running build
  [31m   [0m running build_py
  [31m   [0m creating build
  [31m   [0m creating build/lib.linux-x86_64-cpython-310
  [31m   [0m creating build/lib.linux-x86_64-cpython-310/postal
  [31m   [0m copying postal/parser.py -> build/lib.linux-x86_64-cpython-310/postal
  [31m   [0m copying postal/tokenize.py -> build/lib.linux-x86_64-cpython-310/postal
  [31m   [0m copying postal/near_dupe.py -> build/lib.linux-x86_64-cp

[1;31merror[0m: [1mlegacy-install-failure[0m

[31m×[0m Encountered error while trying to install package.
[31m╰─>[0m postal

[1;35mnote[0m: This is an issue with the package mentioned above, not pip.
[1;36mhint[0m: See above for output from the failure.
[?25hNote: you may need to restart the kernel to use updated packages.


In [165]:
from postal.expand import expand_address
expand_address('Quatre vingt douze Ave des Champs-Élysées')

from postal.parser import parse_address
parse_address('The Book Club 100-106 Leonard St, Shoreditch, London, Greater London, EC2A 4RH, United Kingdom')

ModuleNotFoundError: No module named 'postal'

In [166]:
pip install geopy

Defaulting to user installation because normal site-packages is not writeable
Collecting geopy
  Downloading geopy-2.4.1-py3-none-any.whl (125 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m125.4/125.4 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting geographiclib<3,>=1.52
  Downloading geographiclib-2.0-py3-none-any.whl (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.3/40.3 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: geographiclib, geopy
Successfully installed geographiclib-2.0 geopy-2.4.1
Note: you may need to restart the kernel to use updated packages.


In [1]:
from geopy.geocoders import Nominatim

In [2]:
geolocator = Nominatim(user_agent="my_geocoder")
location = geolocator.geocode("aseem Mabunda 591 Smith Centers Apt. 656 Joshuamouth, RI 95963 ( The Netherlands) 410.526.1667 vpi@mn.nl | Mind Mapping, Challenge: For several years I have been working for an Asset manager in the Netherlands. During this period I have been involved in many projects. Certainly in the world of asset management, much has changed in recent years in the area of Law and Regulations")

if location:
    print(location.address)

In [4]:
text = "aseem Mabunda 591 Smith Centers Apt. 656 Joshuamouth, RI 95963 ( The Netherlands) 410.526.1667 vpi@mn.nl | Mind Mapping, Challenge: For several years I have been working for an Asset manager in the Netherlands. During this period I have been involved in many projects. Certainly in the world of asset management, much has changed in recent years in the area of Law and Regulations"

In [5]:
import spacy

In [8]:
!python -m spacy download en_core_web_sm

Defaulting to user installation because normal site-packages is not writeable
Collecting en-core-web-sm==3.6.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m78.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: en-core-web-sm
Successfully installed en-core-web-sm-3.6.0
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


In [9]:
nlp = spacy.load("en_core_web_sm")
doc = nlp(text)

In [12]:
for ent in doc.ents:
#     if ent.label_ == "GPE":  # GPE refers to geopolitical entity, which can include addresses
    print(ent.label_
          ,ent.text)

ORG Mabunda 591 Smith Centers Apt
CARDINAL 656
PERSON Joshuamouth
GPE Netherlands
PERSON Mind Mapping
DATE several years
ORG Asset
GPE Netherlands
DATE recent years


In [13]:
pip install usaddress

Defaulting to user installation because normal site-packages is not writeable
Collecting usaddress
  Downloading usaddress-0.5.10-py2.py3-none-any.whl (63 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.9/63.9 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
Collecting probableparsing
  Downloading probableparsing-0.0.1-py2.py3-none-any.whl (3.1 kB)
Collecting python-crfsuite>=0.7
  Downloading python_crfsuite-0.9.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m59.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-crfsuite, probableparsing, usaddress
Successfully installed probableparsing-0.0.1 python-crfsuite-0.9.10 usaddress-0.5.10
Note: you may need to restart the kernel to use updated packages.


In [14]:
import usaddress

# text = "Your input text here"
parsed_address, address_type = usaddress.tag(text)

if address_type == 'Street Address':
    print(parsed_address)


RepeatedLabelError: 
ERROR: Unable to tag this string because more than one area of the string has the same label

ORIGINAL STRING:  aseem Mabunda 591 Smith Centers Apt. 656 Joshuamouth, RI 95963 ( The Netherlands) 410.526.1667 vpi@mn.nl | Mind Mapping, Challenge: For several years I have been working for an Asset manager in the Netherlands. During this period I have been involved in many projects. Certainly in the world of asset management, much has changed in recent years in the area of Law and Regulations
PARSED TOKENS:    [('aseem', 'Recipient'), ('Mabunda', 'Recipient'), ('591', 'AddressNumber'), ('Smith', 'StreetName'), ('Centers', 'StreetNamePostType'), ('Apt.', 'OccupancyType'), ('656', 'OccupancyIdentifier'), ('Joshuamouth,', 'PlaceName'), ('RI', 'StateName'), ('95963', 'ZipCode'), ('The', 'Recipient'), ('Netherlands)', 'Recipient'), ('410.526.1667', 'Recipient'), ('vpi@mn.nl', 'Recipient'), ('Mind', 'Recipient'), ('Mapping,', 'Recipient'), ('Challenge:', 'Recipient'), ('For', 'Recipient'), ('several', 'Recipient'), ('years', 'Recipient'), ('I', 'Recipient'), ('have', 'Recipient'), ('been', 'Recipient'), ('working', 'Recipient'), ('for', 'Recipient'), ('an', 'Recipient'), ('Asset', 'Recipient'), ('manager', 'Recipient'), ('in', 'Recipient'), ('the', 'Recipient'), ('Netherlands.', 'Recipient'), ('During', 'Recipient'), ('this', 'Recipient'), ('period', 'Recipient'), ('I', 'Recipient'), ('have', 'Recipient'), ('been', 'Recipient'), ('involved', 'Recipient'), ('in', 'Recipient'), ('many', 'Recipient'), ('projects.', 'Recipient'), ('Certainly', 'Recipient'), ('in', 'Recipient'), ('the', 'Recipient'), ('world', 'Recipient'), ('of', 'Recipient'), ('asset', 'Recipient'), ('management,', 'Recipient'), ('much', 'Recipient'), ('has', 'Recipient'), ('changed', 'Recipient'), ('in', 'Recipient'), ('recent', 'Recipient'), ('years', 'Recipient'), ('in', 'Recipient'), ('the', 'Recipient'), ('area', 'Recipient'), ('of', 'Recipient'), ('Law', 'Recipient'), ('and', 'Recipient'), ('Regulations', 'Recipient')]
UNCERTAIN LABEL:  Recipient

When this error is raised, it's likely that either (1) the string is not a valid person/corporation name or (2) some tokens were labeled incorrectly

To report an error in labeling a valid name, open an issue at https://github.com/datamade/usaddress/issues/new - it'll help us continue to improve probablepeople!

For more information, see the documentation at https://usaddress.readthedocs.io/