In [1]:
import numpy as np

import pandas as pd

from keras.models import Sequential

from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D

from sklearn.model_selection import train_test_split

from sklearn.feature_extraction.text import CountVectorizer

from keras.preprocessing.text import Tokenizer

from keras.preprocessing.sequence import pad_sequences

from keras.utils.np_utils import to_categorical

import re

In [2]:
# Load the raw report export. The file is decoded as Windows-1252; malformed
# rows are skipped with a warning instead of aborting the whole load.
csv_path = "Mirena_TCS.csv"
try:
    # pandas >= 1.3: `error_bad_lines` is deprecated (and removed in 2.0).
    # on_bad_lines='warn' matches error_bad_lines=False combined with the
    # default warn_bad_lines=True of the legacy API.
    df = pd.read_csv(csv_path, encoding='cp1252', on_bad_lines='warn')
except TypeError:
    # Older pandas rejects the unknown `on_bad_lines` keyword; fall back to
    # the legacy spelling so the notebook still runs there.
    df = pd.read_csv(csv_path, encoding='cp1252', error_bad_lines=False)
df.head()

Unnamed: 0,Sr No.,Reported_Event,EVENT Preferred Term (Seriousness) (Event),EVENT_Lower_Level_Term(Seriousness),EVENT Onset Date (Event),EVENT Seriousness Criteria (Event),EVENT Company Causality (Event Assessment),EVENT Reporter Causality (Event Assessment),SUSPECT PRODUCT Product (as reported),SUSPECT PRODUCT Product Name (Suspect)
0,1,1) IUD migrated to lower uterine segment/ IUS ...,1) Device dislocation (s); \n2) Genital haemor...,1) IUD migration (s); \n2) Genital bleeding (s...,1) Device dislocation : 2016; \n2) Genital hae...,1) Device dislocation : Medically Significan...,1) SAG0224B.282 - 1) Device dislocation : rela...,1) SAG0224B.282 - 1) Device dislocation : not ...,1) --,1) Mirena
1,2,1) Patient removed the IUD by herself;,1) Intentional medical device removal by patie...,1) Intentional medical device removal by patie...,1) Intentional medical device removal by patie...,,1) Study Drug - 1) Intentional medical device ...,1) Study Drug - 1) Intentional medical device ...,1) Mirena,1) Mirena
2,3,1) Patient was perforated during insertion pro...,1) Uterine perforation (s); \n2) Complication ...,1) Uterine perforation post procedural (s); \n...,1) Uterine perforation : --; \n2) Complication...,1) Uterine perforation : Medically Significa...,1) SAG0224B.282 - 1) Uterine perforation : rel...,1) SAG0224B.282 - 1) Uterine perforation : not...,1) --,1) Mirena
3,4,1) one to the arms of the T body was embeded i...,1) Embedded device (s); \n2) Abdominal pain (n);,1) IUD embedded (s); \n2) Abdominal pain (n);,1) Embedded device : 2016; \n2) Abdominal pain...,1) Embedded device : Medically Significant /,1) SAG0224B.282 - 1) Embedded device : related,1) SAG0224B.282 - 1) Embedded device : not rep...,1) --,1) Mirena
4,5,1) Mirena was partially expelled;,1) Device expulsion (n);,1) Partial expulsion of IUD (n);,1) Device expulsion : --;,,1) Study Drug - 1) Device expulsion : related,1) Study Drug - 1) Device expulsion : not repo...,1) Mirena,1) Mirena


In [3]:
# List the column labels of the loaded frame.
df.columns

Index(['Sr No.', 'Reported_Event',
       'EVENT Preferred Term (Seriousness) (Event)',
       'EVENT_Lower_Level_Term(Seriousness)', 'EVENT Onset Date (Event)',
       'EVENT Seriousness Criteria (Event)',
       'EVENT Company Causality (Event Assessment)',
       'EVENT Reporter Causality (Event Assessment)',
       'SUSPECT PRODUCT Product (as reported)',
       'SUSPECT PRODUCT Product Name (Suspect)'],
      dtype='object')

## Drop unwanted columns

In [4]:
# Columns that are not needed downstream; every column not listed here is kept.
unwanted_columns = [
    'Sr No.',
    'EVENT Preferred Term (Seriousness) (Event)',
    'EVENT Onset Date (Event)',
    'EVENT Seriousness Criteria (Event)',
    'EVENT Company Causality (Event Assessment)',
    'EVENT Reporter Causality (Event Assessment)',
    'SUSPECT PRODUCT Product (as reported)',
    'SUSPECT PRODUCT Product Name (Suspect)',
]
df = df.drop(columns=unwanted_columns)
df.head()

Unnamed: 0,Reported_Event,EVENT_Lower_Level_Term(Seriousness)
0,1) IUD migrated to lower uterine segment/ IUS ...,1) IUD migration (s); \n2) Genital bleeding (s...
1,1) Patient removed the IUD by herself;,1) Intentional medical device removal by patie...
2,1) Patient was perforated during insertion pro...,1) Uterine perforation post procedural (s); \n...
3,1) one to the arms of the T body was embeded i...,1) IUD embedded (s); \n2) Abdominal pain (n);
4,1) Mirena was partially expelled;,1) Partial expulsion of IUD (n);


## Convert to lowercase

In [5]:
# Lower-case both text columns through the vectorised string accessor.
for text_col in ("Reported_Event", "EVENT_Lower_Level_Term(Seriousness)"):
    df[text_col] = df[text_col].str.lower()
df.head()

Unnamed: 0,Reported_Event,EVENT_Lower_Level_Term(Seriousness)
0,1) iud migrated to lower uterine segment/ ius ...,1) iud migration (s); \n2) genital bleeding (s...
1,1) patient removed the iud by herself;,1) intentional medical device removal by patie...
2,1) patient was perforated during insertion pro...,1) uterine perforation post procedural (s); \n...
3,1) one to the arms of the t body was embeded i...,1) iud embedded (s); \n2) abdominal pain (n);
4,1) mirena was partially expelled;,1) partial expulsion of iud (n);


## Remove digits and words containing digits

In [6]:
# Delete every run of word characters that contains at least one digit
# (for example the "1" of the leading "1)" numbering).
# The pattern is a raw string so that \w and \d are regex escapes rather than
# invalid Python string escapes. Using the vectorised .str accessor instead of
# re.sub inside apply() means missing values pass through untouched instead of
# raising TypeError (the columns are only cast to str in a later cell).
digit_token_pattern = r'\w*\d\w*'
for text_col in ('Reported_Event', 'EVENT_Lower_Level_Term(Seriousness)'):
    df[text_col] = df[text_col].str.replace(digit_token_pattern, '', regex=True)
df.head()

Unnamed: 0,Reported_Event,EVENT_Lower_Level_Term(Seriousness)
0,) iud migrated to lower uterine segment/ ius n...,) iud migration (s); \n) genital bleeding (s);...
1,) patient removed the iud by herself;,) intentional medical device removal by patien...
2,) patient was perforated during insertion proc...,) uterine perforation post procedural (s); \n)...
3,) one to the arms of the t body was embeded in...,) iud embedded (s); \n) abdominal pain (n);
4,) mirena was partially expelled;,) partial expulsion of iud (n);


In [7]:
# string.punctuation in Python contains the following punctuation symbols:
#     !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~


#import spacy
import string
# Cast both text columns to str so the string-only helpers below can be
# applied to every cell.
df = df.astype({
    "Reported_Event": str,
    "EVENT_Lower_Level_Term(Seriousness)": str,
})
PUNCT_TO_REMOVE = string.punctuation
# Deletion table built once at definition time instead of on every call.
_PUNCT_TABLE = str.maketrans('', '', PUNCT_TO_REMOVE)


def remove_punctuation(Event):
    """Return ``Event`` with every character of ``PUNCT_TO_REMOVE`` deleted.

    Parameters
    ----------
    Event : str or any object
        Text to clean. Non-string values are converted with ``str()`` first,
        mirroring what ``remove_stopwords`` does with its argument.

    Returns
    -------
    str
        The text without ASCII punctuation. Characters outside
        ``string.punctuation`` (for example typographic quotes) are kept.
    """
    return str(Event).translate(_PUNCT_TABLE)

# Strip punctuation from both text columns, one cell value at a time.
for text_col in ("Reported_Event", "EVENT_Lower_Level_Term(Seriousness)"):
    df[text_col] = df[text_col].apply(remove_punctuation)
df.head()

Unnamed: 0,Reported_Event,EVENT_Lower_Level_Term(Seriousness)
0,iud migrated to lower uterine segment ius not...,iud migration s \n genital bleeding s \n post...
1,patient removed the iud by herself,intentional medical device removal by patient n
2,patient was perforated during insertion proce...,uterine perforation post procedural s \n devi...
3,one to the arms of the t body was embeded in ...,iud embedded s \n abdominal pain n
4,mirena was partially expelled,partial expulsion of iud n


In [8]:
# Remove e-mail addresses: any whitespace-delimited token containing "@",
# together with one trailing whitespace character.
# NOTE(review): "@" is part of string.punctuation, so once the
# punctuation-removal step has run there is no "@" left and this step changes
# nothing. It has to run before punctuation stripping to have any effect.
email_pattern = r'\S*@\S*\s?'  # raw string: \S and \s are regex escapes
for text_col in ("Reported_Event", "EVENT_Lower_Level_Term(Seriousness)"):
    df[text_col] = df[text_col].str.replace(email_pattern, '', regex=True)

df.head()

Unnamed: 0,Reported_Event,EVENT_Lower_Level_Term(Seriousness)
0,iud migrated to lower uterine segment ius not...,iud migration s \n genital bleeding s \n post...
1,patient removed the iud by herself,intentional medical device removal by patient n
2,patient was perforated during insertion proce...,uterine perforation post procedural s \n devi...
3,one to the arms of the t body was embeded in ...,iud embedded s \n abdominal pain n
4,mirena was partially expelled,partial expulsion of iud n


In [9]:
# Collapse every run of whitespace (newlines, tabs, repeated spaces) into a
# single space. This also removes the "\n" separators between numbered events.
whitespace_run_pattern = r'\s+'  # raw string: \s is a regex escape
for text_col in ("Reported_Event", "EVENT_Lower_Level_Term(Seriousness)"):
    df[text_col] = df[text_col].str.replace(whitespace_run_pattern, ' ', regex=True)
df.head(20)

Unnamed: 0,Reported_Event,EVENT_Lower_Level_Term(Seriousness)
0,iud migrated to lower uterine segment ius not...,iud migration s genital bleeding s post coita...
1,patient removed the iud by herself,intentional medical device removal by patient n
2,patient was perforated during insertion proce...,uterine perforation post procedural s device ...
3,one to the arms of the t body was embeded in ...,iud embedded s abdominal pain n
4,mirena was partially expelled,partial expulsion of iud n
5,mirena expelled with tampon,iud expelled n
6,embedmentinto uterine myometruim not throughp...,iud embedded s device insertion failed n devi...
7,pregnant weeks and days pregnant tried to loc...,pregnancy with iud s iud dislocation s vomiti...
8,device was expelled,iud expelled n
9,coming out “on a friday” when she was at home...,iud expelled n


## Stop word removal

In [10]:
# NLTK Stop words
from nltk.corpus import stopwords
# NLTK's English stop-word list, extended with the single-letter tokens 'n'
# and 's' that remain from the "(n)" / "(s)" markers once punctuation is gone.
# NOTE(review): the NLTK list contains negations such as 'no' and 'not';
# confirm that dropping them from the event text is intended.
stop_words = [*stopwords.words('english'), 'n', 's']
print(stop_words)

['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', '

In [11]:
#STOPWORDS = set(stopwords.words('english'))
def remove_stopwords(text):
    """Return ``text`` without the words listed in ``stop_words``.

    The argument is converted with ``str()``, split on whitespace, filtered
    against the module-level ``stop_words`` collection and re-joined with
    single spaces.
    """
    kept_words = []
    for token in str(text).split():
        if token in stop_words:
            continue
        kept_words.append(token)
    return " ".join(kept_words)

In [12]:
# Filter stop words out of both text columns, one cell value at a time.
for text_col in ("Reported_Event", "EVENT_Lower_Level_Term(Seriousness)"):
    df[text_col] = df[text_col].apply(remove_stopwords)

df.head()

Unnamed: 0,Reported_Event,EVENT_Lower_Level_Term(Seriousness)
0,iud migrated lower uterine segment ius fundal ...,iud migration genital bleeding post coital ble...
1,patient removed iud,intentional medical device removal patient
2,patient perforated insertion process attending...,uterine perforation post procedural device ins...
3,one arms body embeded patient myometriumone ar...,iud embedded abdominal pain
4,mirena partially expelled,partial expulsion iud


In [None]:
import torch
from transformers import BertTokenizer, BertModel
import argparse
import logging
import torch
from transformers import BertConfig, BertForPreTraining, load_tf_weights_in_bert
# Configure the root logger so that messages of level INFO and above are emitted.
logging.basicConfig(level=logging.INFO)

In [1]:
# Install the pinned Spark NLP release. The explicit %pip line magic targets
# the environment of the running kernel and does not depend on automagic.
%pip install spark-nlp==3.0.1

Collecting spark-nlp==3.0.1
  Downloading spark_nlp-3.0.1-py2.py3-none-any.whl (146 kB)
Installing collected packages: spark-nlp
Successfully installed spark-nlp-3.0.1
Note: you may need to restart the kernel to use updated packages.
