In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
df = pd.read_csv('imdrf.csv')

In [3]:
df.head()

Unnamed: 0,code,term,definition,non-imdrf code/term,status,status description,codehierarchy,non-imdrf code,primary category,secondary category
0,F01,Change in Therapeutic Response,Change in response to treatment or cure of a d...,,,,F01,,,
1,F0101,Therapeutic Response Decreased,A reduction in the desirable and beneficial ef...,,,,F01|F0101,,,
2,F0102,Therapeutic Response Increased,An increase in the desirable and beneficial ef...,,,,F01|F0102,,,
3,F0103,Unexpected Therapeutic Effects,Unanticipated desirable and beneficial effects...,,,,F01|F0103,,,
4,F02,Death,The cessation of life.,,,,F02,,,


In [4]:
user_input = 'bleeding'


In [5]:
# Create a TfidfVectorizer to transform the text data
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['term'])

# Transform the user input into a vector
user_vector = vectorizer.transform([user_input])

# Compute the cosine similarity between the user vector and all the text vectors
similarity_scores = cosine_similarity(X, user_vector)


In [6]:
# Get the indices of the top 3 scores
top_indices = similarity_scores.argsort(axis=0)[-3:].flatten()

# Get the top 3 terms, rows, and scores
top_terms = vectorizer.get_feature_names()
top_rows = df.iloc[top_indices]['term']
top_scores = similarity_scores[top_indices].flatten()

Term: abdominal
Row: Intermenstrual Bleeding
Score: 0.6736089335682713

Term: abnormal
Row: Genital Bleeding
Score: 0.6736089335682713

Term: aborted
Row: Hemorrhage/Bleeding
Score: 0.7483795166399424





array([1444, 1443, 1160], dtype=int64)

In [9]:
disp = pd.DataFrame(
   columns=('IMDRF Code', 'IMDRF Term', 'Score'))

In [11]:
top_scores_percent = (top_scores * 100).round(2)

In [12]:
top_scores_percent = (top_scores * 100).round(2)
top_scores_percent_str = [str(i)+'%' for i in top_scores_percent]
top_scores_percent_str.reverse()
codes = [df.at[i, 'code'] for i in top_indices]
codes.reverse()
terms = [df.at[i, 'term'] for i in top_indices]
terms.reverse()
disp['IMDRF Code'] = codes
disp['IMDRF Term'] = terms
disp['Score'] = top_scores_percent_str

In [13]:
top_scores_percent_str

['67.36%', '67.36%', '74.84%']

In [14]:
top_scores_percent_str.reverse()

In [15]:
top_scores_percent_str

['74.84%', '67.36%', '67.36%']

In [16]:
codes = [df.at[i, 'code'] for i in top_indices]

In [17]:
codes.reverse()

In [18]:
codes

['E0506', 'E1407', 'E140701']

In [19]:
terms = [df.at[i, 'term'] for i in top_indices]

In [20]:
terms.reverse()

In [21]:
terms

['Hemorrhage/Bleeding', 'Genital Bleeding', 'Intermenstrual Bleeding']

In [22]:
disp['IMDRF Code'] = codes
disp['IMDRF Term'] = terms
disp['Score'] = top_scores_percent_str

In [23]:
disp

Unnamed: 0,IMDRF Code,IMDRF Term,Score
0,E0506,Hemorrhage/Bleeding,74.84%
1,E1407,Genital Bleeding,67.36%
2,E140701,Intermenstrual Bleeding,67.36%


In [24]:
for i in df.term[:50]:
    print(i)
    

Change in Therapeutic Response
Therapeutic Response Decreased
Therapeutic Response Increased
Unexpected Therapeutic Effects
Death
Intrauterine Fetal Death
Brain Death
Delay to Diagnosis
Delay to Treatment/ Therapy
Disruption of Subsequent Medical Procedure
Exacerbation of Existing Condition
Hospitalization or Prolonged Hospitalization
Intensive Care
Fetal Harm
Inadequate/Inappropriate Treatment or Diagnostic Exposure
Absence of Treatment
Incompatible Blood Transfusion
Missed Dose
Underdose
Radiation Underdose
Overdose
Radiation Overdose
Minor Injury/ Illness / Impairment
Serious Injury/ Illness/ Impairment
Chronic Disease
Disability
Life Threatening Illness or Injury
Permanent Impairment
Temporary Impairment
Misdiagnosis/ Misclassification
Prolonged Episode of Care
Recognised Device or Procedural Complication
Reduction in Life Expectancy
Sedation
Rehabilitation
Surgical Intervention
Additional Surgery
Amputation
Device Explantation
Device Repositioning
Device Revision or Replacement
Mo

In [34]:
import pandas as pd
import random

# Sample data
samp1 = ['Infection', 'Bleeding', 'Hypertension', 'Neurological Dysfunction', 'Respiratory Failure', 
         'Pericardial Fluid', 'Cardiac Arrhythmia', 'Missed Dose', 'Underdose', 'Sedation']

# Create an empty DataFrame
df = pd.DataFrame(columns=('P1234/234', 'Subject Submission'))

# Populate the DataFrame
df.loc[0] = ['Event', 'Rate Indicator']
choices = random.sample(samp1, k=4)
for i, choice in enumerate(choices, start=1):
    df.loc[i] = [choice, f'{random.randint(1, 99)}%']

# Sort the Subject Submission column in descending order
df['Subject Submission'] = df['Subject Submission'].astype(str)
df.sort_values('Subject Submission', ascending=False, inplace=True)

# Reset the index
df.reset_index(drop=True, inplace=True)

# Print the DataFrame
print(df)



                  P1234/234 Subject Submission
0                     Event     Rate Indicator
1  Neurological Dysfunction                47%
2               Missed Dose                43%
3       Respiratory Failure                37%
4        Cardiac Arrhythmia                37%


In [36]:
import pandas as pd
import random

# Sample data
samp1 = ['Infection', 'Bleeding', 'Hypertension', 'Neurological Dysfunction', 'Respiratory Failure', 
         'Pericardial Fluid', 'Cardiac Arrhythmia', 'Missed Dose', 'Underdose', 'Sedation']

# Create an empty DataFrame for the first table
df1 = pd.DataFrame(columns=('P1234/234', 'Subject Submission'))

# Create an empty DataFrame for the second table
df2 = pd.DataFrame(columns=('P1234/234', 'Subject Submission'))

# Randomly choose unique elements from samp1 for both tables
choices = random.sample(samp1, k=4)

# Populate the first table
df1.loc[0] = ['Event', 'Rate Indicator']
for i, choice in enumerate(choices, start=1):
    df1.loc[i] = [choice, f'{random.randint(1, 99)}%']

# Sort the Subject Submission column in descending order for the first table
df1['Subject Submission'] = df1['Subject Submission'].astype(str)
df1.sort_values('Subject Submission', ascending=False, inplace=True)

# Reset the index for the first table
df1.reset_index(drop=True, inplace=True)

# Print the first table
print("First Table:")
print(df1)
print()

# Populate the second table
df2.loc[0] = ['Event', 'Rate Indicator']
for i, choice in enumerate(choices, start=1):
    df2.loc[i] = [choice, f'{random.randint(1, 99)}%']

# Sort the Subject Submission column in descending order for the second table
df2['Subject Submission'] = df2['Subject Submission'].astype(str)
df2.sort_values('Subject Submission', ascending=False, inplace=True)

# Reset the index for the second table
df2.reset_index(drop=True, inplace=True)

# Print the second table
print("Second Table:")
print(df2)


First Table:
            P1234/234 Subject Submission
0               Event     Rate Indicator
1  Cardiac Arrhythmia                99%
2   Pericardial Fluid                69%
3            Sedation                 6%
4            Bleeding                36%

Second Table:
            P1234/234 Subject Submission
0               Event     Rate Indicator
1            Sedation                96%
2   Pericardial Fluid                59%
3  Cardiac Arrhythmia                56%
4            Bleeding                45%
