In [1]:
import warnings
import pandas as pd
import matplotlib.pyplot as plt
from google.colab import drive
import os
import json

# Hide FutureWarning messages (e.g. library deprecation notices) so cell output stays readable.
warnings.filterwarnings('ignore', category=FutureWarning)

In [2]:
# Mount Google Drive, then switch into the project's data folder.
# The target is an absolute path rooted at the mount point, so re-running this
# cell after the working directory has already changed still succeeds (the
# previous relative path only resolved while the cwd was /content).
drive.mount('/content/drive/')
os.chdir('/content/drive/MyDrive/Colab Notebooks/PLP_Practice_Module/data')

Mounted at /content/drive/


In [3]:
# Sanity check: print the current working directory after the chdir above.
!pwd

/content/drive/MyDrive/Colab Notebooks/PLP_Practice_Module/data


In [4]:
# Load the raw dataset from the working directory into `data`.
# The encoding is pinned to UTF-8 (the standard JSON encoding) so decoding does
# not depend on the platform's locale default.
with open('data.json', encoding='utf-8') as f:
  data = json.load(f)

In [5]:
# Number of top-level entries in the loaded JSON object.
len(data)

75

In [6]:
# Empty frame with the three columns that the question table will hold.
question_df = pd.DataFrame(columns=['id', 'question', 'type'])

In [7]:
# Flatten `data` into one record per question and build `question_df` from them.
# Records are collected in a list and the frame is constructed once:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on each
# call. Rebuilding from scratch also keeps the cell idempotent, so re-running it
# no longer duplicates rows.
question_records = []
for key, value in data.items():
  # 'C' is labelled clinical; every other question_type code is labelled biological.
  question_type = 'clinical' if value['question_type'] == 'C' else 'biological'
  question_records.append({ 'id': key, 'question': value['question'], 'type': question_type })
  print(key, value['question'], question_type)

# Passing `columns` keeps the expected schema and column order even when there are no records.
question_df = pd.DataFrame(question_records, columns=['id', 'question', 'type'])

1 What causes muscle spasm? clinical
2 What does duloxetine do? biological
3 How can i reduce my potassium levels? clinical
4 How is diabetes diagnosed? clinical
5 How to treat a bakers cyst? clinical
6 What is the best thyroid drug? biological
7 How is strep throat treated? clinical
8 Who is prescribed perphenazine? clinical
9 How to dislodge meat from throat? clinical
10 I have a cut on my arm and my fingers are sweating and i feel pain in my fingers. What can i do to stop the pain? clinical
11 How to treat phda1? biological
12 How can asthma be prevented? clinical
13 How to eliminate nocturnal atrial fibrillation? clinical
14 How to increase body immunity against covid 19? clinical
15 How does human body maintain hypokalemia shock homeostasis? biological
16 How long to recover immune system from prednisone treatment? biological
17 How do you assist an unconscious victim who is already vomiting? clinical
18 To treat abnormal ketones result of pregnant woman what food should be eaten?

In [None]:
# Preview the first rows of the question table.
question_df.head()

Unnamed: 0,id,question,type
0,1,What causes muscle spasm?,clinical
1,2,What does duloxetine do?,biological
2,3,How can i reduce my potassium levels?,clinical
3,4,How is diabetes diagnosed?,clinical
4,5,How to treat a bakers cyst?,clinical


In [8]:
# Write the question table to CSV in the working directory, without the index column.
question_df.to_csv('./questions_df.csv', index=False)

In [9]:
# Count how many questions carry each type label.
question_df['type'].value_counts()

clinical      52
biological    23
Name: type, dtype: int64

In [10]:
def extract_abstract_and_adaptation(abstract):
  """Flatten one question entry into (title, abstract, adaptation) records.

  Args:
    abstract: Mapping for a single question (despite the name, the whole
      entry is passed in). String values are skipped; every other value is
      expected to be a mapping with the keys 'Title', 'abstract' and
      'adaptations'.

  Returns:
    A list of dicts with the keys 'title', 'abstract' and 'adaptation', one
    per adaptation found under each non-string value. The same abstract
    object is shared by all records that come from the same article.
  """
  records = []
  for article in abstract.values():
    # Plain-string fields carry no abstract/adaptation payload, so skip them.
    if isinstance(article, str):
      continue
    article_title = article['Title']
    article_abstract = article['abstract']
    records.extend(
      {'title': article_title, 'abstract': article_abstract, 'adaptation': adapted}
      for adapted in article['adaptations'].values()
    )
  return records

In [11]:
# Will map each key of `data` to its flattened list of title/abstract/adaptation records.
reshape_data = {}
# Empty frame with the columns used for the sentence-level abstract/adaptation pairs.
pair_match_df = pd.DataFrame(columns=['seqA', 'seqB', 'question', 'question_type'])

In [12]:
# Flatten every question entry and store the resulting records under the question's key.
reshape_data.update(
  (question_id, extract_abstract_and_adaptation(entry))
  for question_id, entry in data.items()
)


In [None]:
# Build the sentence-level pair table: one row per abstract sentence (seqA)
# matched with the adaptation sentence stored under the same key (seqB).
# Rows are collected in a list and the frame is constructed once:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on each
# call. Rebuilding from scratch also keeps the cell idempotent on re-run.
pair_records = []
for idx, items_array in reshape_data.items():
  question = data[idx]['question']
  question_type = data[idx]['question_type']
  for item in items_array:
    abstract = item['abstract']
    adaptation = item['adaptation']
    # Only keep pairs with the same number of entries on both sides; anything
    # else cannot be matched key-by-key and is dropped.
    if len(abstract) != len(adaptation):
      continue
    for key, value in abstract.items():
      # Assumes equal-length pairs also share the same keys; a missing key raises KeyError.
      pair_records.append({ 'seqA': value, 'seqB': adaptation[key], 'question': question, 'question_type': question_type })

# Passing `columns` keeps the expected schema and column order even when there are no records.
pair_match_df = pd.DataFrame(pair_records, columns=['seqA', 'seqB', 'question', 'question_type'])

In [None]:
# Preview the first rows of the sentence-level pair table.
pair_match_df.head()

Unnamed: 0,seqA,seqB,question,question_type
0,Muscle cramps are a common problem characteriz...,Muscle cramps are a common problem represented...,What causes muscle spasm?,C
1,"These true cramps, which originate from periph...","These true cramps, coming from nerves outside ...",What causes muscle spasm?,C
2,"Medical history, physical examination, and a l...","Medical history, physical check-up, and lab sc...",What causes muscle spasm?,C
3,"Despite the ""benign"" nature of cramps, many pa...","Despite their harmless nature, cramps are unco...",What causes muscle spasm?,C
4,Treatment options are guided both by experienc...,Experience and limited medical studies guide t...,What causes muscle spasm?,C


In [None]:
# Save the reshaped sentence-pair table to CSV (without the index column) before continuing.
pair_match_df.to_csv('./pair_match_df.csv', index=False)

In [16]:
# Empty frame with the columns used for the full-text abstract/adaptation pairs.
abstract_adaptation_pair_df = pd.DataFrame(columns=['abstract', 'adaptation', 'question', 'category', 'question_type'])

In [17]:
# Build the document-level pair table: one row per (abstract, adaptation) pair,
# with each side's values joined into a single space-separated string.
# Rows are collected in a list and the frame is constructed once:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on each
# call. Rebuilding from scratch also keeps the cell idempotent on re-run.
abstract_adaptation_records = []
for idx, items_array in reshape_data.items():
  question = data[idx]['question']
  question_type = data[idx]['question_type']
  for item in items_array:
    abstract_adaptation_records.append({
      'abstract': ' '.join(item['abstract'].values()),
      'adaptation': ' '.join(item['adaptation'].values()),
      'question': question,
      # The question's key in `data` is stored as the category.
      'category': idx,
      'question_type': question_type,
    })

# Passing `columns` keeps the expected schema and column order even when there are no records.
abstract_adaptation_pair_df = pd.DataFrame(
  abstract_adaptation_records,
  columns=['abstract', 'adaptation', 'question', 'category', 'question_type'],
)

In [18]:
# Preview the first rows of the document-level pair table.
abstract_adaptation_pair_df.head()

Unnamed: 0,abstract,adaptation,question,category,question_type
0,Muscle cramps are a common problem characteriz...,Muscle cramps are a common problem represented...,What causes muscle spasm?,1,C
1,The dystonias are a group of disorders charact...,Dystonias are disorders with a lot of uncontro...,What causes muscle spasm?,1,C
2,"Muscle cramps result in continuous, involuntar...",Muscle cramps cause constant and unintended co...,What causes muscle spasm?,1,C
3,Exercise-Associated Muscle Cramps (EAMC) are a...,Exercise-Associated Muscle Cramps (EAMC) are a...,What causes muscle spasm?,1,C
4,Muscular cramp is a common symptom in healthy ...,"Muscle cramps are common in healthy people, es...",What causes muscle spasm?,1,C


In [19]:
# Write the document-level pair table to CSV in the working directory, without the index column.
abstract_adaptation_pair_df.to_csv('./abstract_adaptation_pair_df.csv', index=False)