In [1]:
from sklearn.cluster import KMeans
import numpy as np
import json

In [2]:
def get_similar_diseases(disease):
    """Return the entry stored for ``disease`` in the disease mapping.

    Looks ``disease`` up in the module-level
    ``disease_dataset_to_FDA_disease_mapping``; when the key is absent a
    fresh empty list is returned instead.
    """
    no_match = []
    return disease_dataset_to_FDA_disease_mapping.get(disease, no_match)

In [3]:
def read_file(path):
    """Parse the JSON document stored at ``path`` and return the decoded object.

    The file is opened explicitly as UTF-8 so the result does not depend on
    the platform's default text encoding.

    Args:
        path: Location of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's contents.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the contents are not valid JSON.
    """
    with open(path, encoding="utf-8") as json_file:
        return json.load(json_file)

In [4]:
# Load the JSON lookup tables (via read_file) that the helper functions in
# this notebook read as module-level globals. Paths are relative to the
# notebook's working directory.

# Queried by get_similar_diseases(disease).
disease_dataset_to_FDA_disease_mapping = read_file("zone_1/disease_dataset_FDA_disease.json")
# Queried by get_drugs_for_disease(disease).
FDA_disease_to_drugs_mapping = read_file("zone_1/disease_drugs.json")
# Queried by get_reactions_for_drug(drug).
FDA_drugs_to_reactions_mapping = read_file("zone_1/drugs_reactions.json")
# Queried by construct_feature_vector: symptom -> index into the feature
# vector; its length also fixes the feature-vector size.
disease_dataset_symptom_id_mapping = read_file("zone_1/disease_dataset_symptom_id.json")
# Queried by get_similar_symptoms(reaction), i.e. keyed by reaction.
# NOTE(review): the file name reads as symptom -> reactions; confirm the key
# direction of the stored JSON matches this usage.
FDA_reactions_to_disease_dataset_symptom_mapping = read_file("zone_1/disease_dataset_symptom_FDA_reactions.json")

In [5]:
def get_drugs_for_disease(disease):
    """Return the entry stored for ``disease`` in the disease-to-drugs mapping.

    Looks ``disease`` up in the module-level ``FDA_disease_to_drugs_mapping``;
    a fresh empty list is returned when the key is absent.
    """
    no_drugs = []
    return FDA_disease_to_drugs_mapping.get(disease, no_drugs)

In [6]:
def get_reactions_for_drug(drug):
    """Return the entry stored for ``drug`` in the drug-to-reactions mapping.

    Looks ``drug`` up in the module-level ``FDA_drugs_to_reactions_mapping``;
    a fresh empty list is returned when the key is absent.
    """
    no_reactions = []
    return FDA_drugs_to_reactions_mapping.get(drug, no_reactions)

In [7]:
def get_similar_symptoms(reaction):
    """Return the entry stored for ``reaction`` in the reaction-to-symptom mapping.

    Looks ``reaction`` up in the module-level
    ``FDA_reactions_to_disease_dataset_symptom_mapping``; a fresh empty list
    is returned when the key is absent.
    """
    no_symptoms = []
    return FDA_reactions_to_disease_dataset_symptom_mapping.get(reaction, no_symptoms)

In [8]:
def construct_feature_vector(symptoms, symptom_ids=None):
    """Build a 0/1 indicator vector marking which symptoms are present.

    Args:
        symptoms: Iterable of symptom names.
        symptom_ids: Optional mapping of symptom name -> vector index.
            Defaults to the module-level ``disease_dataset_symptom_id_mapping``.
            Assumes the indices are integers in ``range(len(symptom_ids))``
            -- TODO confirm against the JSON file.

    Returns:
        A 1-D integer ``numpy`` array of length ``len(symptom_ids)`` with a 1
        at the index of every recognised symptom and 0 elsewhere. Symptoms
        that are not in the mapping are ignored.
    """
    if symptom_ids is None:
        symptom_ids = disease_dataset_symptom_id_mapping

    fv = np.zeros(len(symptom_ids), dtype=int)
    for symptom in symptoms:
        index = symptom_ids.get(symptom)
        if index is None:
            # Unknown symptom. Indexing with None is np.newaxis, so
            # ``fv[None] = 1`` would set EVERY element to 1 -- skip instead.
            continue
        fv[index] = 1
    return fv

In [9]:
def recommend_drug(disease, symptoms, n_clusters=10, random_state=0):
    """Recommend drugs whose side-effect profile does not cluster with the patient's symptoms.

    Candidate drugs are collected for every disease returned by
    ``get_similar_diseases(disease)``. Each drug is described by a feature
    vector built from the symptoms mapped to its reactions. The drug vectors
    are clustered with KMeans, the patient's symptom vector is assigned to one
    of those clusters, and every drug in that cluster is left out.

    Args:
        disease: Disease name, looked up via ``get_similar_diseases``.
        symptoms: Iterable of the patient's symptom names.
        n_clusters: Upper bound on the number of KMeans clusters. The actual
            number is capped at the number of candidate drugs, because KMeans
            rejects more clusters than samples.
        random_state: Seed passed to KMeans for reproducible clustering.

    Returns:
        List of drug names, in collection order, that are not in the same
        cluster as the patient's symptom vector. Empty when no candidate
        drugs are found.
    """
    patient_fv = construct_feature_vector(symptoms)

    drug_names = []
    drug_fvs = []
    # Distinct loop names: the original re-bound the `disease` and `symptoms`
    # parameters inside these loops.
    for similar_disease in get_similar_diseases(disease):
        for drug in get_drugs_for_disease(similar_disease):
            side_effects = set()
            for reaction in get_reactions_for_drug(drug):
                side_effects.update(get_similar_symptoms(reaction))
            drug_names.append(drug)
            drug_fvs.append(construct_feature_vector(side_effects))
    # NOTE(review): a drug listed under several of the similar diseases is
    # appended once per disease, so it can appear more than once in the result.

    if not drug_names:
        # Nothing to cluster; KMeans would raise on an empty sample set.
        return []

    cluster_count = min(n_clusters, len(drug_names))
    kmeans = KMeans(n_clusters=cluster_count, random_state=random_state).fit(drug_fvs)

    # predict() returns a 1-element array for the single patient row; take the
    # scalar so the comparison below is scalar-to-scalar.
    avoid_cluster = kmeans.predict(patient_fv.reshape(1, -1))[0]

    return [
        drug
        for drug, label in zip(drug_names, kmeans.labels_)
        if label != avoid_cluster
    ]

In [10]:
# Example query: recommend drugs for disease 'GERD' given five patient
# symptoms; the returned list of drug names is shown as the cell output.
recommend_drug('GERD',['polyuria','red_sore_around_nose','back_pain','cold_hands_and_feets','coma'])

['WARFARINE',
 'TYMLOS',
 'EPCLUSA',
 'Perindopril',
 'TOPAMAX',
 'TOLTERODINE',
 'FOLBEE',
 'TIAPRIDE',
 'DOBUTAMINE',
 'ALENDRONATE SODIUM.',
 'DEKRISTOL',
 'ENALAPRIL',
 'PENICILLIN VK',
 'BENICAR HCT',
 'BRILIQUE',
 'SYSTANE ULTRA',
 'SINGULAIR',
 'LUMIFY',
 'MAGNESIUM CHLORIDE.',
 'DELSYM',
 'ESTRACE',
 'FLEXERIL',
 'ADALIMUMAB',
 'Daunorubicin',
 'PEMETREXED DISODIUM',
 'BECLOMETHASONE [BECLOMETASONE]',
 'Prenatal vitamins',
 'CELEBREX',
 'Circadin',
 'MELOXICAM.',
 'CASSIA SENNA',
 'MONTELUCAST',
 'CITRUCEL',
 'BICALUTAMIDE.',
 'VITAMIN D2',
 'CALCIUM W/COLECALCIFEROL',
 'OMEPRAZOLE MAGNESIUM.',
 'PANTOPRAZOL',
 'MIDODRINE HYDROCHLORIDE.',
 'SENNOSIDE',
 'DEPO-TESTOSTERONE',
 'HYDROXYZINE HYDROCHLORIDE.',
 'DOXAZOSIN MESILATE',
 'CENTRUM SILVER /07431401/',
 'CARDENALIN',
 'AMOXICILLIN/CLAVULANATE POTASSIUM',
 'CORGARD',
 'POLARAMINE',
 'Mepolizumab',
 'Acetylsalicylic Acid',
 'HYDROXYZINE',
 'BROMAZEPAM',
 'TEPRENONE',
 'MYSOLINE',
 'BIOFERMIN /01617201/',
 'LUMIGAN',
 'VALSART