In [2]:
import pandas as pd
import json
import numpy as np

def dataPipLine(saveF=True,fileName="outputFile"):
    drugs=pd.read_csv('drugs.csv')
    clinical_trials=pd.read_csv('clinical_trials.csv')
    pubmed=pd.read_csv('pubmed.csv')
    dataJson={}
    for index, rowD in drugs.iterrows():
        #drug=rowD['drug']
        drugID=rowD['atccode']
        drug=rowD['drug']
        dataJson[drugID]=[]
        for index,row in pubmed[pubmed['title'].str.contains("(?i)"+drug.lower())].iterrows():
            dataJson[drugID].append(
            {
                'pubmed':{
                    'id':row['id'],
                    'date':row['date']
                }
            })
        for indexT,rowT in clinical_trials[clinical_trials['scientific_title'].str.contains("(?i)"+drug.lower())].iterrows():
            dataJson[drugID].append(
            {
                'ClinicalTrial':{
                    'id':rowT['id'],
                    'date':rowT['date']
                }
            })
        uniqDates=pubmed[pubmed['title'].str.contains("(?i)"+drug.lower())]['date'].unique()
        for date in uniqDates:
            uniqsJournals=pubmed[pubmed['date'].str.contains("(?i)"+date.lower())]['journal'].unique()
            for journal in uniqsJournals:
                dataJson[drugID].append(
                {
                    'journal':{
                        'name':journal,
                        'date':date
                    }
                }
                )
    if saveF:
        nameF=fileName+".json"
        with open(nameF,'w') as file:
            json.dump(dataJson,file,indent=4)

dataPipLine(fileName="outputFile")
    

In [3]:
import pandas as pd
import json
import numpy as np

class dataPipLine(object):

    def __init__(self,  drugsF="drugs.csv",
                        clinicF="clinical_trials.csv",
                        pubmedF="pubmed.csv") -> None:
        """dataPipLine Object, this implements the method dpGetJson for process de data in arg

        Args:
            drugsF (str, optional): File with drugs information. Defaults to "drugs.csv".
            clinicF (str, optional): File with clinical trials information. Defaults to "clinical_trials.csv".
            pubmedF (str, optional): File . Defaults to "pubmed.csv".
        """
        super().__init__()
        self.drugsF=drugsF
        self.clinicF=clinicF
        self.pubmedF=pubmedF


    def dPGetJson(self,saveF=True,fileName="outputFile"):
        """This method allows to obtain a json object after data processing

        Args:
            saveF (bool, optional): Allows to save the JSON file. Defaults to True.
            fileName (str, optional): Necessary to save the JSON file, use without file extension. Defaults to "outputFile".

        Returns:
            [Json object]: The JSON that contains the processed information
        """
        drugs=pd.read_csv(self.drugsF)
        clinical_trials=pd.read_csv(self.clinicF)
        
        clinical_trials=clinical_trials.fillna({"id":"No ID","scientific_title":"No title","date":"No date","journal":"No Journal"})
        clinical_trials=clinical_trials.replace('^\s*$',"NO DATA",regex=True)
        
        pubmed=pd.read_csv(self.pubmedF)
        dataJson={}
        for index, rowD in drugs.iterrows():
            #drug=rowD['drug']
            drugID=rowD['atccode']
            drug=rowD['drug']
            dataJson[drugID]=[]
            for index,row in pubmed[pubmed['title'].str.contains("(?i)"+drug.lower())].iterrows():
                dataJson[drugID].append(
                {
                    'pubmed':{
                        'id':row['id'],
                        'date':row['date']
                    }
                })
            for indexT,rowT in clinical_trials[clinical_trials['scientific_title'].str.contains("(?i)"+drug.lower())].iterrows():
                dataJson[drugID].append(
                {
                    'ClinicalTrial':{
                        'id':rowT['id'],
                        'date':rowT['date']
                    }
                })
            uniqDates=pubmed[pubmed['title'].str.contains("(?i)"+drug.lower())]['date'].unique()
            for date in uniqDates:
                uniqsJournals=pubmed[pubmed['date'].str.contains("(?i)"+date.lower())]['journal'].unique()
                for journal in uniqsJournals:
                    dataJson[drugID].append(
                    {
                        'journal':{
                            'name':journal,
                            'date':date
                        }
                    }
                    )
        if saveF:
            nameF=fileName+".json"
            with open(nameF,'w') as file:
                json.dump(dataJson,file,indent=4)
        return dataJson

newDataPipline=dataPipLine()
jsonOutput=newDataPipline.dPGetJson()


In [4]:
def processJSON(jsonData):
    """Get the name of the journal that mentions the most different drugs

    Args:
        jsonData ([dic]): is the result of the dataPipLine.dPgetJson function

    Returns:
        [list]: [is a list of the journal that mentions the most different drougs], if is only one Journal the return is the name
    """
    journalsTotal=[]
    for key, values in jsonData.items():
        # print("==========================")
        for value in values:
            if  'journal' in value:
                journalsTotal.append(value['journal']['name'])

    list_set=set(journalsTotal)
    uniqueJournals=(list(list_set))
    countsJ=[journalsTotal.count(journal) for journal in uniqueJournals]

    returnListJournal=[]
    if countsJ.count(max(countsJ))>1:
        for i in range(countsJ.count(max(countsJ))):
            maxJourn=uniqueJournals[countsJ.index(max(countsJ))]
            returnListJournal.append(maxJourn)
            countsJ.remove(max(countsJ))
            uniqueJournals.remove(maxJourn)
        return returnListJournal
    else:
        maxJourn=uniqueJournals[countsJ.index(max(countsJ))]
        return maxJourn

listaJour=processJSON(jsonOutput)
print(listaJour)

['The journal of allergy and clinical immunology. In practice', 'Psychopharmacology']
