In [1]:
# Turn off the Jedi-based completer for this IPython session.
%config Completer.use_jedi=False

## Read the IR_PHA_R table

In [2]:
from typing import Optional

import pandas as pd

In [4]:
# Load the IR_PHA_R export and drop every row whose PHA_NOM_PA is missing.
ir_pha_r = (
    pd.read_csv(
        "/Users/youcefsebiat/IdeaProjects/SNIIRAM-flattening/src/main/resources/IR_PHA_R/IR_PHA_R.csv",
        sep=";",
        index_col="Unnamed: 0",
        na_values="NaN",
    )
    .dropna(subset=["PHA_NOM_PA"])
    .copy()
)

ir_pha_r.head()

Unnamed: 0,PHA_CIP_C13,PHA_RGE_C13,PHA_MED_IMG,PHA_MED_NAT,PHA_MED_GEN_DPROD,PHA_DOS_PRA_DSES,PHA_PRD_LIB2,PHA_PPC_IND,PHA_MEX_IND,PHA_DOS_UNT_DSES,...,PHA_HTA_TOP,PHA_PRI_UND,PHA_EPH_COD,PHA_ATC_C07,PHA_ATC_C03,PHA_MED_NOM,PHA_EPH_LIB_DSES,PHA_PRS_IDE,PHA_ATC_L03,PHA_ATC_L07
0,3400931467731,,0,A,,ND,,**,N,ND,...,2,129,C05A2,WHOMEO,W99,AESCULUS COMP BOIR POM TB 20G 1,TOPIQUES ANTIHEMORROIDAIRES SANS CORTICOIDES,3146773,SPECIALITES HOMEOPATHIQUES,HOMEOPATHIE
1,3400934768491,,0,A,,25,,,N,G/100 G,...,2,461,C05A2,C05AX03,C05,TITANOREINE CREME 1/40 G,TOPIQUES ANTIHEMORROIDAIRES SANS CORTICOIDES,3476849,VASCULOPROTECTEURS,AUTRES PREPARATIONS EN ASSOCIATION
2,3400932300778,,0,A,,03,,**,N,G,...,2,24,C05A2,C05AX03,C05,TITANOREINE SUPPO 12,TOPIQUES ANTIHEMORROIDAIRES SANS CORTICOIDES,3230077,VASCULOPROTECTEURS,AUTRES PREPARATIONS EN ASSOCIATION
3,3400931407249,,0,A,,58,,**,N,G/100 G,...,2,344,C05A2,C05AX03,C05,PROCTOLOG CREME RECTALE 1/20 G,TOPIQUES ANTIHEMORROIDAIRES SANS CORTICOIDES,3140724,VASCULOPROTECTEURS,AUTRES PREPARATIONS EN ASSOCIATION
6,3400930895900,,0,A,,08,,**,N,G/100 G,...,2,183,C05B,C05B,C05,CREME RAP CREME TUBE 105G 1/105 G,"THERAPEUTIQUE ANTIVARICOSIQUE, A USAGE TOPIQUE",3089590,VASCULOPROTECTEURS,THERAPEUTIQUE ANTIVARIQUEUSE


## Define Pharma Classes for the Fall Study

In [5]:
from collections import namedtuple

# One classification rule:
#   Pharma_class      - label returned when the rule matches
#   ATC_Codes         - list of ATC code prefixes selecting the rule
#   ATC_Exceptions    - list of ATC code prefixes excluded from the rule, or None
#   Therapeutic_class - broader label shared by several rules
# The typename matches the bound name so that repr() is not misleading and
# pickle can resolve the class by name.
Pharma_mapping = namedtuple(
    "Pharma_mapping",
    ["Pharma_class", "ATC_Codes", "ATC_Exceptions", "Therapeutic_class"],
)

In [6]:
# Classification rules of the fall study, grouped by therapeutic class.
# The list order is kept as-is: the lookup functions in this notebook scan it
# from the top and return the first rule that matches.
fall_study_mappings = [
    # --- Antidepresseurs ---
    Pharma_mapping(
        Pharma_class="Antidepresseurs_Tricycliques",
        ATC_Codes=["N06AA"],
        ATC_Exceptions=["N06AA06"],
        Therapeutic_class="Antidepresseurs",
    ),
    Pharma_mapping(
        Pharma_class="Antidepresseurs_ISRS",
        ATC_Codes=["N06AB"],
        ATC_Exceptions=None,
        Therapeutic_class="Antidepresseurs",
    ),
    Pharma_mapping(
        Pharma_class="Antidepresseurs_ISRSN",
        ATC_Codes=["N06AX11", "N06AX16", "N06AX17", "N06AX21", "N06AX26"],
        ATC_Exceptions=None,
        Therapeutic_class="Antidepresseurs",
    ),
    Pharma_mapping(
        Pharma_class="Antidepresseurs_IMAO_AB",
        ATC_Codes=["N06AF"],
        ATC_Exceptions=None,
        Therapeutic_class="Antidepresseurs",
    ),
    Pharma_mapping(
        Pharma_class="Antidepresseurs_IMAO_A",
        ATC_Codes=["N06AG"],
        ATC_Exceptions=None,
        Therapeutic_class="Antidepresseurs",
    ),
    Pharma_mapping(
        Pharma_class="Antidepresseurs_Autres",
        ATC_Codes=["N06AX03", "N06AX09", "N06AX14", "N06AX22", "N06AA06"],
        ATC_Exceptions=None,
        Therapeutic_class="Antidepresseurs",
    ),
    # --- Antihypertenseurs ---
    Pharma_mapping(
        Pharma_class="Antihypertenseurs_SARTANS",
        ATC_Codes=["C09C", "C09D"],
        ATC_Exceptions=None,
        Therapeutic_class="Antihypertenseurs",
    ),
    Pharma_mapping(
        Pharma_class="Antihypertenseurs_IEC",
        ATC_Codes=["C09A", "C09B"],
        ATC_Exceptions=None,
        Therapeutic_class="Antihypertenseurs",
    ),
    Pharma_mapping(
        Pharma_class="Antihypertenseurs_Diuretiques",
        ATC_Codes=["C03"],
        ATC_Exceptions=None,
        Therapeutic_class="Antihypertenseurs",
    ),
    Pharma_mapping(
        Pharma_class="Antihypertenseurs_Betabloquants",
        ATC_Codes=["C07"],
        ATC_Exceptions=None,
        Therapeutic_class="Antihypertenseurs",
    ),
    Pharma_mapping(
        Pharma_class="Antihypertenseurs_Inhibiteurs_calciques",
        ATC_Codes=["C08"],
        ATC_Exceptions=None,
        Therapeutic_class="Antihypertenseurs",
    ),
    Pharma_mapping(
        Pharma_class="Antihypertenseurs_Autres",
        ATC_Codes=["C02", "C09XA", "C10BX03"],
        ATC_Exceptions=None,
        Therapeutic_class="Antihypertenseurs",
    ),
    # --- Hypnotiques ---
    Pharma_mapping(
        Pharma_class="Hypnotiques_Benzodiazepines_anxiolytiques",
        ATC_Codes=["N05BA"],
        ATC_Exceptions=None,
        Therapeutic_class="Hypnotiques",
    ),
    Pharma_mapping(
        Pharma_class="Hypnotiques_Autres_anxiolytiques",
        ATC_Codes=["N05BB", "N05BC", "N05BE", "N05BX"],
        ATC_Exceptions=["N05BC51"],
        Therapeutic_class="Hypnotiques",
    ),
    Pharma_mapping(
        Pharma_class="Hypnotiques_Benzodiazepines_hypnotiques",
        ATC_Codes=["N05CD"],
        ATC_Exceptions=["N05CD08"],
        Therapeutic_class="Hypnotiques",
    ),
    Pharma_mapping(
        Pharma_class="Hypnotiques_Autres_hypnotiques",
        ATC_Codes=["N05CF", "N05BC51", "N05CM11", "N05CM16", "N05CX"],
        ATC_Exceptions=None,
        Therapeutic_class="Hypnotiques",
    ),
    # --- Neuroleptiques ---
    Pharma_mapping(
        Pharma_class="Neuroleptiques_Neuroleptiques_atypiques",
        ATC_Codes=["N05A"],
        ATC_Exceptions=["N05AL06", "N05AN01", "N05AA", "N05AH02", "N05AH03", "N05AL05", "N05AX08", "N05AX12", "N05AA07"],
        Therapeutic_class="Neuroleptiques",
    ),
    Pharma_mapping(
        Pharma_class="Neuroleptiques_Autres_neuroleptiques",
        ATC_Codes=["N05AA", "N05AH02", "N05AH03", "N05AL05", "N05AX08", "N05AX12"],
        ATC_Exceptions=None,
        Therapeutic_class="Neuroleptiques",
    ),
]

In [7]:
# Work on an independent copy holding only the ATC code, the CIP13 code and the PHA_NOM_PA column.
input_table = ir_pha_r.loc[:, ["PHA_ATC_C07", "PHA_CIP_C13", "PHA_NOM_PA"]].copy()

### Define the functions that find the pharmaceutical and therapeutic classes

In [8]:
def find_pharma_class(ATC_code: str, mappings) -> Optional[str]:
    """Return the pharmaceutical class of the first mapping matching an ATC code.

    A mapping matches when ``ATC_code`` starts with at least one of its
    ``ATC_Codes`` prefixes and with none of its ``ATC_Exceptions`` prefixes.
    Mappings are tried in order and the first match wins.

    Args:
        ATC_code: ATC code to classify. A value that is not a string (for
            example ``None`` or the float NaN pandas uses for missing values)
            cannot be classified and yields ``None`` instead of raising.
        mappings: Iterable of objects exposing ``Pharma_class``, ``ATC_Codes``
            and ``ATC_Exceptions`` (``None`` when the mapping has no exception).

    Returns:
        The ``Pharma_class`` of the first matching mapping, or ``None`` when
        no mapping matches.
    """
    # Missing values reach this function as float NaN / None, which have no
    # ``startswith`` method.
    if not isinstance(ATC_code, str):
        return None

    for pharma_mapping in mappings:
        # str.startswith accepts a tuple of prefixes and tests them all at once.
        if not ATC_code.startswith(tuple(pharma_mapping.ATC_Codes)):
            continue

        excluded_prefixes = pharma_mapping.ATC_Exceptions
        if excluded_prefixes and ATC_code.startswith(tuple(excluded_prefixes)):
            # Explicitly excluded from this mapping; a later one may still match.
            continue

        return pharma_mapping.Pharma_class

    return None

In [9]:
def find_thera_class(ATC_code: str, mappings) -> Optional[str]:
    """Return the therapeutic class of the first mapping matching an ATC code.

    A mapping matches when ``ATC_code`` starts with at least one of its
    ``ATC_Codes`` prefixes and with none of its ``ATC_Exceptions`` prefixes.
    Mappings are tried in order and the first match wins.

    Args:
        ATC_code: ATC code to classify. A value that is not a string (for
            example ``None`` or the float NaN pandas uses for missing values)
            cannot be classified and yields ``None`` instead of raising.
        mappings: Iterable of objects exposing ``Therapeutic_class``,
            ``ATC_Codes`` and ``ATC_Exceptions`` (``None`` when the mapping
            has no exception).

    Returns:
        The ``Therapeutic_class`` of the first matching mapping, or ``None``
        when no mapping matches.
    """
    # Missing values reach this function as float NaN / None, which have no
    # ``startswith`` method.
    if not isinstance(ATC_code, str):
        return None

    for pharma_mapping in mappings:
        # str.startswith accepts a tuple of prefixes and tests them all at once.
        if not ATC_code.startswith(tuple(pharma_mapping.ATC_Codes)):
            continue

        excluded_prefixes = pharma_mapping.ATC_Exceptions
        if excluded_prefixes and ATC_code.startswith(tuple(excluded_prefixes)):
            # Explicitly excluded from this mapping; a later one may still match.
            continue

        return pharma_mapping.Therapeutic_class

    return None

## Apply the functions

In [10]:
# Label every row from its ATC code; rows matching no mapping get None in both columns.
input_table["pharmaceutic_family"] = input_table["PHA_ATC_C07"].apply(
    find_pharma_class, args=(fall_study_mappings,)
)
input_table["therapeutic"] = input_table["PHA_ATC_C07"].apply(
    find_thera_class, args=(fall_study_mappings,)
)

# Keep only the rows for which a pharmaceutical family was found.
output_table = input_table.loc[input_table["pharmaceutic_family"].notna()].copy()

In [11]:
# Step 1: index PHA_NOM_PA by CIP13 code and cut each value on " + ",
# giving one list of parts per row.
molecules_series = (
    output_table
    .set_index("PHA_CIP_C13")["PHA_NOM_PA"]
    .apply(lambda label: label.split(" + "))
)

# Step 2: flatten to one row per (CIP13 code, part). Each list is spread over
# numbered columns, the columns are stacked into a single long series, the NaN
# padding of the shorter lists is dropped, and the part position ("level_0")
# is discarded.
molecules_series = (
    molecules_series
    .apply(pd.Series)
    .unstack()
    .dropna()
    .reset_index()
    .drop(columns=["level_0"])
    .rename(columns={0: "molecule"})
)

molecules_series.head()

Unnamed: 0,PHA_CIP_C13,molecule
0,3400937395793,ATENOLOL
1,3400938073065,NEBIVOLOL
2,3400937316903,ATENOLOL
3,3400937420389,NEBIVOLOL
4,3400937299190,ATENOLOL


In [12]:
# Attach the flattened "molecule" column to the labelled rows, joining on the CIP13 code.
final_df = output_table.merge(molecules_series, on="PHA_CIP_C13", how="inner")

final_df.head()

Unnamed: 0,PHA_ATC_C07,PHA_CIP_C13,PHA_NOM_PA,pharmaceutic_family,therapeutic,molecule
0,C07AB03,3400937395793,ATENOLOL,Antihypertenseurs_Betabloquants,Antihypertenseurs,ATENOLOL
1,C07AB12,3400938073065,NEBIVOLOL,Antihypertenseurs_Betabloquants,Antihypertenseurs,NEBIVOLOL
2,C07AB03,3400937316903,ATENOLOL,Antihypertenseurs_Betabloquants,Antihypertenseurs,ATENOLOL
3,C07AB12,3400937420389,NEBIVOLOL,Antihypertenseurs_Betabloquants,Antihypertenseurs,NEBIVOLOL
4,C07AB03,3400937299190,ATENOLOL,Antihypertenseurs_Betabloquants,Antihypertenseurs,ATENOLOL


In [13]:
# Number of rows in the final mapping table.
final_df.shape[0]

7039

## Save the output

In [26]:
# Destination of the exported mapping table.
output_file_path = "/path/to/mapping_molecules.csv"

# Write the table as CSV, leaving out the DataFrame index.
final_df.to_csv(output_file_path, index=False)