# Projects for Analysis of Anti-Depressant Drugs’s Adverse Events by FDA Adverse Event Reporting System (FAERS) from January 2019 to December 2023

## 1. MERGE DATA

In [23]:
# load library
import pandas as pd
import numpy as np

Refer to Code_Research_FDA_MergedOnly.ipynb

## 2. CLEANING DATA

In [31]:
# load merged data
fda = pd.read_csv('./FAERS_19Q1_23Q4.csv', index_col=0)

fda.head()
            

Unnamed: 0,primaryid,caseid,drug_seq,role_cod,drugname,prod_ai,val_vbm,route,dose_vbm,cum_dose_chr,...,to_mfr,occp_cod,reporter_country,occr_country,pt,drug_rec_act,outc_cod,rpsr_cod,year,quarter
0,103516324,10351632,10,C,QUETIAPINE.,QUETIAPINE,1,,"25 MG, 1X/DAY (AT BED TIME)",,...,,MD,US,US,Bronchitis,,,,2019,1
1,103516324,10351632,10,C,QUETIAPINE.,QUETIAPINE,1,,"25 MG, 1X/DAY (AT BED TIME)",,...,,MD,US,US,Confusional state,,,,2019,1
2,103516324,10351632,10,C,QUETIAPINE.,QUETIAPINE,1,,"25 MG, 1X/DAY (AT BED TIME)",,...,,MD,US,US,Diarrhoea,,,,2019,1
3,103516324,10351632,10,C,QUETIAPINE.,QUETIAPINE,1,,"25 MG, 1X/DAY (AT BED TIME)",,...,,MD,US,US,Disturbance in attention,,,,2019,1
4,103516324,10351632,10,C,QUETIAPINE.,QUETIAPINE,1,,"25 MG, 1X/DAY (AT BED TIME)",,...,,MD,US,US,Feeling abnormal,,,,2019,1


In [10]:
# check variables
fda.info()

<class 'pandas.core.frame.DataFrame'>
Index: 257883 entries, 0 to 257882
Data columns (total 56 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   primaryid         257883 non-null  int64  
 1   caseid            257883 non-null  int64  
 2   drug_seq          257883 non-null  int64  
 3   role_cod          257883 non-null  object 
 4   drugname          257883 non-null  object 
 5   prod_ai           257189 non-null  object 
 6   val_vbm           257883 non-null  int64  
 7   route             195200 non-null  object 
 8   dose_vbm          184718 non-null  object 
 9   cum_dose_chr      5238 non-null    float64
 10  cum_dose_unit     4912 non-null    object 
 11  dechal            180186 non-null  object 
 12  rechal            42655 non-null   object 
 13  lot_num           28560 non-null   object 
 14  exp_dt            958 non-null     float64
 15  nda_num           82059 non-null   float64
 16  dose_amt          149501 

In [12]:
# check variables
fda.columns

Index(['primaryid', 'caseid', 'drug_seq', 'role_cod', 'drugname', 'prod_ai',
       'val_vbm', 'route', 'dose_vbm', 'cum_dose_chr', 'cum_dose_unit',
       'dechal', 'rechal', 'lot_num', 'exp_dt', 'nda_num', 'dose_amt',
       'dose_unit', 'dose_form', 'dose_freq', 'indi_drug_seq', 'indi_pt',
       'dsg_drug_seq', 'start_dt', 'end_dt', 'dur', 'dur_cod', 'caseversion',
       'i_f_code', 'event_dt', 'mfr_dt', 'init_fda_dt', 'fda_dt', 'rept_cod',
       'auth_num', 'mfr_num', 'mfr_sndr', 'lit_ref', 'age', 'age_cod',
       'age_grp', 'sex', 'e_sub', 'wt', 'wt_cod', 'rept_dt', 'to_mfr',
       'occp_cod', 'reporter_country', 'occr_country', 'pt', 'drug_rec_act',
       'outc_cod', 'rpsr_cod', 'year', 'quarter'],
      dtype='object')

In [18]:
# check indication
fda['indi_pt'].value_counts()

indi_pt
Major depression                                             128891
Antidepressant therapy                                        71373
Mixed anxiety and depressive disorder                         19345
Schizoaffective disorder depressive type                      18527
Persistent depressive disorder                                 8545
Adjustment disorder with depressed mood                        4720
Perinatal depression                                           3430
Respiratory depression                                          785
Post stroke depression                                          445
Antidepressant drug level                                       434
Adjustment disorder with mixed anxiety and depressed mood       405
Electrocardiogram ST segment depression                         222
Agitated depression                                             220
Antidepressant drug clearance                                   206
Menopausal depression                   

In [32]:
# filter non-relevant indication
focused_indi = ['Major depression', 'Antidepressant therapy', 'Mixed anxiety and depressive disorder',
                'Persistent depressive disorder']

filtered_fda = fda[fda['indi_pt'].isin(focused_indi)]

filtered_fda['indi_pt'].value_counts()


indi_pt
Major depression                         128891
Antidepressant therapy                    71373
Mixed anxiety and depressive disorder     19345
Persistent depressive disorder             8545
Name: count, dtype: int64

In [20]:
filtered_fda.info()

<class 'pandas.core.frame.DataFrame'>
Index: 228154 entries, 0 to 257882
Data columns (total 56 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   primaryid         228154 non-null  int64  
 1   caseid            228154 non-null  int64  
 2   drug_seq          228154 non-null  int64  
 3   role_cod          228154 non-null  object 
 4   drugname          228154 non-null  object 
 5   prod_ai           227502 non-null  object 
 6   val_vbm           228154 non-null  int64  
 7   route             171076 non-null  object 
 8   dose_vbm          164375 non-null  object 
 9   cum_dose_chr      4629 non-null    float64
 10  cum_dose_unit     4303 non-null    object 
 11  dechal            155296 non-null  object 
 12  rechal            38567 non-null   object 
 13  lot_num           26932 non-null   object 
 14  exp_dt            894 non-null     float64
 15  nda_num           74309 non-null   float64
 16  dose_amt          135828 

In [22]:
filtered_fda.head().T

Unnamed: 0,0,1,2,3,4
primaryid,103516324,103516324,103516324,103516324,103516324
caseid,10351632,10351632,10351632,10351632,10351632
drug_seq,10,10,10,10,10
role_cod,C,C,C,C,C
drugname,QUETIAPINE.,QUETIAPINE.,QUETIAPINE.,QUETIAPINE.,QUETIAPINE.
prod_ai,QUETIAPINE,QUETIAPINE,QUETIAPINE,QUETIAPINE,QUETIAPINE
val_vbm,1,1,1,1,1
route,,,,,
dose_vbm,"25 MG, 1X/DAY (AT BED TIME)","25 MG, 1X/DAY (AT BED TIME)","25 MG, 1X/DAY (AT BED TIME)","25 MG, 1X/DAY (AT BED TIME)","25 MG, 1X/DAY (AT BED TIME)"
cum_dose_chr,,,,,


In [33]:
# check role_cod
rename_role_cod = {'PS': 'primary',
                   'SS': 'secondary',
                   'C': 'concomitant',
                   'I': 'interacting'}

filtered_fda['role_cod'] = filtered_fda['role_cod'].replace(rename_role_cod)

filtered_fda['role_cod'].value_counts()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_fda['role_cod'] = filtered_fda['role_cod'].replace(rename_role_cod)


role_cod
secondary      92911
concomitant    64128
primary        52516
interacting    18599
Name: count, dtype: int64

In [25]:
# check drugname
filtered_fda['drugname'].value_counts()

drugname
SERTRALINE                                                                13469
ABILIFY                                                                   12045
VENLAFAXINE                                                               10091
ESCITALOPRAM                                                               6924
TRAZODONE                                                                  6706
                                                                          ...  
PAROXETINA RATIOPHARM                                                         1
NEFAZODONE HYDROCHLORIDE.                                                     1
Nefazodone                                                                    1
SERTRALINA CINFA  50 mg COMPRIMIDOS RECUBIERTOS CON PELICULA EFG , ...        1
CHLORPROMAZINE HYDROCHLORIDE.                                                 1
Name: count, Length: 1426, dtype: int64

In [26]:
# # check prod_ai
filtered_fda['prod_ai'].value_counts()

prod_ai
SERTRALINE HYDROCHLORIDE         18058
VENLAFAXINE HYDROCHLORIDE        17010
ARIPIPRAZOLE                     16264
ESCITALOPRAM OXALATE             14172
MIRTAZAPINE                      11488
                                 ...  
ATORVASTATIN CALCIUM                 1
DIPHENHYDRAMINE HYDROCHLORIDE        1
BLONANSERIN                          1
ACETAMINOPHEN\HYDROCODONE            1
GLIMEPIRIDE                          1
Name: count, Length: 396, dtype: int64

In [35]:
# check val_vbm
rename_val_vbm = {1: 'tradename', 2: 'verbatim'}

filtered_fda['val_vbm'] = filtered_fda['val_vbm'].replace(rename_val_vbm)

filtered_fda['val_vbm'].value_counts()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_fda['val_vbm'] = filtered_fda['val_vbm'].replace(rename_val_vbm)


val_vbm
tradename    227588
verbatim        566
Name: count, dtype: int64

In [37]:
# check route!!
filtered_fda['route'].value_counts()

route
Unknown                                  90527
Oral                                     76854
Transplacental                            1937
Intramuscular                              406
Other                                      405
Nasal                                      374
Transdermal                                126
Intravenous (not otherwise specified)      113
Sublingual                                  77
Subcutaneous                                57
Intravenous drip                            53
Intracardiac                                33
Transmammary                                30
Buccal                                      24
Vaginal                                     24
Respiratory (inhalation)                    11
Endocervical                                 8
Rectal                                       5
INTRACAVITY                                  5
Retrobulbar                                  4
Intra-uterine                                1
INTRANA

In [38]:
# check dose_vbm!!
filtered_fda['dose_vbm'].value_counts()

dose_vbm
UNK                                            30387
1 DF, QD                                        2510
5 MG, QD                                        1814
10 MG, QD                                       1438
2 MG, QD                                        1254
                                               ...  
20 milligram, AM (20.0 MG before breakfast)        1
PRIOR TO 12/23/2008??AM                            1
2.5mg,qd                                           1
300 MG, (100 MG AM, 200 MG QHS)                    1
300 MILLIGRAM, QAM                                 1
Name: count, Length: 5685, dtype: int64

In [39]:
# check dose_vbm!!
filtered_fda['cum_dose_chr'].value_counts()

cum_dose_chr
0.000        326
160.000      166
1.000        125
12760.000    106
40.000       105
            ... 
4125.000       1
1116.000       1
153.000        1
50.725         1
1210.000       1
Name: count, Length: 356, dtype: int64


       'val_vbm' : drugsource
       
       'route', 'dose_vbm', 'cum_dose_chr', 'cum_dose_unit',
       'dechal', 'rechal', 'lot_num', 'exp_dt', 'nda_num', 'dose_amt',
       'dose_unit', 'dose_form', 'dose_freq', 'indi_drug_seq', 'indi_pt',
       'dsg_drug_seq', 'start_dt', 'end_dt', 'dur', 'dur_cod', 'caseversion',
       'i_f_code', 'event_dt', 'mfr_dt', 'init_fda_dt', 'fda_dt', 'rept_cod',
       'auth_num', 'mfr_num', 'mfr_sndr', 'lit_ref', 'age', 'age_cod',
       'age_grp', 'sex', 'e_sub', 'wt', 'wt_cod', 'rept_dt', 'to_mfr',
       'occp_cod', 'reporter_country', 'occr_country', 'pt', 'drug_rec_act',
       'outc_cod', 'rpsr_cod', 'year', 'quarter'],
      dtype='object')