# Setup

In [6]:
import pandas as pd
import numpy as np
import src.maude_interface as maude
import logging

# Show DEBUG-level records so log messages emitted while loading files are visible.
logging.basicConfig(level=logging.DEBUG)

# Relative folders for the raw data files and the reference tables.
data_folder, reference_folder = "../data/", "../reference/"

# Code used for initial testing/development
## Compile Database to analyze (*deprecated* in favor of `maude.compile_maude_database`)

In [7]:
# Proof of concept: assemble the large combined database by hand, step by step.
mdr_base = maude.maude_to_pandas(data_folder + "mdrfoi.txt")
#foidev_test = maude.maude_to_pandas("./data/foidev.txt")

# Hand the base frame and the two extra data files to add_data_to_mdr.
mdr_total = maude.add_data_to_mdr(
    mdr_base,
    tuple(data_folder + name for name in ("foidev1998.txt", "foidevproblem.txt")),
)

# Left-join the problem-code reference table on DEVICE_PROBLEM_CODE so that
# every row already in mdr_total is kept.
mdr_total = pd.merge(
    left=mdr_total,
    right=maude.maude_to_pandas(reference_folder + "deviceproblemcodes.txt"),
    on='DEVICE_PROBLEM_CODE',
    how='left',
)

DEBUG:root:Reading file ../data/mdrfoi.txt with dtype=str
DEBUG:root:Reading file ../data/foidev1998.txt with dtype=str
DEBUG:root:Reading file ../data/foidevproblem.txt with dtype=str
DEBUG:root:Reading file ../reference/deviceproblemcodes.txt with dtype=str


## Save Database for later

In [None]:
# Persist the manually compiled frame so later sessions can reload it from disk.
mdr_total.to_pickle(data_folder + "mdrfoi_manual_compiled.pkl")

# Quick sanity check: dimensions first, then the column labels, one per print line.
print(mdr_total.shape, mdr_total.columns, sep="\n")
#print(mdr_total)

## Check if a CSV File is properly formatted

In [None]:
# Run the maude CSV check against foidev.txt in the "excluded" subfolder of the data folder.
# NOTE(review): presumably this file was set aside because it fails to parse cleanly — confirm.
maude.check_bad_csv(data_folder + "excluded/foidev.txt")

In [None]:
# Experiment: read foidevproblem.txt with a numeric type instead of the default
# (the loader logs dtype=str when no second argument is given), then inspect the result.
#
# NOTE(review): this per-column mapping is defined but never handed to the loader below,
# which receives a blanket np.float32 instead — confirm which of the two was intended.
# NOTE(review): float32 represents integers exactly only up to 2**24; if MDR_REPORT_KEY
# values can exceed that, keys would be rounded and joins on them would mismatch — verify.
foidev_dtype = {'MDR_REPORT_KEY': np.float32, 'DEVICE_PROBLEM_CODE': np.float32}
foidev_df = maude.maude_to_pandas(data_folder + "foidevproblem.txt", np.float32)

# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() appended a stray "None" line to the output.
foidev_df.info()

# Alternative that was tried: coerce after loading instead of at read time.
#foidev_df.apply(pd.to_numeric, errors='coerce', downcast='float')

# Count of missing values per column.
print(foidev_df.isnull().sum())

In [None]:
# Drop the development-only frames from the kernel namespace (targets are removed left to right).
del mdr_base, mdr_total, foidev_df

# PRODUCTION CODE
## New Way to Generate MAUDE Database

In [8]:
# Single-call replacement for the manual read/merge cells in the development section above
# (everything except the pickling step). It is given the data folder, the reference folder
# and the name of the base file; the DEBUG log shows the other files being added to the dataset.
all_data = maude.compile_maude_database(data_folder, reference_folder, "mdrfoi.txt")

DEBUG:root:Constructing database from base file: ../data/mdrfoi.txt
DEBUG:root:Reading file ../data/mdrfoi.txt with dtype=str
DEBUG:root:Attempting to add file foidev1998.txt to dataset
DEBUG:root:Reading file ../data/foidev1998.txt with dtype=str
DEBUG:root:Reading file ../data/foidev1998.txt with dtype=str
DEBUG:root:Attempting to add file foidev2012.txt to dataset
DEBUG:root:Reading file ../data/foidev2012.txt with dtype=str
DEBUG:root:Reading file ../data/foidev2012.txt with dtype=str
DEBUG:root:Attempting to add file foidevproblem.txt to dataset
DEBUG:root:Reading file ../data/foidevproblem.txt with dtype=str
DEBUG:root:Reading file ../data/foidevproblem.txt with dtype=str
DEBUG:root:Attempting to add file foitext1996.txt to dataset
DEBUG:root:Reading file ../data/foitext1996.txt with dtype=str
DEBUG:root:Reading file ../data/foitext1996.txt with dtype=str
DEBUG:root:Attempting to add file foitext1997.txt to dataset
DEBUG:root:Reading file ../data/foitext1997.txt with dtype=str
DE

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 148516 entries, 0 to 148515
Data columns (total 77 columns):
MDR_REPORT_KEY                    148516 non-null object
EVENT_KEY                         0 non-null object
REPORT_NUMBER                     148516 non-null object
REPORT_SOURCE_CODE                148516 non-null object
MANUFACTURER_LINK_FLAG_           148516 non-null object
NUMBER_DEVICES_IN_EVENT           0 non-null object
NUMBER_PATIENTS_IN_EVENT          0 non-null object
DATE_RECEIVED                     148516 non-null object
ADVERSE_EVENT_FLAG                148052 non-null object
PRODUCT_PROBLEM_FLAG              148237 non-null object
DATE_REPORT                       148019 non-null object
DATE_OF_EVENT                     123835 non-null object
REPROCESSED_AND_REUSED_FLAG       148390 non-null object
REPORTER_OCCUPATION_CODE          142238 non-null object
HEALTH_PROFESSIONAL               142238 non-null object
INITIAL_REPORT_TO_FDA             142238 non-null

In [9]:
# Experiment with a categorical column: ERR_TYPE is the label used for classification
# (for example hardware vs. software issues).
problem_codes = maude.maude_to_pandas(reference_folder + "deviceproblemcodes.txt")
problem_codes = problem_codes.astype({'ERR_TYPE': 'category'})

# Summary statistics for the table, followed by how many codes fall into each category.
print(problem_codes.describe())
print("\n\nProblem Code categories:\n", problem_codes['ERR_TYPE'].value_counts(), sep="")

DEBUG:root:Reading file ../reference/deviceproblemcodes.txt with dtype=str


       DEVICE_PROBLEM_CODE DEVICE_PROBLEM_DESCRIPTION  ERR_TYPE
count                  988                        988       987
unique                 988                        988         4
top                   1556         Needle, separation  Hardware
freq                     1                          1       619


Problem Code categories:
Hardware    619
Unknown     184
Both        103
Software     81
Name: ERR_TYPE, dtype: int64
