# Load Raw PECARN TBI dataset
Either copy the PECARN TBI dataset to the same location as this notebook, or set the TBI_DIR environment variable to the directory where the file is stored.

In [None]:
import os
import pandas as pd 

print('START: 00-load-raw-data.ipynb')

# Bare name of the raw dataset file; it is expanded to a full path below.
pecarn_tbi_filename = 'TBI PUD 10-08-2013.csv'

# Prefer the copy under $TBI_DIR, but only when the variable is set AND the
# file actually exists there; otherwise fall back to the working directory.
env_candidate = None
if 'TBI_DIR' in os.environ:
    pecarn_tbi_dir = os.environ['TBI_DIR']
    env_candidate = pecarn_tbi_dir + '/' + pecarn_tbi_filename
    if not os.path.exists(env_candidate):
        env_candidate = None

if env_candidate is None:
    pecarn_tbi_filename = os.getcwd() + '/' + pecarn_tbi_filename
else:
    pecarn_tbi_filename = env_candidate

# Attempt the load. A missing file is reported rather than raised, so
# "pecarn_tbi" is simply left undefined in that case.
try:
    pecarn_tbi = pd.read_csv(pecarn_tbi_filename, index_col=0)
except FileNotFoundError:
    print('ERROR: ' + pecarn_tbi_filename + ' not found. Please either copy the TBI data to this location or set the TBI_DIR environment variable')
else:
    print('  PECARN TBI data read from ' + pecarn_tbi_filename + ' into "pecarn_tbi" dataframe')

# Column Ordering
Purely for convenience, the columns are reordered alphabetically by name.

In [None]:
# Sort the columns alphabetically by name (convenience only).
# The load step reports a missing file without raising, so the dataframe may
# not exist here; skip the reorder in that case instead of failing with a
# NameError.
if 'pecarn_tbi' in locals():
    pecarn_tbi = pecarn_tbi.reindex(sorted(pecarn_tbi.columns), axis=1)

# Data Types
The majority of the columns in the PECARN dataset are essentially categorical, with the exception of the GCS and age columns, which are numeric.

It is helpful to first convert everything to a nullable integer type (Int64), as there is some missing data, and then to convert every column except the GCS and age columns to a Categorical type.

In [None]:
# Columns that stay numeric (ages and GCS scores); every other column that
# converts cleanly is additionally turned into a categorical.
PECARN_NUMERIC_COLUMNS = frozenset(
    ['AgeinYears', 'AgeInMonth', 'GCSEye', 'GCSVerbal', 'GCSMotor', 'GCSTotal']
)


def convert_pecarn_dtypes(df, numeric_columns=PECARN_NUMERIC_COLUMNS):
    """Convert the columns of *df* in place to nullable-integer / categorical types.

    Each column is cast to float and then to the nullable ``Int64`` type so
    that missing values survive the conversion. Columns not named in
    *numeric_columns* are then cast to ``category``. A column that cannot be
    converted (for example free text or non-integral floats) is left
    untouched.

    Args:
        df: The dataframe to modify in place.
        numeric_columns: Names of the columns that remain ``Int64`` rather
            than becoming categorical.
    """
    # Snapshot the column labels before assigning back into the frame.
    for col in list(df):
        try:
            converted = df[col].astype(float).astype('Int64')
            if col not in numeric_columns:
                converted = converted.astype('category')
        except (TypeError, ValueError):
            # Best-effort: only conversion failures are skipped, so
            # interrupts and unrelated errors still propagate.
            continue
        df[col] = converted


# The load step may have failed without raising; only convert when the
# dataframe actually exists.
if 'pecarn_tbi' in locals():
    convert_pecarn_dtypes(pecarn_tbi)