In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# tblBaseline.csv

In [2]:
# Baseline fields to keep, written as (raw CSV field, readable label) pairs.
# zip() regroups the pairs into one tuple of raw names and one tuple of labels,
# and both are converted to lists so they stay positionally aligned.
columns, column_names = map(list, zip(
    ('PtID', 'Patient ID'),
    ('seastxt0', 'Season'),
    ('trtgroup', 'Treatment Group'),
    ('age0', 'Age'),
    ('agegrp0', 'Age Group'),
    ('raceeth', 'Race'),
    ('aff_eye', 'Affected Eye'),
    ('cdmsbase', 'Had MS'),
    ('logaff0', 'Affected Eye VA LogMAR'),
    ('discafftxt0', 'Optic Disc'),
    ('pain0', 'Eye Pain'),
    ('fhms0', 'FH'),
    ('prneuro0', 'Prior Neurologic Symptoms'),
    ('lesion0', 'MRI Lesions'),
    ('lesnum0', 'Lesions Count'),
))

In [3]:
# Load the raw baseline table; the path is relative to the notebook's working directory.
df_base = pd.read_csv(filepath_or_buffer='tblBaseline.csv')

In [4]:
# Keep only the selected baseline fields and relabel them positionally with the readable names.
new_df_base = df_base.loc[:, columns].set_axis(column_names, axis=1)

In [5]:
def get_season(x):
    """Translate a season label into its integer code.

    The labels '1-Summer', '2-Fall' and '3-Winter' map to 1, 2 and 3.
    Any other value (including missing or unexpected labels) maps to 4.
    """
    for code, label in enumerate(('1-Summer', '2-Fall', '3-Winter'), start=1):
        if x == label:
            return code
    # Fallback: everything that is not one of the three labels above.
    return 4
    
# Replace each season label with its integer code, element by element.
new_df_base['Season'] = new_df_base['Season'].map(get_season)

In [6]:
# Collapse the age-group label to one letter: exactly 'A)<=30' becomes 'A';
# every other value (missing values included) becomes 'B'.
new_df_base['Age Group'] = (
    new_df_base['Age Group'].eq('A)<=30').map({True: 'A', False: 'B'})
)

In [7]:
# Binary flag: 1 where the label is exactly 'B)YES', 0 for anything else (missing values included).
new_df_base['MRI Lesions'] = new_df_base['MRI Lesions'].eq('B)YES').astype('int64')

In [8]:
# Binary flag: 1 where the value is exactly 'YES', 0 for anything else (missing values included).
new_df_base['Prior Neurologic Symptoms'] = (
    new_df_base['Prior Neurologic Symptoms'].eq('YES').astype('int64')
)

In [9]:
# Binary flag: 1 where the value is exactly 'YES', 0 for anything else (missing values included).
new_df_base['Eye Pain'] = new_df_base['Eye Pain'].eq('YES').astype('int64')

In [10]:
# Binary flag: 1 where the value is exactly 'YES', 0 for anything else (missing values included).
new_df_base['FH'] = new_df_base['FH'].eq('YES').astype('int64')

In [11]:
# Binary flag: 1 where the baseline value is true, 0 for anything else (missing values included).
# pandas.read_csv turns the text True/False into real booleans, and a boolean True never
# equals the string 'True', so a direct comparison would mark every row as 0.
# Comparing the string form accepts both a boolean True and the literal text 'True'.
new_df_base['Had MS'] = new_df_base['Had MS'].apply(lambda x: 1 if str(x) == 'True' else 0)

In [12]:
# Optic-disc label -> integer code; the code equals the label's position in this tuple (starting at 1).
optic_disc = {
    grade: code
    for code, grade in enumerate(
        ("1-NORMAL", "2-MILD/FOCAL", "3-MILD/DIFFUSE", "4-SEVERE/FOCAL", "5-SEVERE/DIFFUSE"),
        start=1,
    )
}

In [13]:
# Replace each optic-disc label with its integer code.
# A label that is not a key of optic_disc (a missing value included) raises KeyError.
new_df_base['Optic Disc'] = [optic_disc[grade] for grade in new_df_base['Optic Disc']]

In [14]:
# Last expression of the cell: the notebook renders the recoded baseline frame.
new_df_base

Unnamed: 0,Patient ID,Season,Treatment Group,Age,Age Group,Race,Affected Eye,Had MS,Affected Eye VA LogMAR,Optic Disc,Eye Pain,FH,Prior Neurologic Symptoms,MRI Lesions,Lesions Count
0,215,1,INTRAVENOUS,43.405886,B,WHITE,OS,0,1.70,1,1,0,0,1,1
1,313,1,PLACEBO,22.140999,A,WHITE,OS,0,1.70,1,0,0,0,1,2
2,174,3,PREDNISONE,33.199179,B,WHITE,OS,0,0.70,1,1,0,0,1,1
3,420,3,PLACEBO,34.458590,B,WHITE,OD,0,0.08,1,1,0,0,1,>10
4,301,4,INTRAVENOUS,43.882272,B,WHITE,OS,0,-0.06,3,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
452,60,2,PREDNISONE,41.533196,B,WHITE,OD,0,1.70,1,1,0,0,0,0
453,290,3,PLACEBO,28.908966,A,WHITE,OD,0,1.70,1,1,0,0,1,6
454,238,3,PLACEBO,40.355921,B,WHITE,OD,0,0.30,1,1,1,0,1,4
455,176,4,INTRAVENOUS,33.831622,B,WHITE,OS,0,0.00,1,1,0,0,0,0


# tblF03Rand.csv - ONTT Eligibility Examination Form

In [63]:
# Load the raw table named in this section's heading; the path is relative to the working directory.
df_rand = pd.read_csv(filepath_or_buffer='tblF03Rand.csv')

In [64]:
# Raw tblF03Rand.csv field names to keep; matched by position with rand_column_names below.
# NOTE(review): 'OCBDIOP' is listed twice (the last two entries), so the columns labelled
# 'IOP OD' and 'IOP OS' are both read from the same source field and hold identical values.
# The second entry was presumably meant to be the other eye's pressure field -- possibly
# 'OCBSIOP', by analogy with 'OCADHCO'/'OCASHCO' -- TODO confirm against the data dictionary.
rand_columns = [
    'PtID', 'OCAEVSYM', 'OCAVEX', 'OCAPAE', 'OCACHP', 'OCAWP', 'OCADHCO', 'OCASHCO', 'OCADMS', 'OCASIN', 'OCAHYP',
    'OCAVRS', 'OCAHDB', 'OCAHPEP', 'OCBPSY', 'OCBTOB', 'OCBPDEF', 'OCBPDW', 'OCBDIOP', 'OCBDIOP'
    ]

# Readable labels for the fields above, in the same order.
# NOTE(review): "H of OD" looks like a typo for "Hx of OD" (compare 'Hx of OS'). It is left
# unchanged because it is a runtime column label; check for uses of it before renaming.
rand_column_names = [
    'Patient ID', 'Earliest visual symptom', 'Vision at time of examination', 'Pain', 'Pain Score', 'When Pain', "H of OD", 
    'Hx of OS', 'Hx of MS', 'Hx of Sinusitis', 'Hx of HTN', 'Hx of Viral', 'Hx of Diabetes', 'PUD', 'Psychological', 'Smoking',
    'RAPD', 'RAPD Site', 'IOP OD', 'IOP OS'
]

In [65]:
# Keep only the selected fields and relabel them positionally with the readable names.
new_df_rand = df_rand.loc[:, rand_columns].set_axis(rand_column_names, axis=1)

In [66]:
# Frequency of each distinct value in the 'RAPD' column.
new_df_rand.loc[:, 'RAPD'].value_counts()

1    454
2      3
Name: RAPD, dtype: int64

In [68]:
# Contingency table: rows are 'Hx of HTN' values, columns are 'Pain' values, cells are row counts.
cross_tab = pd.crosstab(
    index=new_df_rand['Hx of HTN'],
    columns=new_df_rand['Pain'],
)
cross_tab

Pain,1,2,3
Hx of HTN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,19,0,0
2,400,36,2


In [69]:
# Last expression of the cell: the notebook renders the selected and relabelled frame.
new_df_rand

Unnamed: 0,Patient ID,Earliest visual symptom,Vision at time of examination,Pain,Pain Score,When Pain,H of OD,Hx of OS,Hx of MS,Hx of Sinusitis,Hx of HTN,Hx of Viral,Hx of Diabetes,PUD,Psychological,Smoking,RAPD,RAPD Site,IOP OD,IOP OS
0,272,2,2,1,1.0,2.0,0,0,2,2,2,2,1,1,2,2,1,2.0,17,17
1,85,3,3,1,2.0,3.0,0,0,2,2,2,1,1,1,1,1,1,2.0,15,15
2,71,2,3,1,3.0,3.0,0,0,2,2,2,2,1,1,2,1,1,2.0,12,12
3,91,2,2,1,2.0,3.0,0,0,2,2,2,2,1,1,2,1,1,2.0,15,15
4,215,2,4,1,1.0,1.0,0,0,2,1,2,2,1,1,2,1,1,2.0,14,14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
452,101,2,2,1,1.0,1.0,0,0,2,2,2,2,1,1,2,2,1,2.0,12,12
453,54,3,3,1,2.0,3.0,0,0,2,2,2,2,2,1,2,2,1,2.0,12,12
454,32,2,4,1,2.0,3.0,0,0,2,2,2,2,1,1,1,1,1,1.0,18,18
455,259,2,3,1,2.0,3.0,0,0,2,1,2,2,1,1,2,1,1,1.0,10,10


# tblNeuro - Basic Neurologic Data for All Exams

In [21]:
# Load the raw table named in this section's heading; the path is relative to the working directory.
df_neuro = pd.read_csv(filepath_or_buffer='tblNeuro.csv')

In [22]:
# Fields to keep from tblNeuro, written as (raw CSV field, readable label) pairs.
# zip() regroups the pairs into raw names and labels; both become positionally aligned lists.
neuro_columns, neuro_column_names = map(list, zip(
    ('PtID', 'Patient ID'),
    ('VISNUM', 'Visit Number'),
    ('FHXON', 'Family History of ON since Last Visit'),
    ('FHXMS', 'Family History of MS since Last Visit'),
    ('MSDXEDIT', 'Edited MS DX'),
    ('NEURDX', 'Reported MS DX'),
))

In [23]:
# Keep only the selected fields and relabel them positionally with the readable names.
new_df_neuro = df_neuro.loc[:, neuro_columns].set_axis(neuro_column_names, axis=1)

In [24]:
# Replace every missing value, in every column, with 0.
# After this step a 0 can no longer be told apart from an originally missing entry.
new_df_neuro = new_df_neuro.fillna(0)

In [25]:
# Frequency of each distinct value in the column; the 0 bucket includes entries that were
# missing before the fillna(0) step.
new_df_neuro.loc[:, 'Family History of MS since Last Visit'].value_counts()

2.0    2107
0.0    1709
1.0      96
Name: Family History of MS since Last Visit, dtype: int64

In [26]:
# Last expression of the cell: the notebook renders the selected frame after missing values were set to 0.
new_df_neuro

Unnamed: 0,Patient ID,Visit Number,Family History of ON since Last Visit,Family History of MS since Last Visit,Edited MS DX,Reported MS DX
0,215,0,2.0,2.0,1,1.0
1,215,7,0.0,0.0,1,1.0
2,215,8,0.0,0.0,1,1.0
3,215,9,0.0,0.0,1,1.0
4,215,10,0.0,0.0,1,1.0
...,...,...,...,...,...,...
3907,297,17,0.0,0.0,4,4.0
3908,290,17,0.0,0.0,4,4.0
3909,238,17,0.0,0.0,4,4.0
3910,176,17,0.0,0.0,1,1.0


# tblYr10MRIGrade - 10-Year MRI Grading Form

In [45]:
# Load the raw table named in this section's heading; the path is relative to the working directory.
df_mri_10 = pd.read_csv(filepath_or_buffer='tblYr10MRIGrade.csv')

In [46]:
# Fields to keep from tblYr10MRIGrade, written as (raw CSV field, readable label) pairs.
# zip() regroups the pairs into raw names and labels; both become positionally aligned lists.
# NOTE(review): 'Pertitent' in the last label is probably a typo for 'Pertinent'. It is kept
# unchanged because the label is a runtime column name that other cells may reference.
mri_10_columns, mri_10_column_names = map(list, zip(
    ('PtID', 'Patient ID'),
    ('MRIGrd10', 'MRI grade'),
    ('MRICls10', 'MRI class'),
    ('MRIQly10', 'Quality'),
    ('MRIChg10', 'Change?'),
    ('LesNew10', 'New Demyelination?'),
    ('LesNum10', 'Number of Lesions?'),
    ('LesCrt10', 'Certainty'),
    ('MRIImp10', 'No New - Improve?'),
    ('PerChg10', 'Other Pertitent Change'),
))

In [47]:
# Keep only the selected fields and relabel them positionally with the readable names.
new_df_mri_10 = df_mri_10.loc[:, mri_10_columns].set_axis(mri_10_column_names, axis=1)

In [48]:
# Last expression of the cell: the notebook renders the selected and relabelled frame.
new_df_mri_10

Unnamed: 0,Patient ID,MRI grade,MRI class,Quality,Change?,New Demyelination?,Number of Lesions?,Certainty,No New - Improve?,Other Pertitent Change
0,136,4,A,,NO,YES,8,LOW,NO,NO
1,348,2,B,,YES,YES,1,LOW,NO,NO
2,157,0,A,,NO,NO,,,NO,NO
3,20,4,A,,YES,YES,4,HIGH,NO,NO
4,122,2,B,,YES,YES,1,HIGH,NO,NO
...,...,...,...,...,...,...,...,...,...,...
117,456,4,A,GOOD,YES,YES,4,HIGH,,NO
118,331,0,A,GOOD,NO,,,,,
119,80,0,A,FAIR,NO,,,,,
120,176,0,A,,NO,,,,,


# tblYr10Neuro - 10-Year Neurologic Examination Form

In [70]:
# Load the raw table named in this section's heading; the path is relative to the working directory.
df_10_neuro = pd.read_csv(filepath_or_buffer='tblYr10Neuro.csv')

In [71]:
# Fields to keep from tblYr10Neuro, written as (raw CSV field, readable label) pairs.
# zip() regroups the pairs into raw names and labels; both become positionally aligned lists.
neuro_10_columns, neuro_10_column_names = map(list, zip(
    ('PtID', 'Patient ID'),
    ('MsDx', 'Clinical MS Dx'),
    ('DisCourse', 'Classify Current Disease Course'),
    ('NeurEvnt', 'Neuro Event'),
    ('NumEvntLV', 'Event Count'),
    ('OpticNrvL', 'Optic Nerve'),
    ('BrainStemL', 'Brain Stem'),
    ('SpinalCrdL', 'Spinal Cord'),
    ('OtherL', 'Other'),
    ('OtherLDs', 'Other Description'),
    ('NeurDysfLV', 'PND since last visit'),
    ('NeurDysfM', 'PND in 12 months'),
))

In [72]:
# Keep only the selected fields and relabel them positionally with the readable names.
new_df_10_neuro = df_10_neuro.loc[:, neuro_10_columns].set_axis(neuro_10_column_names, axis=1)

In [73]:
# Replace every missing value, in every column, with 0, then render the frame.
# NOTE(review): this also puts 0 into 'Other Description', which appears to hold text --
# confirm that a numeric 0 is the intended placeholder there.
new_df_10_neuro = new_df_10_neuro.fillna(0)
new_df_10_neuro

Unnamed: 0,Patient ID,Clinical MS Dx,Classify Current Disease Course,Neuro Event,Event Count,Optic Nerve,Brain Stem,Spinal Cord,Other,Other Description,PND since last visit,PND in 12 months
0,215,4.0,1.0,2,0.0,0.0,0.0,0.0,0.0,0,2.0,2.0
1,174,4.0,1.0,2,0.0,0.0,0.0,0.0,0.0,0,2.0,2.0
2,420,4.0,2.0,1,5.0,1.0,0.0,1.0,0.0,0,2.0,2.0
3,301,4.0,1.0,2,0.0,0.0,0.0,0.0,0.0,0,2.0,2.0
4,251,1.0,0.0,2,0.0,0.0,0.0,0.0,0.0,0,2.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...
314,60,4.0,4.0,1,1.0,1.0,0.0,0.0,0.0,0,1.0,2.0
315,290,4.0,3.0,2,0.0,0.0,0.0,0.0,0.0,0,1.0,1.0
316,238,4.0,1.0,1,1.0,0.0,0.0,0.0,1.0,Cortical,1.0,2.0
317,176,1.0,0.0,2,0.0,0.0,0.0,0.0,0.0,0,2.0,2.0


# tblYr15Neuro - 15-Year Neurologic Examination Form

In [74]:
# Load the raw table named in this section's heading; the path is relative to the working directory.
df_15_neuro = pd.read_csv(filepath_or_buffer='tblYr15Neuro.csv')

In [75]:
# Fields to keep from tblYr15Neuro, written as (raw CSV field, readable label) pairs.
# zip() regroups the pairs into raw names and labels; both become positionally aligned lists.
neuro_15_columns, neuro_15_column_names = map(list, zip(
    ('PtID', 'Patient ID'),
    ('OpticNrvM', 'Optic Nerve'),
    ('BrainStemM', 'Brain Stem'),
    ('SpinalCrdM', 'Spinal Cord'),
    ('OtherM', 'Other'),
))

In [76]:
# Keep only the selected fields and relabel them positionally with the readable names.
new_df_15_neuro = df_15_neuro.loc[:, neuro_15_columns].set_axis(neuro_15_column_names, axis=1)

In [77]:
# Replace every missing value, in every column, with 0.
# After this step a 0 can no longer be told apart from an originally missing entry.
new_df_15_neuro = new_df_15_neuro.fillna(0)

In [78]:
# Last expression of the cell: the notebook renders the selected frame after missing values were set to 0.
new_df_15_neuro

Unnamed: 0,Patient ID,Optic Nerve,Brain Stem,Spinal Cord,Other
0,215,0.0,0.0,0.0,0.0
1,174,0.0,0.0,0.0,0.0
2,420,0.0,1.0,0.0,0.0
3,301,0.0,0.0,0.0,0.0
4,372,0.0,0.0,1.0,0.0
...,...,...,...,...,...
285,297,1.0,0.0,0.0,0.0
286,290,0.0,0.0,0.0,1.0
287,238,0.0,0.0,0.0,0.0
288,176,0.0,0.0,0.0,0.0
