In [1]:
import numpy as np
import pandas as pd

### Read CSV

In [2]:
# Location of the input CSV (a relative path, so it is resolved against the
# kernel's current working directory).
df_path = 'aiml_dfs/aiml_df.csv'

# Load the file into the DataFrame used by every cell below.
df = pd.read_csv(filepath_or_buffer=df_path)

### Calculate columns

In [35]:
# Body area: keep the four named areas as-is and pool every other value
# (including missing ones) into a single 'Multiple/Other' bucket.
body_areas = ['Head', 'Heart', 'Breast', 'Chest']

# Vectorized recode: `where` keeps a value where the mask is True and
# substitutes the fallback label everywhere else. `isin` is False for NaN,
# so missing body areas also land in 'Multiple/Other'.
df['body_area_clean'] = df['body_area'].where(
    df['body_area'].isin(body_areas),
    'Multiple/Other',
)
df['body_area_clean'].value_counts()

Multiple/Other    60
Head              25
Heart             17
Breast            15
Chest             13
Name: body_area_clean, dtype: int64

In [36]:
# Risk level: binary indicator that is 1 when risk_level is 3 or 4, else 0
# (rows with a missing risk_level also get 0, since `isin` is False for NaN).
high_risk_mask = df['risk_level'].isin([3, 4])
df['risk_level_high'] = high_risk_mask.astype(int)
df['risk_level_high'].value_counts()

0    76
1    54
Name: risk_level_high, dtype: int64

In [37]:
# Multisite: 1 when num_sites is present and is anything other than the
# string '1'; 0 when it equals '1' or is missing.
# NOTE(review): the comparison is against the *string* '1' -- this assumes
# num_sites was read as text; confirm the column's dtype.
site_count_known = df['num_sites'].notnull()
not_single_site = df['num_sites'] != '1'
df['multisite'] = (site_count_known & not_single_site).astype(int)
df['multisite'].value_counts()

0    93
1    37
Name: multisite, dtype: int64

In [38]:
# Number of high-risk devices by body area.
# The indicator column is selected *before* aggregating so that only it is
# summed; summing the whole frame first would also try to aggregate every
# other column (including text columns) just to throw the results away.
df.groupby(['body_area_clean'])['risk_level_high'].sum()

body_area_clean
Breast            12
Chest              6
Head              23
Heart              3
Multiple/Other    10
Name: risk_level_high, dtype: int64

In [39]:
# Number of multisite devices per body area and risk level.
# Select the column first, then sum: this aggregates only `multisite`
# instead of summing every column in the frame and discarding the rest.
df.groupby(['body_area_clean', 'risk_level_high'])['multisite'].sum()

body_area_clean  risk_level_high
Breast           0                   0
                 1                   5
Chest            0                   2
                 1                   3
Head             0                   0
                 1                  12
Heart            0                   2
                 1                   0
Multiple/Other   0                  11
                 1                   2
Name: multisite, dtype: int64

In [40]:
# Number of prospective studies by body area, risk level, and multisite flag.
# Select the column first, then sum: this aggregates only `is_prospective`
# instead of summing every column in the frame and discarding the rest.
df.groupby(['body_area_clean', 'risk_level_high', 'multisite'])['is_prospective'].sum()

body_area_clean  risk_level_high  multisite
Breast           0                0            0
                 1                0            0
                                  1            0
Chest            0                0            0
                                  1            0
                 1                0            0
                                  1            0
Head             0                0            0
                 1                0            0
                                  1            0
Heart            0                0            0
                                  1            0
                 1                0            0
Multiple/Other   0                0            2
                                  1            2
                 1                0            0
                                  1            0
Name: is_prospective, dtype: int64

In [41]:
# Sample size: mean, median, and standard deviation.
# pandas skips missing values in these reductions by default, and `std`
# uses the sample (ddof=1) estimator by default.
print('Sample Size Mean: {}'.format(df['sample_size'].mean()))
print('Sample Size Median: {}'.format(df['sample_size'].median()))
print('Sample Size Standard Deviation: {}'.format(df['sample_size'].std()))

Sample Size Mean: 473.9718309859155
Sample Size Median: 300.0
Sample Size Standard Deviation: 618.6538374759689
