## Exploratory Analysis

In [13]:
import pandas as pd

In [14]:
# Load the dataset; the first CSV column is used as the row index.
data = pd.read_csv("diabeticVision.csv", index_col=0)

## Look at summary statistics

In [15]:
# Summary statistics (count, mean, std, quartiles) for the numeric columns.
data.describe()

Unnamed: 0,id,age,trt,futime,status,risk,group
count,394.0,394.0,394.0,394.0,394.0,394.0,394.0
mean,873.203046,20.781726,0.5,35.579289,0.393401,9.69797,1.507614
std,495.52341,14.812074,0.500636,21.355896,0.489126,1.475033,1.11943
min,5.0,1.0,0.0,0.3,0.0,6.0,0.0
25%,480.0,10.0,0.0,13.9775,0.0,9.0,1.0
50%,834.0,16.0,0.5,38.8,0.0,10.0,1.5
75%,1296.0,30.0,1.0,54.2525,1.0,11.0,3.0
max,1749.0,58.0,1.0,74.97,1.0,12.0,3.0


In [26]:
# Raw (non-normalised) counts of each status value among rows with trt == 1.
data.loc[data["trt"] == 1, "status"].value_counts(normalize=False)

0    143
1     54
Name: status, dtype: int64

In [25]:
# Raw (non-normalised) counts of each status value among rows with trt == 0.
data.loc[data["trt"] == 0, "status"].value_counts(normalize=False)

1    101
0     96
Name: status, dtype: int64

In [20]:
# Number of rows for each value of the `type` column.
data["type"].value_counts()

juvenile    228
adult       166
Name: type, dtype: int64

In [4]:
# Count of missing values in each column.
data.isna().sum()

id        0
laser     0
eye       0
age       0
type      0
trt       0
futime    0
status    0
risk      0
group     0
dtype: int64

In [6]:
# Mean futime for every (trt, status) combination.
data.groupby(["trt", "status"])["futime"].mean()

trt  status
0    0         46.321771
     1         18.948515
1    0         46.668112
     1         18.222407
Name: futime, dtype: float64

In [5]:
# Mean futime for every (trt, status, laser) combination.
data.groupby(["trt", "status", "laser"])["futime"].mean()

trt  status  laser
0    0       argon    43.247234
             xenon    49.270816
     1       argon    16.125000
             xenon    21.716667
1    0       argon    45.927500
             xenon    47.339600
     1       argon    20.004828
             xenon    16.154800
Name: futime, dtype: float64

In [48]:
# Reshape so that each status value becomes a column keyed by (id, trt),
# then keep the futime values in the status == 0 column, dropping the
# (id, trt) pairs that have no entry there.
data.pivot(index=["id", "trt"], columns=["status"])["futime"][0].dropna()

id    trt
5     0      46.23
      1      46.23
14    1      42.50
16    0      42.27
      1      42.27
             ...  
1717  1      51.60
1727  1      49.97
1746  1      45.90
1749  0      41.93
      1      41.93
Name: 0, Length: 239, dtype: float64

## Look at various sub-groups

In [31]:
# Split rows by the trt flag, then split the trt == 1 rows by laser type.
ctrl = data.loc[data["trt"] == 0]
treat = data.loc[data["trt"] == 1]
treat1 = treat.loc[treat["laser"] == "argon"]
treat2 = treat.loc[treat["laser"] == "xenon"]

In [None]:
# Rows from the trt == 1 subset whose status is 1.
treat.loc[treat["status"] == 1]

In [15]:
# Summing strings concatenates them, so this joins the `eye` values of all
# rows sharing an id; the distinct results show which eye combinations occur
# within a single id.
data.groupby("id")["eye"].sum().unique()

array(['leftleft', 'rightright'], dtype=object)

In [17]:
# Inspect the rows whose futime is exactly 46.23.
# NOTE(review): this is an exact floating-point comparison; it relies on the
# stored value matching the literal 46.23 exactly.
data[data["futime"] == 46.23]

Unnamed: 0,id,laser,eye,age,type,trt,futime,status,risk
1,5,argon,left,28,adult,1,46.23,0,9
2,5,argon,left,28,adult,0,46.23,0,9
195,832,argon,right,5,juvenile,1,46.23,0,12
196,832,argon,right,5,juvenile,0,46.23,0,12


In [24]:
# Summary statistics for the trt == 0 rows with status == 1.
# The status filter is explicit so the cell does not rely on `ctrl` having
# been pre-filtered by an earlier, out-of-order execution.
ctrl[ctrl["status"] == 1].describe()

Unnamed: 0,id,age,trt,futime,status,risk
count,101.0,101.0,101.0,101.0,101.0,101.0
mean,801.207921,23.079208,0.0,18.948515,1.0,9.970297
std,481.609869,15.532342,0.0,15.735833,0.0,1.465984
min,14.0,1.0,0.0,0.3,1.0,6.0
25%,409.0,11.0,0.0,6.53,1.0,9.0
50%,722.0,19.0,0.0,13.9,1.0,10.0
75%,1205.0,37.0,0.0,26.47,1.0,11.0
max,1746.0,56.0,0.0,61.83,1.0,12.0


In [25]:
# Summary statistics for the trt == 1, argon rows with status == 1.
# The status filter is explicit so the cell does not rely on `treat1` having
# been pre-filtered by an earlier, out-of-order execution.
treat1[treat1["status"] == 1].describe()

Unnamed: 0,id,age,trt,futime,status,risk
count,29.0,29.0,29.0,29.0,29.0,29.0
mean,822.344828,18.206897,1.0,20.004828,1.0,9.931034
std,497.132368,14.639426,0.0,17.418952,0.0,1.2516
min,100.0,1.0,1.0,1.5,1.0,6.0
25%,357.0,9.0,1.0,5.77,1.0,9.0
50%,866.0,13.0,1.0,13.33,1.0,10.0
75%,1184.0,23.0,1.0,34.37,1.0,10.0
max,1649.0,53.0,1.0,63.33,1.0,12.0


In [26]:
# Summary statistics for the trt == 1, xenon rows with status == 1.
# The status filter is explicit so the cell does not rely on `treat2` having
# been pre-filtered by an earlier, out-of-order execution.
treat2[treat2["status"] == 1].describe()

Unnamed: 0,id,age,trt,futime,status,risk
count,25.0,25.0,25.0,25.0,25.0,25.0
mean,812.56,18.6,1.0,16.1548,1.0,9.72
std,438.838531,13.044795,0.0,10.425035,0.0,1.369915
min,127.0,3.0,1.0,1.77,1.0,6.0
25%,503.0,10.0,1.0,7.07,1.0,9.0
50%,778.0,13.0,1.0,13.83,1.0,10.0
75%,1017.0,25.0,1.0,25.63,1.0,11.0
max,1688.0,50.0,1.0,42.43,1.0,12.0


In [10]:
def assign_group(row):
    """Map a row's (trt, laser) pair to an integer group code.

    Codes:
        trt == 0 -> 0 for laser "argon", 1 for any other laser value
        trt == 1 -> 2 for laser "argon", 3 for any other laser value

    Args:
        row: Mapping-like object supporting ``row["trt"]`` and ``row["laser"]``.

    Returns:
        The integer group code, or None when trt is neither 0 nor 1.
    """
    trt = row["trt"]
    # Pick the base code for the treatment value; bail out on anything else
    # before the laser field is even looked at.
    if trt == 0:
        base = 0
    elif trt == 1:
        base = 2
    else:
        return None
    # Argon keeps the base code; every other laser value is offset by one.
    offset = 0 if row["laser"] == "argon" else 1
    return base + offset

## Identify and label Laser and Treatment as an interaction term

In [11]:
# Apply assign_group row by row to the (laser, trt) pair and store the result
# as a categorical `group` column on `data` (this mutates `data` in place).
data["group"] = data[["laser", "trt"]].apply(assign_group, axis=1).astype("category")
# Display the updated frame.
data

Unnamed: 0,id,laser,eye,age,type,trt,futime,status,risk,group
1,5,argon,left,28,adult,1,46.23,0,9,2
2,5,argon,left,28,adult,0,46.23,0,9,0
3,14,argon,right,12,juvenile,1,42.50,0,8,2
4,14,argon,right,12,juvenile,0,31.30,1,6,0
5,16,xenon,right,9,juvenile,1,42.27,0,11,3
...,...,...,...,...,...,...,...,...,...,...
390,1727,argon,right,33,adult,0,2.90,1,10,0
391,1746,argon,right,3,juvenile,1,45.90,0,10,2
392,1746,argon,right,3,juvenile,0,1.43,1,10,0
393,1749,argon,right,32,adult,1,41.93,0,9,2


In [24]:
# Summary of the `group` column (count, number of distinct values, most
# frequent value and its frequency).
data["group"].describe()

count     394
unique      3
top         2
freq      197
Name: group, dtype: int64

In [12]:
# Write the frame, including the added `group` column, back to disk.
# NOTE(review): this is the same path the notebook reads from at the top, so
# the input file is overwritten and later runs will load a CSV that already
# contains `group`. Consider writing to a separate output file — confirm
# whether any downstream consumer expects this exact filename first.
data.to_csv("diabeticVision.csv")