In [245]:
import pandas as pd
import numpy as np
import math

# Import data

In [246]:
import os
import glob

df=pd.DataFrame()
for filepath in glob.glob(os.path.join('raw_data', '*.txt')):
    tmp=pd.read_csv(filepath,header=None)
    tmp.columns=['Trial','ChartType','RealRatio','InputRatio','indexDif']
    #tmp= tmp.insert(loc=1, column='ID', value=filepath[9:][:-4])
    tmp['Name']=filepath[9:][:-4]
    if len(df)==0:
        df=tmp
    else:
        df=df.append(tmp).reset_index(drop=True)
df

Unnamed: 0,Trial,ChartType,RealRatio,InputRatio,indexDif,Name
0,1,Bubble,0.608244,0.90,0,MeijieWang
1,2,Bubble,0.548130,0.70,0,MeijieWang
2,3,Pie,0.600000,0.70,8,MeijieWang
3,4,Bubble,0.594684,0.60,0,MeijieWang
4,5,Line,0.567568,0.50,2,MeijieWang
5,6,Line,0.857143,0.80,5,MeijieWang
6,7,Bubble,0.513444,0.70,0,MeijieWang
7,8,Bubble,0.277138,0.20,0,MeijieWang
8,9,Bubble,0.212819,0.20,0,MeijieWang
9,10,Pie,0.666667,0.40,1,MeijieWang


# Compute error

In [247]:
#compute error
error=abs(df.RealRatio-df.InputRatio)*100+(1/8)
error[error<1]=1
df['log']=pd.DataFrame(np.log2(error))
df.head(5)

Unnamed: 0,Trial,ChartType,RealRatio,InputRatio,indexDif,Name,log
0,1,Bubble,0.608244,0.9,0,MeijieWang,4.872858
1,2,Bubble,0.54813,0.7,0,MeijieWang,3.93659
2,3,Pie,0.6,0.7,8,MeijieWang,3.33985
3,4,Bubble,0.594684,0.6,0,MeijieWang,0.0
4,5,Line,0.567568,0.5,2,MeijieWang,2.782777


In [248]:
df[df['log']<0]

Unnamed: 0,Trial,ChartType,RealRatio,InputRatio,indexDif,Name,log


# Divide data by chartType

In [249]:
#Divide data by chartType
Piedf=df[df.ChartType=="Pie"].reset_index(drop=True)
Bubbledf=df[df.ChartType=="Bubble"].reset_index(drop=True)
Linedf=df[df.ChartType=="Line"].reset_index(drop=True)
Piedf.head(5)

Unnamed: 0,Trial,ChartType,RealRatio,InputRatio,indexDif,Name,log
0,3,Pie,0.6,0.7,8,MeijieWang,3.33985
1,10,Pie,0.666667,0.4,1,MeijieWang,4.743712
2,12,Pie,0.916667,0.8,1,MeijieWang,3.559696
3,16,Pie,0.888889,0.8,5,MeijieWang,3.17215
4,19,Pie,0.454545,0.4,6,MeijieWang,2.480148


# Average error

In [292]:
print("Pie chart: ",Piedf.log.mean())
print("Line chart: ",Linedf.log.mean())
print("Bubble chart: ",Bubbledf.log.mean())

Pie chart:  2.6102638673558465
Line chart:  3.592643478287618
Bubble chart:  3.127869447027823


# Bootstrapped  CI

In [287]:
def bootstrapMeanList(df,N=1000,n=None):
    """ Bootstrap resample mean array
    Parameters
    ----------
    df : a dataframe wigh "log" column
    n : int, optional
      length of resampled array, equal to len(X) if n==None
    N: int, optional
      length of the mean array
    Results
    -------
    returns resamples log error mean array
    """
    length=len(df)
    if n==None:
        n=length
    meanlist=[]
    for i in range(N):
        indexArray=np.random.randint(0,length,n)
        resample=np.array(df.log)[indexArray]
        meanlist.append(resample.mean())
    return np.array(meanlist)
def CI(Array):
    n=len(Array)
    mean=Array.mean()
    std=Array.std()
    cih=mean + 1.96*std/math.sqrt(n)
    cil=mean - 1.96*std/math.sqrt(n)
    return cil,cih

In [288]:
PieCIL,PieCIH=CI(bootstrapMeanList(Piedf,N=10000))
LineCIL,LineCIH=CI(bootstrapMeanList(Linedf,N=10000))
BubbleCIL,BubbleCIH=CI(bootstrapMeanList(Bubbledf,N=10000))

In [289]:
print(PieCIL,PieCIH)

2.607828372913248 2.611430341774905


In [290]:
print(LineCIL,LineCIH)

3.5904279344587176 3.5940475822854703


In [291]:
print(BubbleCIL,BubbleCIH)

3.127252072515344 3.1306457541581394


In [282]:
Ye=df[df['Name']=="HaoYin"]
YePie=Ye[Ye.ChartType=="Pie"]
YeLine=Ye[Ye.ChartType=="Line"]
YeBubble=Ye[Ye.ChartType=="Bubble"]
YePieCIL,YePieCIH=CI(bootstrapMeanList(YePie,N=10000))
YeLineCIL,YeLineCIH=CI(bootstrapMeanList(YeLine,N=10000))
YeBubbleCIL,YeBubbleCIH=CI(bootstrapMeanList(YeBubble,N=10000))

In [283]:
print(YePieCIL,YePieCIH)

2.062203168608048 2.0769425798731613


In [284]:
print(YeLineCIL,YeLineCIH)

2.6680905341133045 2.679997180470243


In [285]:
print(YeBubbleCIL,YeBubbleCIH)

4.165973941435457 4.173216919187643
