In [None]:
import numpy as np
import importlib
from matplotlib import pyplot as plt
import pandas as pd
import math
import os
from scipy import stats
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
import sklearn
%matplotlib inline

import analyzer
import subject
import visualization
import utils
# Re-import the project-local modules so that edits made on disk are picked up
# without restarting the kernel. The last reload stays a bare expression so the
# cell still echoes the reloaded `utils` module.
for _module in (analyzer, subject, visualization):
    importlib.reload(_module)
importlib.reload(utils)

In [None]:
# All inputs live under ./data relative to the directory the notebook is run from.
rootPath = os.getcwd()
dataFilePath = os.path.join(rootPath, 'data')
# Logged data files share the same root folder as the survey CSV.
loggedDataFilesRootPath = dataFilePath
surveyDataFilePath = os.path.join(loggedDataFilesRootPath, 'survey.csv')

In [None]:
# Build the analyzer from the survey CSV and the root folder of the logged data
# files (both resolve to ./data, see the path cell above).
resultAnalyzer = analyzer.ResultAnalyzer(surveyDataFilePath, loggedDataFilesRootPath)

In [None]:
# Grade the answers with a confidence threshold of 10.
# NOTE(review): the scale/meaning of the threshold is defined in
# analyzer.ResultAnalyzer.gradeAnswers and is not visible here — TODO confirm.
resultAnalyzer.gradeAnswers(confidenceThreshold=10)

In [5]:
# Compute IGD against target_selection.csv, which is read from the working
# directory itself (not from ./data like the other inputs).
resultAnalyzer.computeIGD(os.path.join(rootPath, 'target_selection.csv'), useShortestDistance=True)

In [6]:
# resultAnalyzer.computeHV(os.path.join(rootPath, 'target_selection.csv'))

In [7]:
# Bind the first eight subjects to s0..s7 so they can be grouped by hand below.
s0, s1, s2, s3, s4, s5, s6, s7 = (resultAnalyzer.subjects[i] for i in range(8))


# Get DataFrame

In [8]:
# Fetch the combined frame (option="all"). Judging by the keyword names, HV columns
# are left out and IGD is not entropy-adjusted — TODO confirm against getDataFrame.
# NOTE: `df` is re-created in the "Rename variable" section below with
# excludeHV=False, so this first frame only feeds the `df1` summary table.
df = resultAnalyzer.getDataFrame(option="all",excludeHV=True, adjustIGDUsingEntropy=False)
# resultAnalyzer.getDataFrame(option="learningTask")
# resultAnalyzer.getDataFrame(option="featureSynthesis")
# resultAnalyzer.getDataFrame(option="designSynthesis")

In [9]:
# Plain-text table of four measures, printed without the index column.
summaryColumns = ['FScore','meanDist2UP','meanIGD','selfAssessment']
df1 = df.loc[:, summaryColumns]
print(df1.to_string(index=False))

 FScore  meanDist2UP  meanIGD  selfAssessment
   0.61        0.831    0.073            2.50
   0.22        0.436    0.034            1.50
   0.50        0.813    0.076            2.50
   0.39        0.623    0.019            2.25
   0.50        0.510    0.156            3.00
   0.28        0.915    0.106            2.25
   0.78        0.674    0.066            3.25
   0.50        0.807    0.017            2.50


# Rename variables

In [10]:
df = resultAnalyzer.getDataFrame(option="all", excludeHV=False, adjustIGDUsingEntropy=False)

# Short display names for the measures: {existing column name: alias}.
# Only one source column should map to 'SA' at a time; the commented entries
# are the alternative sources.
namingScheme = {
    'fcl': 'FID',
    'fpwc': 'FPWC',
    'dcl': 'DID',
    'dpwc': 'DPWC',
    'conf_fcl': 'confFID',
    'conf_fpwc': 'confFPWC',
    'conf_dcl': 'confDID',
    'conf_dpwc': 'confDPWC',
    'adjustedIGD': 'DSYN',
    'normalizedDist2UP': 'FSYN',
    # 'normalizedSA': 'SA',
    # 'normalizedSAQ1': 'SA',
    'normalizedSAExclude1': 'SA',
    'normalizedCM': 'CM',
    'totalScore': 'TestScore',
}

# Add each alias as an extra column. Columns are copied rather than renamed, so
# the original names stay available; source columns missing from the frame are skipped.
for sourceCol, alias in namingScheme.items():
    if sourceCol not in df.columns:
        continue
    df[alias] = df[sourceCol].values

In [11]:
conditionAlias = {"interactive":"IKE",
                 "automated":"AKE",
                 "manual":"MKE"}

# Replace the raw condition labels with their short aliases.
# Labels that already are aliases are kept unchanged so the cell can be re-run on
# the same frame (previously a second run raised KeyError: 'IKE'). Any other
# label is still reported as an error instead of being passed through silently.
knownAliases = set(conditionAlias.values())
newCol = []
for cond in df["condition"].values:
    if cond in conditionAlias:
        newCol.append(conditionAlias[cond])
    elif cond in knownAliases:
        newCol.append(cond)
    else:
        raise KeyError(cond)

df["condition"] = newCol

In [12]:
df.columns

Index(['id', 'condition', 'type1', 'type2', 'LT_numDesignViewed',
       'normalizedSAExclude1', 'FS_numFilterUsed', 'entropy',
       'numHighLevelEdges', 'NScore', 'confDID', 'HScore',
       'DS_numDesignEvaluated', 'normalizedSAQ1', 'normalizedCM', 'confFID',
       'FS_numFeatureTested', 'FID', 'numNodes', 'LT_numFeatureFound',
       'totalScore', 'adjustedIGD', 'normalizedDist2UP', 'FPWC', 'DScore',
       'meanIGD', 'adjustedHV', 'confFPWC', 'LT_numFeatureViewed',
       'selfAssessmentQ1', 'confDPWC', 'FS_numFeatureViewed',
       'LT_numFilterUsed', 'DPWC', 'numHighLevelConcepts', 'normalizedSA',
       'selfAssessment', 'numEdges', 'normalizedIGD', 'LScore', 'FScore',
       'meanDist2UP', 'DS_numDesignViewed', 'DID', 'PScore', 'numDesigns',
       'HV', 'selfAssessmentExclude1', 'DSYN', 'FSYN', 'SA', 'CM',
       'TestScore'],
      dtype='object')

In [13]:
df[["DID","meanIGD","normalizedIGD","DSYN"]]

Unnamed: 0,DID,meanIGD,normalizedIGD,DSYN
0,0.778,0.073,0.597,0.706
1,0.667,0.034,0.878,0.297
2,0.444,0.076,0.576,0.511
3,0.667,0.019,0.986,1.0
4,0.667,0.156,0.0,0.0
5,0.444,0.106,0.36,0.213
6,1.0,0.066,0.647,0.328
7,0.778,0.017,1.0,0.507


# Set Visualizer

In [14]:
# Subject groups per condition, in the order of the groupNames passed to the
# Visualizer below (g1 -> MKE, g2 -> AKE, g3 -> IKE).
# NOTE(review): s7 is not assigned to any group, although the frame printed
# further down lists row 7 under AKE — confirm whether leaving it out is intentional.
g1 = [s2, s5]
g2 = [s1, s4]
g3 = [s0, s3, s6]
# g3 = [s6]

In [15]:
# Shared visualizer for the three condition groups, fed with the aliased frame.
visualizer = visualization.Visualizer(groups=[g1,g2,g3], groupNames=['MKE','AKE','IKE'])
visualizer.setDataFrame(df)

# Synthesis task data visualization

In [None]:
visualizer.designSynthesisScatter(markers=['o','^','d'])


In [None]:
visualizer.featureSynthesisScatter(markers=['o','^','d'])


# Parallel coordinates

In [None]:
visualizer.parallelCoordinates(columns=['FID','FPWC','DID','DPWC'], 
                                figsize=(11,5))

In [None]:
visualizer.parallelCoordinates(columns=['FID','FPWC','DID','DPWC','FScore','DScore','PScore','NScore','HScore','LScore','totalScore'], 
                                figsize=(15,5))

In [None]:
# Colors of matplotlib's current property cycle; the single-condition plots
# below pick entries from this list by position.
PROP_CYCLE = plt.rcParams['axes.prop_cycle']
COLORS = PROP_CYCLE.by_key()['color']

In [None]:
# Parallel-coordinates plot restricted to the IKE condition.
tempVis = visualization.Visualizer(groups=[g3], groupNames=['IKE'])
# The mask is computed on `df` (aliased condition labels) and applied to a freshly
# built frame, so it relies on both frames sharing the same row index.
ikeMask = df['condition'] == "IKE"
tempDF = resultAnalyzer.getDataFrame().loc[ikeMask]
tempVis.setDataFrame(tempDF)
scoreColumns = ['FID','FPWC','DID','DPWC','FScore','DScore','PScore','NScore','HScore','LScore','totalScore']
tempVis.parallelCoordinates(columns=scoreColumns, figsize=(15,5))


In [None]:
# Parallel-coordinates plot restricted to the AKE condition.
# g2 is the AKE subject group (see the group definitions above); this cell used to
# pass g3, the IKE group, under the 'AKE' label.
tempVis = visualization.Visualizer(groups=[g2], groupNames=['AKE'])
# The mask is computed on `df` (aliased condition labels) and applied to a freshly
# built frame, so it relies on both frames sharing the same row index.
tempDF = resultAnalyzer.getDataFrame().loc[df['condition'] == "AKE"]
tempVis.setDataFrame(tempDF)

tempVis.parallelCoordinates(columns=['FID','FPWC','DID','DPWC','FScore','DScore','PScore','NScore','HScore','LScore','totalScore'],
                            figsize=(15,5),
                            colors=[COLORS[1]])



In [None]:
# Parallel-coordinates plot restricted to the MKE condition.
# g1 is the MKE subject group (see the group definitions above); this cell used to
# pass g3, the IKE group, under the 'MKE' label.
tempVis = visualization.Visualizer(groups=[g1], groupNames=['MKE'])
# The mask is computed on `df` (aliased condition labels) and applied to a freshly
# built frame, so it relies on both frames sharing the same row index.
tempDF = resultAnalyzer.getDataFrame().loc[df['condition'] == "MKE"]
tempVis.setDataFrame(tempDF)
tempVis.parallelCoordinates(columns=['FID','FPWC','DID','DPWC','FScore','DScore','PScore','NScore','HScore','LScore','totalScore'],
                            figsize=(15,5),
                            colors=[COLORS[2]])


# Box plots

In [None]:
# visualizer.boxPlot(columns=['FScore','DScore', 'totalScore'], nrows=1, ncols=3, sharey=True, figsize=(14,4.5))

In [None]:
# visualizer.boxPlot(columns=['PScore','NScore', 'FScore'], nrows=1, ncols=3, sharey=True, figsize=(14,4.5))

In [None]:
# visualizer.boxPlot(columns=['HScore','LScore', 'FScore'], nrows=1, ncols=3, sharey=True, figsize=(14,4.5))

In [None]:
# visualizer.boxPlot(columns=['meanDist2UP', 'meanIGD', 'totalScore'], nrows=1, ncols=3, sharey=False, figsize=(14,4.5))

In [None]:
visualizer.barPlot(columns=['FID','FPWC','DID','DPWC','FSYN','DSYN','SA'], 
                   showError=True, 
                   nrows=4, 
                   ncols=2, 
                   sharey=True, 
                   subplotsAdjust={"hspace":0.35, "wspace":0.1}, 
                   subplotsHide=[7],
                   figsize=(8,14))


In [None]:
visualizer.barPlot(columns=['FScore','DScore', 'TestScore'], 
                   showError=True, nrows=1, ncols=3, 
                   subplotsAdjust={"wspace":0.08}, 
                   sharey=True, figsize=(13,3.5))


In [None]:
visualizer.barPlot(columns=['FID','FPWC','DID','DPWC'], showError=True, nrows=1, ncols=4, sharey=True, figsize=(16,3))


In [None]:
visualizer.barPlot(columns=['PScore','NScore','HScore','LScore'], 
                   showError=True, 
                   nrows=2, ncols=2, sharey=True, 
                   subplotsAdjust={"hspace":0.35, "wspace":0.1}, 
                   figsize=(8,7))

In [None]:
# visualizer.barPlot(columns=['PScore','NScore', 'FScore'], showError=True, nrows=1, ncols=3, sharey=True, figsize=(13,3))

In [None]:
# visualizer.barPlot(columns=['HScore','LScore', 'FScore'], showError=True, nrows=1, ncols=3, sharey=True, figsize=(13,3))

In [None]:
visualizer.barPlot(dataFrame=df, columns=['selfAssessment', 'meanIGD', 'meanDist2UP', 'totalScore'], showError=True, nrows=1, ncols=4, sharey=False, figsize=(15,3))
# visualizer.barPlot(dataFrame=df, columns=['selfAssessmentExclude1', 'meanIGD', 'meanDist2UP', 'totalScore'], showError=True, nrows=1, ncols=4, sharey=False, figsize=(15,3))


In [None]:
visualizer.barPlot(dataFrame=df, columns=['normalizedIGD', 'adjustedIGD', 'selfAssessment', 'totalScore'], showError=True, nrows=1, ncols=4, sharey=False, figsize=(15,3))


In [None]:
# visualizer.barPlot(dataFrame=df, columns=['HV', 'adjustedHV', 'selfAssessment', 'totalScore'], showError=True, nrows=1, ncols=4, sharey=False, figsize=(15,3))


In [None]:
visualizer.barPlot(dataFrame=df, columns=["numNodes","numEdges","numHighLevelEdges","totalScore"], showError=True, nrows=1, ncols=4, sharey=False, figsize=(15,3))


# Box plots - Logged Data

In [None]:
# df_lt = resultAnalyzer.getDataFrame(option="learningTask",columns=['totalScore'])
# visualizer.boxPlot(columns=['LT_numDesignViewed','LT_numFeatureViewed','totalScore'], dataFrame=df_lt, nrows=1, ncols=3, sharey=False, figsize=(14,3.5))


In [None]:
df_lt = resultAnalyzer.getDataFrame(option="learningTask", columns=["totalScore"])
visualizer.barPlot(columns=['LT_numDesignViewed','LT_numFeatureViewed','totalScore'], dataFrame=df_lt, showError=True, nrows=1, ncols=3, sharey=False, figsize=(14,3.5))


In [None]:
# df_fs = resultAnalyzer.getDataFrame(option="featureSynthesis")
# visualizer.boxPlot(columns=['meanDist2UP','FS_numFeatureViewed', 'FS_numFilterUsed','FS_numFeatureTested'], dataFrame=df_fs, nrows=1, ncols=4, sharey=False, figsize=(16,4))



In [None]:
df_fs = resultAnalyzer.getDataFrame(option="featureSynthesis", invertSIB=False)
visualizer.barPlot(columns=['meanDist2UP','FS_numFeatureViewed', 'FS_numFilterUsed','FS_numFeatureTested'], dataFrame=df_fs, showError=True, nrows=1, ncols=4, sharey=False, figsize=(16,3.5))


In [None]:
# df_ds = resultAnalyzer.getDataFrame(option="designSynthesis")
# visualizer.boxPlot(columns=['meanIGD','numDesigns', 'DS_numDesignViewed'], dataFrame=df_ds, nrows=1, ncols=3, sharey=False, figsize=(14,3.5))



In [None]:
df_ds = resultAnalyzer.getDataFrame(option="designSynthesis", invertSIB=False)
visualizer.barPlot(columns=['meanIGD','numDesigns', 'DS_numDesignViewed'], dataFrame=df_ds, showError=True, nrows=1, ncols=3, sharey=False, figsize=(14,3.5))


# Statistical tests

In [None]:
# One sub-frame per condition, selected on the aliased "condition" column.
# NOTE: `dfm` is re-bound to a different frame in the "Figures for learning measure
# comparison" section; re-run this cell before repeating the tests below.
dfi, dfa, dfm = (df.loc[df['condition'] == label] for label in ("IKE", "AKE", "MKE"))

In [None]:
conditions = ["MKE", "AKE", "IKE"]
measures = ["FID","FPWC","DID","DPWC","FSYN","DSYN","SA"]

# Sub-frame for each condition label.
frameByCondition = {"MKE": dfm, "AKE": dfa, "IKE": dfi}

# One LaTeX table row per measure: "<measure> & mean (std) & ..." following `conditions`.
# np.std is called with its default arguments here.
out = []
for mea in measures:
    cells = [mea]
    for cond in conditions:
        dat = frameByCondition.get(cond)
        mean = round(np.mean(dat[mea]),2)
        stdev = round(np.std(dat[mea]),2)
        cells.append("{0} ({1})".format(mean, stdev))
    out.append(" & ".join(cells))

print(" \\\\\n".join(out))
print()

# Pairwise independent t-tests; only comparisons with a halved p-value below 0.05 are printed.
# NOTE(review): halving the two-sided p-value yields a one-sided p-value only when the
# difference lies in the hypothesized direction; the sign of t is not checked — confirm.
comparisons = (
    ("{0} IKE-AKE | p-val: {1}", dfi, dfa),
    ("{0} IKE-MKE | p-val: {1}", dfi, dfm),
    ("{0} AKE-MKE | p-val: {1}", dfa, dfm),
)
for mea in measures:
    halved = [stats.ttest_ind(first[mea], second[mea])[1] / 2 for _, first, second in comparisons]
    for (template, _, _), pval in zip(comparisons, halved):
        if pval < 0.05:
            print(template.format(mea, pval))

In [None]:
conditions = ["MKE", "AKE", "IKE"]
measures = ["FScore","DScore","TestScore"]

# Sub-frame for each condition label.
frameByCondition = {"MKE": dfm, "AKE": dfa, "IKE": dfi}

# One LaTeX table row per measure: "<measure> & mean (std) & ..." following `conditions`.
out = []
for mea in measures:
    cells = [mea]
    for cond in conditions:
        dat = frameByCondition.get(cond)
        mean = round(np.mean(dat[mea]),2)
        stdev = round(np.std(dat[mea]),2)
        cells.append("{0} ({1})".format(mean, stdev))
    out.append(" & ".join(cells))

print(" \\\\\n".join(out))
print()

# Pairwise independent t-tests; only comparisons with a halved p-value below 0.05 are printed.
# NOTE(review): the halved p-value is one-sided only if the difference has the
# hypothesized sign, which is not checked here — confirm.
comparisons = (
    ("{0} IKE-AKE | p-val: {1}", dfi, dfa),
    ("{0} IKE-MKE | p-val: {1}", dfi, dfm),
    ("{0} AKE-MKE | p-val: {1}", dfa, dfm),
)
for mea in measures:
    halved = [stats.ttest_ind(first[mea], second[mea])[1] / 2 for _, first, second in comparisons]
    for (template, _, _), pval in zip(comparisons, halved):
        if pval < 0.05:
            print(template.format(mea, pval))

In [None]:
conditions = ["MKE", "AKE", "IKE"]
measures = ["PScore","NScore","HScore","LScore"]

# Sub-frame for each condition label.
frameByCondition = {"MKE": dfm, "AKE": dfa, "IKE": dfi}

# One LaTeX table row per measure: "<measure> & mean (std) & ..." following `conditions`.
out = []
for mea in measures:
    cells = [mea]
    for cond in conditions:
        dat = frameByCondition.get(cond)
        mean = round(np.mean(dat[mea]),2)
        stdev = round(np.std(dat[mea]),2)
        cells.append("{0} ({1})".format(mean, stdev))
    out.append(" & ".join(cells))

print(" \\\\\n".join(out))
print()

# Pairwise independent t-tests; only comparisons with a halved p-value below 0.05 are printed.
# NOTE(review): the halved p-value is one-sided only if the difference has the
# hypothesized sign, which is not checked here — confirm.
comparisons = (
    ("{0} IKE-AKE | p-val: {1}", dfi, dfa),
    ("{0} IKE-MKE | p-val: {1}", dfi, dfm),
    ("{0} AKE-MKE | p-val: {1}", dfa, dfm),
)
for mea in measures:
    halved = [stats.ttest_ind(first[mea], second[mea])[1] / 2 for _, first, second in comparisons]
    for (template, _, _), pval in zip(comparisons, halved):
        if pval < 0.05:
            print(template.format(mea, pval))

In [None]:
# Every measure from the three tables above, in the same order.
measures = [
    "FID","FPWC","DID","DPWC","FSYN","DSYN","SA",
    "FScore","DScore","TestScore",
    "PScore","NScore","HScore","LScore",
]

# One-way ANOVA across the three conditions; print only measures with p < 0.05.
for mea in measures:
    samples = (dfi[mea], dfa[mea], dfm[mea])
    _, pval = stats.f_oneway(*samples)
    if not pval < 0.05:
        continue
    print("{0} | p-val: {1}".format(mea, pval))

In [None]:
# from sklearn.linear_model import LinearRegression

# # X = data.iloc[:, 0].values.reshape(-1, 1)  # values converts it into a numpy array

# X = df[['condition','NScore']].values
# y = df['totalScore'].values.reshape(-1, 1)  # -1 means that calculate the dimension of rows, but have 1 column
# linear_regressor = LinearRegression()  # create object for the class
# linear_regressor.fit(X, y)  # perform linear regression


In [None]:
# linear_regressor.score(X, y)

In [None]:
# linear_regressor.coef_

In [None]:
# import statsmodels.api as sm
# import statsmodels.formula.api as smf

# data = sm.datasets.get_rdataset("dietox", "geepack").data
# md = smf.mixedlm("Weight ~ Time", data, groups=data["Pig"])
# mdf = md.fit()
# print(mdf.summary())

# Survey results

In [None]:
visualizer.featurePrefPlot(figsize=(10,18))

1.
- Two radars should not be assigned to the same orbit
- Instruments {AERO_POL, CPR_RAD, SAR_ALTIM} are not assigned to the same orbit

2.
- Instruments that measure ocean color are not assigned to AM orbit
- AERO_LID, CPR_RAD are not assigned to SSO-600-DD orbit

3.
- Atmospheric chemistry instruments are assigned together in the same orbit
- HIRES_SOUND and HYP_IMAG are assigned together in the same orbit

4.
- SSO-800-DD is empty, except when it contains VEG_LID and/or SAR_ALTIM
- {OCE_SPEC, AERO_POL, AERO_LID, HYP_ERB, CPR_RAD, VEG_INSAR, CHEM_UVSPEC,CHEM_SWIRSPEC, HYP_IMAG, HIRES_SOUND} are not assigned to SSO-800-DD

5.
- VEG_INSAR is not used, except when it is assigned to LEO-600-polar
- HYP_ERB is not assigned to any of the orbits in the set {LEO-600-polar, SSO-600-AM, SSO-600-DD, SSO-800-DD}

6.
- Radars and lidars are not assigned to the same orbit, except when VEG_LID and VEG_INSAR are assigned together
- AERO_LID and CPR_RAD are not assigned together in the same orbit, and SAR_ALTIM and AERO_LID are not assigned together in the same orbit

7.
- AERO_POL and SAR_ALTIM are NOT assigned together in the same orbit
- CHEM_UVSPEC and HIRES_SOUND are assigned together in at least one of the orbits

8.
- OCE_SPEC is assigned to LEO-600-polar
- VEG_INSAR is NOT assigned to SSO-600-AM

9.
- AERO_LID is NOT used
- SAR_ALTIM is used

In [None]:
visualizer.selfAssessmentPlot(displayStderr=True)

1. I learned a lot about the given design problem from this exercise
2. The features I learned from this exercise will be useful to solve a different (but similar)
design problem in the future
3. The tool was useful in checking hypotheses I had based on my prior knowledge
4. The tool was useful in checking new hypotheses I had as I analyzed the data.

0=strongly disagree, 1=disagree, 2=undecided, 3=agree, 4=strongly agree

# Get transcript comments

In [None]:
# Split the subjects by experimental condition.
# NOTE(review): 4 / 5 / 6 are presumably the numeric condition codes for
# manual / automated / interactive — verify against ResultAnalyzer.filterSubjects.
subjects_manual = resultAnalyzer.filterSubjects(condition=4)
subjects_auto = resultAnalyzer.filterSubjects(condition=5)
subjects_interactive = resultAnalyzer.filterSubjects(condition=6)

In [None]:
comments = resultAnalyzer.getComments(
    resultAnalyzer.subjects,
    "problem_solving_task",
    "F_cl_3",
    displayCondition=True,
    displayParticipantID=False,
    displayKeyword=False,
)

# Print every returned comment followed by one blank line.
for line in comments:
    print(line, end="\n\n")
    

In [None]:
resultAnalyzer.getComments(subjects_manual, "problem_solving_task", "F", displayParticipantID=False, displayKeyword=True)


In [None]:
resultAnalyzer.getComments(subjects_auto, "problem_solving_task", "F_cl", displayParticipantID=False, displayKeyword=True)


In [None]:
comments = resultAnalyzer.getComments(resultAnalyzer.subjects, "survey", "gen", displayCondition=True, displayParticipantID=False, displayKeyword=True)

for line in comments:
    print(line + "\n")

## Correlation 

In [None]:
sns.regplot(x="SA", y="totalScore", data=df, ci=95)

In [None]:
# NOTE(review): sm.OLS takes (endog, exog), so this regresses SA on totalScore —
# the reverse of the regplot above (x=SA, y=totalScore). statsmodels does not add
# a constant automatically, so the fit has no intercept. Confirm both are intended.
mod = sm.OLS(np.array(df['SA']), np.array(df['totalScore']))
res = mod.fit()
print(res.summary())

In [None]:
sns.lmplot(x="selfAssessment", y="totalScore", hue="condition", markers=["o","x","^"], data=df, ci=None);
# sns.lmplot(x="selfAssessmentExclude1", y="totalScore", hue="condition", markers=["o","x","^"], data=df, ci=None);

In [None]:
# sns.lmplot(x="selfAssessment", y="totalScore", hue="type1", markers=["o","x"], data=df, ci=None);
# sns.lmplot(x="selfAssessmentExclude1", y="totalScore", hue="type", markers=["o","x"], data=df, ci=None);

## Correlation - Design synthesis task

In [None]:
sns.regplot(x="DPWC", y="DSYN", data=df, ci=None);

In [None]:
# Regress selfAssessment on meanIGD and print the fit summary.
# The model is bound to `mod` so that the fit below actually uses it; it used to be
# assigned to `xmod`, which made `mod.fit()` silently refit the stale model left
# over from an earlier cell.
# NOTE(review): no constant is added to the regressor and statsmodels does not add
# one automatically, so the fit has no intercept — confirm this is intended.
mod = sm.OLS(np.array(df['selfAssessment']), np.array(df['meanIGD']))
res = mod.fit()
print(res.summary())

In [None]:
sns.lmplot(x="selfAssessment", y="normalizedIGD", hue="condition", markers=["o","x","^"], data=df, ci=None);
# sns.lmplot(x="selfAssessmentExclude1", y="normalizedIGD", hue="condition", markers=["o","x","^"], data=df, ci=None);

In [None]:
# sns.lmplot(x="selfAssessment", y="normalizedIGD", hue="type1", markers=["o","x"], data=df, ci=None);
# sns.lmplot(x="selfAssessmentExclude1", y="normalizedIGD", hue="type", markers=["o","x"], data=df, ci=None);

In [None]:
sns.lmplot(x="totalScore", y="meanIGD", hue="condition", markers=["o","x","^"], data=df, ci=None);


In [None]:
sns.regplot(x="selfAssessment", y="numDesigns", data=df, ci=None);

In [None]:
sns.regplot(x="numDesigns", y="totalScore", data=df, ci=None);

In [None]:
sns.regplot(x="numDesigns", y="normalizedIGD", data=df, ci=None);

In [None]:
sns.regplot(x="numDesigns", y="selfAssessment", data=df, ci=None);

In [None]:
# df = df.drop(index=1)

In [None]:
sns.regplot(x="selfAssessment", y="adjustedIGD", data=df);

In [None]:
sns.regplot(x="DScore", y="adjustedIGD", data=df);

In [None]:
sns.regplot(x="DPWC", y="adjustedIGD", data=df);

## Correlation - feature synthesis task

In [None]:
sns.regplot(x="selfAssessment", y="meanDist2UP", data=df);

In [None]:
# sns.regplot(x="selfAssessmentExclude1", y="meanDist2UP", data=df);

In [None]:
sns.lmplot(x="selfAssessment", y="meanDist2UP", hue="condition", markers=["o","x","^"], data=df, ci=None);
# sns.lmplot(x="selfAssessmentExclude1", y="meanDist2UP", hue="condition", markers=["o","x","^"], data=df, ci=None);

In [None]:
# sns.lmplot(x="selfAssessment", y="meanDist2UP", hue="type", markers=["o","x"], data=df, ci=None);
# sns.lmplot(x="selfAssessmentExclude1", y="meanDist2UP", hue="type", markers=["o","x"], data=df, ci=None);

In [None]:
sns.lmplot(x="totalScore", y="meanDist2UP", hue="condition", markers=["o","x","^"], data=df, ci=None);

In [None]:
mod = sm.OLS(np.array(df['selfAssessment']), np.array(df['meanDist2UP']))
res = mod.fit()
print(res.summary())

## Correlation - concept mapping

In [None]:
sns.regplot(x="numEdges", y="totalScore", data=df);

In [None]:
sns.lmplot(x="numEdges", y="totalScore", hue="type2", markers=["o","x"], data=df, ci=None);


In [None]:
mod = smf.ols(formula='totalScore ~ numEdges * C(type2)', data=df)
res = mod.fit()
print(res.summary())

In [None]:
# Deliberate stop for "Run All": execution halts at this cell.
# A real exception replaces the former invalid statement, so the notebook still
# parses as Python (export to script, linting) and the stop has a clear message.
raise RuntimeError("Intentional stop: 'Run All' halts here; run the cells below manually.")

# Etc

In [None]:
df.columns

In [None]:
sns.pairplot(df[['condition','FScore','DScore','selfAssessment']], kind="reg")

In [None]:
sns.pairplot(df[['condition','PScore','NScore','selfAssessment']], kind="reg")

In [None]:
sns.pairplot(df[['condition','FScore','normalizedDist2UP','selfAssessment']], kind="reg")

In [None]:
sns.pairplot(df[['condition','DScore','adjustedIGD','selfAssessment']], kind="reg")

In [None]:
sns.pairplot(df[['condition','numDesigns','selfAssessment','normalizedIGD']], kind="reg")

In [None]:
sns.pairplot(df[['condition','adjustedIGD','entropy','selfAssessment']], kind="reg")

In [None]:
sns.pairplot(df[['condition','confFID','FID','selfAssessment']], kind="reg")

In [None]:
# Deliberate stop for "Run All": execution halts at this cell.
# A real exception replaces the former invalid statement, so the notebook still
# parses as Python (export to script, linting) and the stop has a clear message.
raise RuntimeError("Intentional stop: 'Run All' halts here; run the cells below manually.")

# Figures for learning measure comparison

In [None]:
# index_implicit = dfRenamed[ dfRenamed["type"] == "implicit" ].index
# dfRenamed = dfRenamed.drop(index_implicit)

In [16]:
# Independent copy holding the condition label plus the eight learning measures.
# NOTE: this re-binds `dfm`, which the "Statistical tests" section uses for the
# MKE-only subset; re-run that section's setup cell before repeating those tests.
dfm = df[['condition','FID','FPWC','FSYN','DID','DPWC','DSYN','CM','SA']].copy()

In [17]:
dfconf = df[['condition','FID','confFID','FPWC','confFPWC','FSYN','DID','confDID','DPWC','confDPWC','DSYN','SA']].copy()


In [18]:
dfm

Unnamed: 0,condition,FID,FPWC,FSYN,DID,DPWC,DSYN,CM,SA
0,IKE,0.889,0.333,0.175,0.778,0.667,0.706,1.0,0.4
1,AKE,0.222,0.222,1.0,0.667,0.333,0.297,0.889,0.2
2,MKE,0.444,0.556,0.213,0.444,0.333,0.511,0.0,0.6
3,IKE,0.333,0.444,0.61,0.667,0.778,1.0,0.722,0.4
4,AKE,0.444,0.556,0.846,0.667,0.222,0.0,0.833,0.8
5,MKE,0.222,0.333,0.0,0.444,0.111,0.213,0.056,0.0
6,IKE,0.889,0.667,0.503,1.0,0.778,0.328,1.0,1.0
7,AKE,0.444,0.556,0.225,0.778,0.556,0.507,0.056,0.6


In [34]:
# Pairwise Pearson results for the measures in `dfm`: coefficients, p-values and
# lower/upper confidence bounds, as the unpacked names indicate.
# NOTE(review): the saved outputs below show Pearson bounds within about ±0.03 of r
# for only eight rows, while the Spearman bounds are far wider — check the interval
# computation in utils.calculate_pearsonr.
rPearson, pPearson, ciLoPearson, ciHiPearson = utils.calculate_pearsonr(dfm, decimal=2)

In [35]:
rSpearman, pSpearman, ciLoSpearman, ciHiSpearman = utils.calculate_spearmanr(dfm, decimal=2)

In [36]:
rPearson

Unnamed: 0,FID,FPWC,FSYN,DID,DPWC,DSYN,CM,SA
FID,1.0,0.42,-0.21,0.71,0.63,0.15,0.49,0.62
FPWC,0.42,1.0,-0.1,0.38,0.31,-0.1,-0.12,0.89
FSYN,-0.21,-0.1,1.0,0.26,0.02,-0.22,0.62,0.22
DID,0.71,0.38,0.26,1.0,0.74,0.09,0.65,0.62
DPWC,0.63,0.31,0.02,0.74,1.0,0.71,0.46,0.42
DSYN,0.15,-0.1,-0.22,0.09,0.71,1.0,0.03,-0.14
CM,0.49,-0.12,0.62,0.65,0.46,0.03,1.0,0.29
SA,0.62,0.89,0.22,0.62,0.42,-0.14,0.29,1.0


In [37]:
pPearson / 2

Unnamed: 0,FID,FPWC,FSYN,DID,DPWC,DSYN,CM,SA
FID,0.0,0.15,0.31,0.025,0.05,0.36,0.11,0.05
FPWC,0.15,0.0,0.41,0.18,0.23,0.41,0.39,0.0
FSYN,0.31,0.41,0.0,0.27,0.48,0.295,0.05,0.305
DID,0.025,0.18,0.27,0.0,0.015,0.42,0.04,0.05
DPWC,0.05,0.23,0.48,0.015,0.0,0.025,0.125,0.15
DSYN,0.36,0.41,0.295,0.42,0.025,0.0,0.47,0.365
CM,0.11,0.39,0.05,0.04,0.125,0.47,0.0,0.245
SA,0.05,0.0,0.305,0.05,0.15,0.365,0.245,0.0


In [38]:
ciLoPearson

Unnamed: 0,FID,FPWC,FSYN,DID,DPWC,DSYN,CM,SA
FID,0.0,0.4,-0.24,0.7,0.61,0.12,0.47,0.6
FPWC,0.4,0.0,-0.13,0.35,0.28,-0.13,-0.15,0.88
FSYN,-0.24,-0.13,0.0,0.23,-0.01,-0.25,0.6,0.19
DID,0.7,0.35,0.23,0.0,0.73,0.06,0.64,0.61
DPWC,0.61,0.28,-0.01,0.73,0.0,0.7,0.44,0.4
DSYN,0.12,-0.13,-0.25,0.06,0.7,0.0,0.0,-0.17
CM,0.47,-0.15,0.6,0.64,0.44,0.0,0.0,0.26
SA,0.6,0.88,0.19,0.61,0.4,-0.17,0.26,0.0


In [39]:
ciHiPearson

Unnamed: 0,FID,FPWC,FSYN,DID,DPWC,DSYN,CM,SA
FID,0.0,0.45,-0.18,0.73,0.64,0.18,0.51,0.63
FPWC,0.45,0.0,-0.07,0.4,0.33,-0.07,-0.09,0.89
FSYN,-0.18,-0.07,0.0,0.28,0.05,-0.2,0.64,0.24
DID,0.73,0.4,0.28,0.0,0.75,0.11,0.67,0.64
DPWC,0.64,0.33,0.05,0.75,0.0,0.72,0.48,0.45
DSYN,0.18,-0.07,-0.2,0.11,0.72,0.0,0.06,-0.12
CM,0.51,-0.09,0.64,0.67,0.48,0.06,0.0,0.31
SA,0.63,0.89,0.24,0.64,0.45,-0.12,0.31,0.0


In [31]:
rSpearman

Unnamed: 0,FID,FPWC,FSYN,DID,DPWC,DSYN,CM,SA
FID,1.0,0.611,-0.198,0.692,0.525,0.296,0.444,0.731
FPWC,0.611,1.0,0.037,0.357,0.298,-0.012,-0.099,0.926
FSYN,-0.198,0.037,1.0,0.173,0.145,-0.214,0.313,0.253
DID,0.692,0.357,0.173,1.0,0.706,0.198,0.706,0.506
DPWC,0.525,0.298,0.145,0.706,1.0,0.735,0.457,0.341
DSYN,0.296,-0.012,-0.214,0.198,0.735,1.0,-0.024,-0.048
CM,0.444,-0.099,0.313,0.706,0.457,-0.024,1.0,0.171
SA,0.731,0.926,0.253,0.506,0.341,-0.048,0.171,1.0


In [32]:
pSpearman / 2

Unnamed: 0,FID,FPWC,FSYN,DID,DPWC,DSYN,CM,SA
FID,0.0,0.0535,0.3195,0.0285,0.0905,0.238,0.1355,0.0195
FPWC,0.0535,0.0,0.4655,0.193,0.2365,0.4885,0.4075,0.0005
FSYN,0.3195,0.4655,0.0,0.341,0.3665,0.305,0.225,0.2725
DID,0.0285,0.193,0.341,0.0,0.025,0.3195,0.025,0.1
DPWC,0.0905,0.2365,0.3665,0.025,0.0,0.019,0.1275,0.204
DSYN,0.238,0.4885,0.305,0.3195,0.019,0.0,0.4775,0.455
CM,0.1355,0.4075,0.225,0.025,0.1275,0.4775,0.0,0.343
SA,0.0195,0.0005,0.2725,0.1,0.204,0.455,0.343,0.0


In [33]:
ciLoSpearman

Unnamed: 0,FID,FPWC,FSYN,DID,DPWC,DSYN,CM,SA
FID,0.0,-0.164,-0.792,-0.024,-0.285,-0.516,-0.379,0.055
FPWC,-0.164,0.0,-0.686,-0.465,-0.515,-0.711,-0.751,0.635
FSYN,-0.792,-0.686,0.0,-0.606,-0.624,-0.798,-0.502,-0.55
DID,-0.024,-0.465,-0.606,0.0,0.004,-0.589,0.004,-0.308
DPWC,-0.285,-0.515,-0.624,0.004,0.0,0.063,-0.365,-0.478
DSYN,-0.516,-0.711,-0.798,-0.589,0.063,0.0,-0.717,-0.728
CM,-0.379,-0.751,-0.502,0.004,-0.365,-0.717,0.0,-0.607
SA,0.055,0.635,-0.55,-0.308,-0.478,-0.728,-0.607,0.0


In [None]:
ciHiSpearman

In [None]:
from scipy.stats import pearsonr, spearmanr

# (score column, matching confidence column) pairs, in print order.
scoreConfidencePairs = [
    ("FID", "confFID"),
    ("FPWC", "confFPWC"),
    ("DID", "confDID"),
    ("DPWC", "confDPWC"),
]

# Pearson results first, then a blank line, then Spearman results.
for scoreCol, confCol in scoreConfidencePairs:
    print(pearsonr(dfconf[scoreCol],dfconf[confCol]))
print()

for scoreCol, confCol in scoreConfidencePairs:
    print(spearmanr(dfconf[scoreCol],dfconf[confCol]))

In [None]:
sns.regplot(x="confFPWC", y="FPWC", data=dfconf, ci=95)

In [None]:
rPearson

In [None]:
x="CM"
y="SA"
sns.regplot(x=x, y=y, data=df, ci=95)
sns.lmplot(x=x, y=y, hue="condition", markers=["o","x","^"], data=df, ci=None)
sns.lmplot(x=x, y=y, hue="type1", markers=["o","x"], data=df, ci=None)
sns.lmplot(x=x, y=y, hue="type2", markers=["o","x"], data=df, ci=None)
sns.lmplot(x=x, y="totalScore", hue="type2", markers=["o","x"], data=df, ci=None)


In [None]:
# corr = dfm.corr()

# # Generate a mask for the upper triangle
# mask = np.zeros_like(corr, dtype=np.bool)
# mask[np.triu_indices_from(mask)] = True

# # Set up the matplotlib figure
# f, ax = plt.subplots(figsize=(11, 13))

# # Generate a custom diverging colormap
# cmap = sns.diverging_palette(220, 10, as_cmap=True)

# # Draw the heatmap with the mask and correct aspect ratio
# sns.heatmap(corr, mask=mask, annot=True, cmap=cmap, vmax=.3, center=0,
#             square=True, linewidths=.5, cbar_kws={"shrink": .6})

In [None]:
tempvis = visualization.Visualizer()
# Add a running row number as the "index" column (once); it is the groupBy key below.
# An explicit membership test replaces the bare try/except, which would also have
# swallowed unrelated errors.
if "index" not in dfm.columns:
    dfm["index"] = list(range(len(dfm)))
tempvis.parallelCoordinates(columns=['FID','FPWC','FSYN','DID','DPWC','DSYN','CM','SA'],
                            colors=None,
                            dataFrame=dfm,
                            grid=True,
                            figsize=(14,4),
                            groupBy="index",
                            removeLegend=True,
                            alpha=0.9)

In [None]:
tempvis = visualization.Visualizer()
tempvis.parallelCoordinates(columns=['FID','FPWC','DID','DPWC','SA'], dataFrame=dfm,
                                colors=["black"]*3,
                                grid=True,
                                figsize=(12,4), 
                                groupBy=None,
                               alpha=0.5)

In [None]:
tempvis = visualization.Visualizer()
# Add a running row number as the "index" column (once); it is the groupBy key below.
# An explicit membership test replaces the bare try/except, which would also have
# swallowed unrelated errors.
if "index" not in dfm.columns:
    dfm["index"] = list(range(len(dfm)))
tempvis.parallelCoordinateVariablePairs(varPairList=[
                                            ['FID','SA'],
                                            ['FPWC','SA'],
                                            ['DID','DPWC'],
                                            ['DPWC','DSYN'],
                                        ],
                                        colors=None,
                                        grid=True,
                                        groupBy="index",
                                        dataFrame=dfm,
                                        removeLegend=True,
                                        nrows=1, ncols=4, sharey=True, figsize=(15,3))


In [None]:
tempvis = visualization.Visualizer()
# Add a running row number as the "index" column (once); it is the groupBy key below.
# An explicit membership test replaces the bare try/except, which would also have
# swallowed unrelated errors.
if "index" not in dfconf.columns:
    dfconf["index"] = list(range(len(dfconf)))
tempvis.parallelCoordinateVariablePairs(varPairList=[
                                            ['confFID','FID'],
                                            ['confFPWC','FPWC'],
                                            ['confDID','DID'],
                                            ['confDPWC','DPWC'],
                                        ],
                                        colors=["black"]*3,
                                        grid=True,
                                        groupBy="index",
                                        dataFrame=dfconf,
                                        removeLegend=True,
                                        nrows=1, ncols=4, sharey=True, figsize=(15,3))


In [None]:
df

In [None]:
# Print the first four self-assessment entries of every subject, space-separated,
# one subject per line.
for s in resultAnalyzer.subjects:
    tempdata = s.learning_self_assessment_data
    out = "{0} {1} {2} {3}".format(*(tempdata[i] for i in range(4)))
    print(out)