In [1]:
import numpy as np
import pandas as pd

# Silence every warning category for the rest of the session.
# NOTE(review): this also hides library deprecation warnings — consider a narrower filter.
import warnings
warnings.filterwarnings('ignore')

# Render matplotlib figures inline in the notebook output
%matplotlib inline
import matplotlib.pyplot as plt

# Apply Seaborn's default styling to all subsequent plots
import seaborn as sns
sns.set()

# SVG figures are sharper and more legible, but can take longer to load.
# If rendering is too slow, switch to: %config InlineBackend.figure_format = 'png'
%config InlineBackend.figure_format = 'svg'

In [2]:
# Load the expert-opinion responses; the path is relative to the notebook's working directory
df = pd.read_csv('Data/Expert_Opinions.csv')

In [None]:
# Remove the row labelled 0 (the heading row).
# Not idempotent: re-running this cell raises KeyError once label 0 is gone.
df = df.drop(index=[0])

In [None]:
# Preview the first rows of the frame
df.head()

Each row represents an **Expert** entry and each column represents a **Participant** entry.

In [None]:
# Inspect the raw column labels
df.columns

In [None]:
# Remove the timestamp column, which is labelled 'Unnamed: 1' in the raw file
df = df.drop(columns=['Unnamed: 1'])

In [None]:
# Drop every column that contains at least one missing value (dropna's default how='any')
df = df.dropna(axis='columns')

In [None]:
# Preview the frame after the timestamp and incomplete columns have been removed
df.head()

So, we see that there are 120 columns. This is because there are 10 participants and 12 responses for each participant. The questions break down as follows:
- 1 question for choosing the better mockup.
- 1 question for filling in the comments.
- 5 questions with 2 subparts (one for each option).

#### Creating meaningful columns

In [None]:
# Survey layout constants
numQues = 5                             # rated questions per participant
colsPerParticipant = 2 + 2*numQues      # design choice + comments + two option ratings per question
numParticipants = len(df.columns) // colsPerParticipant   # whole participants present in df

In [None]:
# Building blocks for the generated column names
ques = ['Q{}'.format(n) for n in range(1, numQues + 1)]           # Q1 .. Q<numQues>
part = ['P{}'.format(n) for n in range(1, numParticipants + 1)]   # P1 .. P<numParticipants>
opt = ['O1', 'O2']                                                # the two options rated per question
design = [p + 'D' for p in part]                                  # design-preference column per participant
comments = [p + 'C' for p in part]                                # comments column per participant

In [None]:
# Show the generated design-preference column names (one per participant)
design

In [None]:
# Rating column names in participant -> question -> option order,
# e.g. P1Q1O1, P1Q1O2, P1Q2O1, ...
tmp = [p + q + o for p in part for q in ques for o in opt]

In [None]:
# Assemble the full column list: for every participant, the design ('D') and
# comments ('C') names followed by that participant's rating names.
# The chunk size is derived from the question and option lists instead of a
# hard-coded 10, so the layout stays aligned if either list changes.
ratingsPerParticipant = len(ques) * len(opt)

final = []
for k in range(len(part)):
    start = k * ratingsPerParticipant
    final.extend([design[k], comments[k]])
    final.extend(tmp[start:start + ratingsPerParticipant])

In [None]:
# Entries 12-23: the twelve generated names belonging to the second participant (P2)
(final[12:24])

Above are the column names generated for one participant (the slice shown is participant 2). Each participant has 12 questions and one column per question, so 10 participants give 120 columns. The naming convention is:
* P1D: For participant1 what is the design preference
* P1C: For participant1 what are the comments
* P1Q1O1: For participant1 question1 option1 what is the rating given
* P1Q1O2: For participant1 question1 option2 what is the rating given

In [None]:
# Replace the raw headers with the generated names; pandas raises ValueError
# if len(final) differs from the number of columns in df
df.columns = final

In [None]:
# Spot-check the renamed columns: first 5 rows, first 20 columns
df.iloc[:5,:20]

In [None]:
# Encode the design-choice columns (names ending in 'D') numerically,
# 'Option 1' -> 1 and 'Option 2' -> 2, which makes them easier to read.
optionsdict = {'Option 1': 1, 'Option 2': 2}
design_cols = [col for col in df.columns if col[-1] == 'D']
for col in design_cols:
    # Restrict the replacement to this one column; other columns are untouched
    df = df.replace({col: optionsdict})

In [None]:
# Same slice again, to check the values now shown in the 'D' columns
df.iloc[:5,:20]

In [None]:
# Read the participant -> design mapping and flatten it to a plain list.
# NOTE(review): this path has no 'Data/' prefix, unlike the expert-opinions file — confirm location.
designmap_df = pd.read_csv('Participant_Design_Map.csv')
# The single-name unpacking requires exactly one column; any other shape raises ValueError
[designmap] = designmap_df.values.T.tolist()

In [None]:
# Show the flattened mapping; it is indexed by participant position in the split below
designmap

#### Separate the designs with Assistant and designs without Assistant

In [None]:
# Split the rating columns by whether the rated design was drawn with the assistant.
# designmap[k] selects which option counts as "with assistant" for participant k:
#   1 -> the option-1 columns ('...O1'), 0 -> the option-2 columns ('...O2').
with_assistant_cols = []
without_assistant_cols = []
for pos, c in enumerate(df.columns):
    # Each participant owns a contiguous run of colsPerParticipant columns
    k = pos // colsPerParticipant

    if 'O' not in c:
        # Design-choice ('D') and comment ('C') columns are not ratings, so they
        # go in neither list. To keep them, append c to both lists here instead.
        continue

    drawn_with_assistant = (
        (c.endswith('1') and designmap[k] == 1)
        or (c.endswith('2') and designmap[k] == 0)
    )
    if drawn_with_assistant:
        with_assistant_cols.append(c)
    else:
        without_assistant_cols.append(c)

In [None]:
# Column subsets of df: ratings of designs drawn with / without the assistant
wi_df = df.loc[:, with_assistant_cols]
wo_df = df.loc[:, without_assistant_cols]

- *wi_df* is the dataframe for designs drawn with the assistant 
- *wo_df* is the dataframe for designs drawn without the assistant

In [None]:
# Summary statistics for the with-assistant columns
wi_df.describe()

## Visualization

In [None]:
# Keep only the question part of each name (e.g. 'P3Q2O1' -> 'Q2') by slicing off
# the participant prefix and the option suffix.
# Not idempotent: running this cell a second time leaves empty column names.
for frame in (wi_df, wo_df):
    frame.columns = [name[-4:-2] for name in frame.columns]

In [None]:
# First five rows, all columns, of the with-assistant frame
wi_df.iloc[:5,:]

In [None]:
# Step 1: columns that share a label are averaged within each row, collapsing the
# per-participant columns into one column per question.
# NOTE(review): groupby(..., axis=1) is deprecated in recent pandas releases — confirm
# the installed version still accepts it.
wi_df_grouped_mean = wi_df.groupby(wi_df.columns, axis=1).mean()
wo_df_grouped_mean = wo_df.groupby(wo_df.columns, axis=1).mean()
# Step 2: mean of each question column over all rows
wo_df_mean = wo_df_grouped_mean.mean()
wi_df_mean = wi_df_grouped_mean.mean()

In [None]:
# Same two steps with the median: first the median over columns sharing a label
# within each row, then the median of each resulting question column over all rows.
# NOTE(review): groupby(..., axis=1) is deprecated in recent pandas releases — confirm
# the installed version still accepts it.
wi_df_grouped_median = wi_df.groupby(wi_df.columns, axis=1).median()
wo_df_grouped_median = wo_df.groupby(wo_df.columns, axis=1).median()
wo_df_median = wo_df_grouped_median.median()
wi_df_median = wi_df_grouped_median.median()

In [None]:
# One row per question; column 0 = without the assistant, column 1 = with the assistant
ques_df_mean = pd.DataFrame([
    wo_df_mean,
    wi_df_mean,
]).transpose()
ques_df_mean

In [None]:
# One row per question; column 0 = without the assistant, column 1 = with the assistant
ques_df_median = pd.DataFrame([
    wo_df_median,
    wi_df_median,
]).transpose()
ques_df_median

In [None]:
def plot_ques_wise(df,ylabel):
    """Draw a grouped bar chart of per-question ratings.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per question and two columns. The legend names the first
        column 'Without Assistant' and the second 'With Assistant'.
    ylabel : str
        Label for the y axis (e.g. 'Mean Rating').

    The tick labels are fixed and applied by position, so the rows of ``df``
    are assumed to be in the same order as ``labels`` — TODO confirm against
    the questionnaire.
    """
    labels = ['Usability','Completeness','Familiarity','Attractiveness','Consistency']
    # Work on the Axes returned by pandas rather than pyplot's implicit current axes
    ax = df.plot(kind='bar')
    ax.legend(['Without Assistant','With Assistant'])
    # Tick positions follow the label list instead of a hard-coded count
    ax.set_xticks(range(len(labels)))
    ax.set_xticklabels(labels, rotation=10)
    ax.set_xlabel('Parameters')
    ax.set_ylabel(ylabel)

### TODO: Add labels and whiskers to each bar

In [None]:
# Bar chart of the per-question mean ratings, without vs. with the assistant
plot_ques_wise(ques_df_mean,'Mean Rating')

In [None]:
# Bar chart of the per-question median ratings, without vs. with the assistant
plot_ques_wise(ques_df_median, 'Median Rating')

### What are the other kinds of plots that can be drawn from experts data?