# Auswertung PSCB1 Experiment Daten

In [2]:
# Read in data
import pandas as pd 
import scipy 
from scipy.stats import entropy
import statistics

df = pd.read_excel('/home/ansost/repos/CaSpaSyn/data/pscb1.xlsx') 
df.head()

Unnamed: 0,Lfd. Nr.,Code,Researchers,Language,Date,Start,End,Check,4,7,...,97,99,103,105,109,112,114,117,119,Comments
0,0,346IO,ABR/JS/AS,S,2023-05-15 00:00:00,12:17:00,12:29:00,Niklas,6,6,...,6,6,3,6,7,7,7,7,4,
1,1,346OL,ABR/JS/AS,S,2023-05-15 00:00:00,12:16:00,12:29:00,Niklas,7,6,...,7,7,1,7,7,7,7,7,7,
2,2,000NE,ABR/JS/AS,S,2023-05-15 00:00:00,12:55:00,13:04:00,Niklas,7,4,...,7,7,1,1,7,5,7,7,1,Muchas de estas oraciones son correctas gramat...
3,3,UAI23,ABR/JS/AS,S,2023-05-15 00:00:00,12:55:00,13:03:00,Niklas,7,7,...,3,3,1,5,7,2,7,6,5,Hay que relacionar mejor las palabras para que...
4,4,RF107,ABR/JS/AS,S,2023-05-15 00:00:00,13:00:00,13:09:00,Niklas,7,7,...,4,6,2,2,4,4,4,5,4,


### Item analysis
To examine the variability of the phenomenon, Shannon entropy (base 2) was used to quantify how variable are the items in the experimental data and how variable are the participants.
Additionally, the mean rating and te standard deviation is given. 


**by items:** 
- entropy of each experimental item (non-filler) over all participants
- Column vector of answers for the given item, turned into a probabiliy distribution and thrown into the entropy function by scipy   

In [11]:
subset = df.iloc[:, 8:48]
data = []

for item in subset.columns:
    current = {}
    # Get the current column from the dataframe 
    col = subset[item].tolist()
    
    # Check if there are any NA values and if yes replace them with 0     
    if 'X' in col or 'NA' in col:
        col = subset[item].replace('X', 0)
    
    # Turn into proportions
    proportions = [n / sum(col) for n in col]
    
    # Compute entropy
    current['item'] = item 
    current['entropy'] = round(scipy.stats.entropy(proportions), 2)
    current['stdev'] = round(statistics.stdev(col), 2)
    current['mean'] = round(statistics.mean(col), 2)
    current['median'] = round(statistics.median(col), 2)
    #print(entropy_, stdev, round(mean, 0), median)
    data.append(current)

out = pd.DataFrame(data)
out

Unnamed: 0,item,entropy,stdev,mean,median
0,4,2.64,0.47,6.71,7.0
1,7,2.62,1.21,5.93,6.0
2,9,2.55,0.92,2.07,2.0
3,13,2.59,1.61,5.5,6.0
4,15,2.44,1.42,2.21,2.0
5,19,2.64,0.36,6.86,7.0
6,22,2.61,1.41,5.86,6.5
7,24,2.51,1.9,3.71,4.0
8,28,2.62,1.28,6.43,7.0
9,30,2.43,1.89,2.79,2.0


### Participant analysis
To examine the variability of the phenomenon, Shannon entropy (base 2) was used to quantify how variable are the items in the experimental data and how variable are the participants.
Additionally, mean rating and its standard deviation are given.  


**by participants:**
 - entropy of answers by the participants over all items 
 - Row vector of answers of the given participant, turned into a probability distribution and thrown into the entropy function by scipy 


In [39]:
subset = df.iloc[:, 8:48]
data = []

for index, participant in enumerate(df['Code'].tolist()):
    current = {}
    
    # Get the current column from the dataframe 
    row = subset.loc[index, :].values.flatten().tolist()

    # Check if there are any NA values and if yes replace them with 0     
    if 'X' in row or 'NA' in row:
        row = [0 if item == 'X' or item =='NA' else int(item) for item in row]
    else:
        row = [int(element) for element in row]
    
    # Turn into proportions
    proportions = [n / sum(row) for n in row]
 
    # Compute entropy & Save each data point in dicctionary 
    current['participant'] = participant 
    current['entropy'] = round(scipy.stats.entropy(proportions), 2)
    current['stdev'] = round(statistics.stdev(row), 2)
    current['mean'] = round(statistics.mean(row), 2)
    current['median'] = round(statistics.median(row), 2)
    
    data.append(current)

out = pd.DataFrame(data)
out

Unnamed: 0,participant,entropy,stdev,mean,median
0,346IO,3.65,1.47,6.0,7.0
1,346OL,3.63,1.77,6.08,7.0
2,000NE,3.51,2.43,4.33,4.5
3,UAI23,3.58,2.09,4.67,5.0
4,RF107,3.6,1.66,4.0,4.0
5,AS303,3.56,2.12,4.42,5.0
6,AM013,3.63,1.47,4.17,4.0
7,282LT,3.57,2.37,5.4,7.0
8,460MM,3.53,2.58,5.1,7.0
9,4322AC,3.52,2.39,4.3,5.0
