In [321]:
from IPython.display import HTML

# Render a button that toggles visibility of every notebook input cell
# ('div.input'). The script calls code_toggle() once on document ready, so the
# raw code starts hidden; clicking the form button flips it back and forth.
# NOTE(review): relies on jQuery ('$') being available in the page - confirm
# for the front end this notebook is viewed in.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')

# Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
import os
from math import pi
from itertools import combinations_with_replacement
import pickle
from IPython.display import display
# pd.set_option('display.max_rows', 500)
# pd.options.display.max_colwidth=100
from natsort import natsorted
import seaborn as sns
import scipy.stats as stats 
from collections import Counter
from itertools import groupby
from operator import itemgetter
from itertools import combinations
import time
import math as math
import choix
from astropy.stats import median_absolute_deviation
import plotly.graph_objects as go

# Functions

In [2]:
def radarPlotDF(countsdf, maxpt, title, **kwargs):
    """
    Draw a matplotlib radar (spider) plot comparing two columns of counts.

    The first column of ``countsdf`` is drawn and labelled 'Pre', the second
    is drawn and labelled 'Post'. One spoke is drawn per row of ``countsdf``
    and the row index supplies the spoke labels.

    countsdf: dataframe whose first two columns hold the counts to plot
    maxpt: upper limit of the radial axis (also controls the radial ticks)
    title: title placed on the plot

    optional-
    colorcode: pass 'on' to draw a ring of hue-coded bars around the plot.
    """
    show_ring = kwargs.get('colorcode', None) == 'on'

    # Hue ring geometry. The ring always has 16 slices, independent of the
    # number of rows in countsdf.
    # NOTE(review): the ring is drawn from radius 15 to 16, which presumably
    # assumes maxpt == 16 - confirm against callers.
    ring_slices = 16
    ring_hues = [float('%1.2f' % h)
                 for h in np.linspace(0, 1, ring_slices, endpoint=False)]
    ring_colors = plt.cm.hsv(ring_hues)
    ring_theta = np.linspace(0.0, 2 * np.pi, ring_slices, endpoint=False)
    ring_height = np.repeat(1, ring_slices)
    ring_width = np.repeat(0.25, ring_slices)

    def _closed(column):
        # Repeat the first point at the end so the outline closes on itself.
        points = list(column.values)
        return points + points[:1]

    pre_col = countsdf.iloc[:, 0]
    post_col = countsdf.iloc[:, 1]
    spoke_labels = [str(label) for label in pre_col.index]
    n_spokes = len(spoke_labels)
    pre_vals = _closed(pre_col)
    post_vals = _closed(post_col)

    # One evenly spaced angle per spoke, plus the closing angle.
    spoke_angles = [k / float(n_spokes) * 2 * pi for k in range(n_spokes)]
    closed_angles = spoke_angles + spoke_angles[:1]

    ax = plt.subplot(111, polar=True)
    plt.xticks(spoke_angles, spoke_labels, color='grey', size=8)
    ax.set_rlabel_position(0)
    # Radial grid at every integer below maxpt, with the tick labels hidden.
    plt.yticks(list(np.arange(0, maxpt)), [], color="black", size=5)
    plt.ylim(0, maxpt)

    # Outlines first, then translucent fills (same order as the legend labels).
    for outline in (pre_vals, post_vals):
        ax.plot(closed_angles, outline, linewidth=1, linestyle='solid')
    for outline, shade in ((pre_vals, 'b'), (post_vals, 'r')):
        ax.fill(closed_angles, outline, shade, alpha=0.1)

    if show_ring:
        ax.bar(ring_theta, ring_height, width=ring_width, bottom=15,
               color=ring_colors)
    ax.legend(['Pre', 'Post'])
    plt.title(title)
    
def interactiveRadar(countsDF, subname, category):
    """
    Show an interactive plotly radar chart of the first three columns.

    Columns 0, 1 and 2 of ``countsDF`` are drawn as the 'Pre', 'Post' and
    '3 Day' (follow-up) traces. The row index, converted to str and truncated
    to 7 characters, is used as the angular category labels.

    countsDF: dataframe with at least three columns of counts
    subname: subject name, used as the first part of the figure title
    category: stimulus category name, used as the second part of the title

    Returns nothing; the figure is displayed with ``fig.show()``.
    """
    labels = [str(label)[0:7] for label in countsDF.index]
    # Close the polygon: the first point is repeated at the end of BOTH r and
    # theta. Previously only r was extended, so r and theta differed in length
    # and the closing point had no angle to be drawn at.
    closed_labels = labels + labels[:1]

    fig = go.Figure()
    # (column position, legend name); legend strings are kept exactly as before.
    for col_pos, trace_name in ((0, 'Pre'), (1, 'Post '), (2, '3 Day ')):
        radii = list(countsDF.iloc[:, col_pos].values)
        radii += radii[:1]
        fig.add_trace(go.Scatterpolar(
            opacity=0.75,
            r=radii,
            theta=closed_labels,
            fill='toself',
            name=trace_name
        ))

    fig.update_layout(
        title=subname + ' ' + category,
        polar=dict(
            radialaxis=dict(
                visible=True,
                showline=False,
                # NOTE(review): fixed radial range; presumably the maximum
                # possible count is 16 - confirm.
                range=[0, 16]
            )),
        showlegend=True
    )

    fig.show()


def getPercentile(data):
    """
    Return the tail percentiles of ``data`` as a one-row dataframe.

    The percentiles computed are 1-5 and 95-99 (inclusive); the returned
    dataframe has one column per percentile, labelled with that percentile.
    """
    tail_qs = np.concatenate((np.arange(1, 6, 1), np.arange(95, 100, 1)))
    tail_values = np.percentile(data, tail_qs)
    return pd.DataFrame([tail_values], columns=tail_qs)
def cdfTable(data, binz, spacing):
    """
    Tabulate the empirical cumulative distribution of ``data`` on a fixed grid.

    Bin edges are ``np.arange(1, binz, spacing)``. The result is a one-row
    dataframe whose columns are the left edge of each bin and whose values are
    the cumulative fraction of the in-range observations up to and including
    that bin (so the last value is 1 when any observation falls in range).

    data: 1-D sequence of observations
    binz: exclusive upper bound used to build the bin edges
    spacing: bin width

    The histogram is computed with numpy directly, so no matplotlib figure is
    drawn or closed as a side effect. Column labels are taken from the bin
    edges themselves so they always match the number of bins.
    """
    edges = np.arange(1, binz, spacing)
    density, _ = np.histogram(data, bins=edges, density=True)
    # density * bin width = fraction of observations per bin; accumulate it.
    cdf = np.cumsum(density * np.diff(edges))
    cdftable = pd.DataFrame(cdf, index=edges[:-1])
    return cdftable.T
def mycdf(data):
    """
    Plot the empirical CDF of ``data`` on the current matplotlib axes.

    One point is drawn per distinct value in ``data``, at the cumulative
    fraction of observations less than or equal to that value. Nothing is
    returned and the figure is not shown explicitly.
    """
    n_obs = len(data)
    # One bin per distinct value; the extra edge closes the last bin.
    distinct = sorted(set(data))
    edges_in = np.append(distinct, distinct[-1] + 1)
    freq, edges_out = np.histogram(data, bins=edges_in, density=False)
    # Convert counts to fractions, then accumulate.
    cum_frac = np.cumsum(freq.astype(float) / n_obs)
    plt.plot(edges_out[0:-1], cum_frac, linestyle='--', marker="o", color='b')
    plt.ylim((0, 1))
    plt.ylabel("CDF")
    plt.grid(True)
def colorcode(value):
    """
    Return the CSS colour rule for one dataframe cell.

    The cell is green when ``value`` lies in either extreme tail, i.e.
    value >= 1 - 0.01/3 or value <= 0.01/3 (a 0.01 level divided by 3
    comparisons); otherwise it is black.
    """
    ncomp = 3
    in_upper_tail = value >= 1-(0.01/ncomp)
    in_lower_tail = value <= (0.01/ncomp)
    shade = 'green' if (in_upper_tail or in_lower_tail) else 'black'
    return 'color: %s' % shade
def getPscore(data, score):
    """
    Return the percentile rank of ``score`` within ``data`` as a fraction.

    Uses scipy's percentileofscore with kind='mean' and rescales the result
    from the 0-100 range to 0-1.
    """
    percent_rank = stats.percentileofscore(data, score, kind='mean')
    return percent_rank / 100


# About

All statistics are split into two categories: counts and ranks. The simulations were run with both ranks and counts. Count statistics look solely at the number of wins and do not consider the relative relationship of the counts within a list; the ranking method does. If there are ties, the tied items are each given the average of their ranks.

The true counts of each subject do show differences between the counts and ranks statistics (except for kD, which is based on ranks in both cases anyway). These differences are not major and the two are correlated, but the differences can be seen in the radar plots as well.

There are some differences in the overall outcomes of the tests in counts vs. ranks


**aveAllStrLen**: average length of the strings. Calculated as the overall length of the pre and post strings divided by the total number of strings.

*    If there are no ties, the total length must add up to 16, so it ranges from 16/2 => 8 to 16/16 => 1; it is anti-correlated with allNumStr.
*    It decreases with spikiness.

**ESD**: uses the Grubbs test sequentially to test for up to n outliers. Here n=3 and sig=0.05.

**kD**: a distance metric derived from Kendall that indicates the dissimilarity, or distance, between two lists. It proceeds by ranking both lists and then counting the number of rank-order disagreements.



In [3]:
def getFinal(df):
    """
    Summarise which subjects exceed the significance cut-off on each statistic.

    df: dataframe of percentile scores (0-1) with one column per subject and
        rows labelled at least 'aveAllStrLen', 'ESD' and 'kD'. Column labels
        must be strings containing a number (e.g. 'sub3'); the first run of
        digits in each label is used as the subject number.

    Returns a one-row dataframe with columns 'Range', 'Outlier', 'kD' and
    'None'. Each cell is a list of subject numbers: subjects whose
    'aveAllStrLen' / 'ESD' / 'kD' score is greater than 1 - 0.01/3, and, under
    'None', subjects that exceed the cut-off on none of the three. A subject
    may appear under more than one of the first three columns.

    Raises IndexError if a flagged column label contains no digits.
    """
    checkVal = 1 - (0.01 / 3)
    # (output column, row of df holding the statistic it is derived from)
    measures = (('Range', 'aveAllStrLen'), ('Outlier', 'ESD'), ('kD', 'kD'))
    flags = pd.DataFrame(
        {out_col: df.loc[row_label].gt(checkVal).values
         for out_col, row_label in measures},
        index=df.columns)
    flags['None'] = ~flags.any(axis=1)

    def _subject_numbers(mask):
        # First run of digits in each flagged label, e.g. 'sub10' -> 10.
        return [int(re.findall(r'\d+', label)[0])
                for label in flags.index[mask.values]]

    # The earlier implementation also maintained a "leftover subjects" list by
    # calling list.remove for every flagged subject; that list was never used
    # and raised ValueError when a subject was flagged under both Range and
    # Outlier, so it has been dropped.
    return pd.DataFrame({out_col: [_subject_numbers(flags[out_col])]
                         for out_col in ('Range', 'Outlier', 'kD', 'None')})

# load first visit

In [4]:
# Subject class for loading
class Subject:
    """
    Minimal container for one first-visit subject.

    NOTE(review): presumably this definition has to exist so the pickled
    subject objects loaded below can be reconstructed - confirm against the
    code that wrote the pickles.
    """

    def __init__(self, subname):
        # Subject identifier, stored as given.
        self.subname = subname
# Load the pickled first-visit subject objects 'sub1' ... 'sub10' into `subs`,
# keyed by subject name.
# NOTE(review): the absolute, machine-specific path and the os.chdir make this
# cell non-portable; consider a configurable data directory.
os.chdir(r"C:\Users\al33m\Box Sync\Grzywacz Lab\Experiment\Data_Files\schlosspilot\round2\AB\pickle\sim")
subs={}
for i in ['sub'+str(n) for n in np.arange(1,11)]:
    print(i)
    # The file handle is named `fh` rather than `input` so the builtin input()
    # is not shadowed for the rest of the kernel session.
    # SECURITY: pickle.load can execute arbitrary code - only load trusted files.
    with open(i+'.pkl', 'rb') as fh:
        subs[i] = pickle.load(fh)

sub1
sub2
sub3
sub4
sub5
sub6
sub7
sub8
sub9
sub10


# load follow up

In [5]:
class FUPSubject:
    """
    Minimal container for one follow-up-visit subject.

    NOTE(review): presumably this definition has to exist so the pickled
    follow-up objects loaded below can be reconstructed - confirm against the
    code that wrote the pickles.
    """

    def __init__(self, subname):
        # Subject identifier, stored as given.
        self.subname = subname
# Load the pickled follow-up subject objects into `FUPsubs`, keyed by subject
# name. Only subjects 2, 4, 6, 7, 8, 9 and 10 are loaded here.
# NOTE(review): the absolute, machine-specific path and the os.chdir make this
# cell non-portable; consider a configurable data directory.
os.chdir(r"C:\Users\al33m\Box Sync\Grzywacz Lab\Experiment\Data_Files\schlosspilot\round2\Visit2\pickle\sim")
FUPsubs={}
for i in ['sub'+str(n) for n in [2,4,6,7,8,9,10]]:
    print(i)
    # The file handle is named `fh` rather than `input` so the builtin input()
    # is not shadowed for the rest of the kernel session.
    # SECURITY: pickle.load can execute arbitrary code - only load trusted files.
    with open(i+'.pkl', 'rb') as fh:
        FUPsubs[i] = pickle.load(fh)

sub2
sub4
sub6
sub7
sub8
sub9
sub10


# Shirts

In [6]:
# Names of the statistics compared between true counts and simulations; used
# below both as column labels of the true-count tables and as keys into the
# simulation results.
checkList=['aveAllStrLen','ESD','kD']

## True Counts - (Pre-Post) AB

### counts based

In [7]:
# Make the table of true (observed) statistics for the first-visit subjects.
# Each subject's AB_ShirtsTrueCounts frame is stacked in one pd.concat call
# (DataFrame.append was removed in pandas 2.0 and re-copies the frame on every
# iteration). Re-labelling the index with the subject names requires the
# stacked table to hold exactly one row per subject.
AB_ShirtsTrueCounts=pd.concat([subs[key].AB_ShirtsTrueCounts for key in subs.keys()])
AB_ShirtsTrueCounts.index=list(subs.keys())
AB_ShirtsTrueCounts[checkList]

Unnamed: 0,aveAllStrLen,ESD,kD
sub1,2.0,0,33
sub2,3.5,1,14
sub3,2.0,0,33
sub4,3.0,0,15
sub5,2.0,0,63
sub6,2.5,0,14
sub7,1.625,0,31
sub8,2.0,0,55
sub9,1.5,0,13
sub10,1.5,0,10


## Simulation - (Pre-Post) AB

### from Counts

In [177]:
# Attach each subject's simulated shirt statistics (read from a per-subject
# pickle) as the attribute AB_ShirtsCountsInfoSim.
# NOTE(review): absolute, machine-specific path; this cell's execution count is
# also out of sequence with its neighbours, so check it runs in order on a
# fresh kernel.
for i in subs.keys():
    setattr(
        subs[i],
        'AB_ShirtsCountsInfoSim',
        pd.read_pickle(r"C:\Users\al33m\Google Drive\GU Neuroaesthetics Lab\SimData\FirstVisitSim\Sims\Shirts\countsInfo\AB\fromCounts/"+i+'_AB_ShirtsCountsInfoSim.pkl'),
    )


In [8]:
# Percentile score of each subject's true statistic within that subject's
# simulated distribution: one row per statistic in checkList, one column per
# subject. Columns are filled directly instead of growing the frame with
# DataFrame.append (removed in pandas 2.0) and transposing afterwards.
AB_ShirtsCountTestDF=pd.DataFrame(index=checkList)
for i in subs.keys():
    AB_ShirtsCountTestDF[i]=[
        getPscore(subs[i].AB_ShirtsCountsInfoSim[measure],
                  subs[i].AB_ShirtsTrueCounts[measure][0])
        for measure in checkList
    ]
# Highlight scores that fall in either extreme tail.
AB_ShirtsCountTestDF.style.applymap(colorcode)

Unnamed: 0,sub1,sub2,sub3,sub4,sub5,sub6,sub7,sub8,sub9,sub10
aveAllStrLen,0.612,0.951,0.598,0.9235,0.562,0.876,0.2815,0.564,0.226,0.3455
ESD,0.405,0.934,0.42,0.398,0.411,0.4065,0.394,0.377,0.417,0.39
kD,1.0,0.954,0.3925,0.7815,0.9415,0.9355,0.983,1.0,0.821,0.804


In [9]:
# Subject numbers exceeding the cut-off per statistic for the Pre-Post (AB) comparison.
getFinal(AB_ShirtsCountTestDF)

Unnamed: 0,Range,Outlier,kD,None
0,[],[],"[1, 8]","[2, 3, 4, 5, 6, 7, 9, 10]"


## True Counts - (Pre-FollowUp) AC

### from counts

In [12]:
# Make the table of true (observed) statistics for the follow-up subjects.
# Each subject's AC_ShirtsTrueCounts frame is stacked in one pd.concat call
# (DataFrame.append was removed in pandas 2.0 and re-copies the frame on every
# iteration). Re-labelling the index with the subject names requires the
# stacked table to hold exactly one row per subject.
AC_ShirtsTrueCounts=pd.concat([FUPsubs[key].AC_ShirtsTrueCounts for key in FUPsubs.keys()])
AC_ShirtsTrueCounts.index=list(FUPsubs.keys())
AC_ShirtsTrueCounts[checkList]

Unnamed: 0,aveAllStrLen,ESD,kD
sub2,2.16667,0,12
sub4,2.5,1,21
sub6,1.4,0,13
sub7,1.4,0,16
sub8,1.8,0,61
sub9,2.75,0,12
sub10,1.375,1,10


## Simulation - (Pre-FollowUp) AC

### from counts

In [13]:
# Percentile score of each follow-up subject's true statistic within that
# subject's simulated distribution: one row per statistic in checkList, one
# column per subject. Columns are filled directly instead of growing the frame
# with DataFrame.append (removed in pandas 2.0) and transposing afterwards.
# NOTE(review): AC_ShirtsCountsInfoSim is not assigned anywhere in the visible
# cells; presumably it is already stored on the pickled objects - confirm.
AC_ShirtsCountTestDF=pd.DataFrame(index=checkList)
# The loop variable is deliberately left as the module-level name `i`, as in
# the previous version of this cell, because a later cell reads `i`.
for i in FUPsubs.keys():
    AC_ShirtsCountTestDF[i]=[
        getPscore(FUPsubs[i].AC_ShirtsCountsInfoSim[measure],
                  FUPsubs[i].AC_ShirtsTrueCounts[measure][0])
        for measure in checkList
    ]
# Highlight scores that fall in either extreme tail.
AC_ShirtsCountTestDF.style.applymap(colorcode)

Unnamed: 0,sub2,sub4,sub6,sub7,sub8,sub9,sub10
aveAllStrLen,0.726,0.836,0.209,0.095,0.4225,0.8935,0.228
ESD,0.4025,0.883,0.412,0.3945,0.41,0.419,0.8605
kD,0.962,0.9815,0.788,0.5535,1.0,0.9425,0.9695


In [14]:
# Subject numbers exceeding the cut-off per statistic for the Pre-FollowUp (AC) comparison.
getFinal(AC_ShirtsCountTestDF)

Unnamed: 0,Range,Outlier,kD,None
0,[],[],[8],"[2, 4, 6, 7, 9, 10]"


## All Together

### from counts

In [15]:
# Stack the AB and AC summaries into one table, one row per comparison.
counts_together=pd.concat(
    [getFinal(frame) for frame in (AB_ShirtsCountTestDF, AC_ShirtsCountTestDF)]
)
counts_together.index=['AB','AC']
# Number of distinct subjects flagged under any of the first three columns
# (Range, Outlier, kD) of each row; the 'None' column is excluded.
counts_together['#subs changed']=[
    len(np.unique([subject
                   for flagged in counts_together.loc[row][:3].values
                   for subject in flagged]))
    for row in counts_together.index
]
counts_together

Unnamed: 0,Range,Outlier,kD,None,#subs changed
AB,[],[],"[1, 8]","[2, 3, 4, 5, 6, 7, 9, 10]",2
AC,[],[],[8],"[2, 4, 6, 7, 9, 10]",1


## Radar Plots

## Follow Up AC/BC

#### from counts

In [18]:
# Display the shirt counts of one follow-up subject.
# NOTE(review): `i` is not set in this cell - it is whatever value an earlier
# loop left behind, so the subject shown depends on execution order. Consider
# using an explicit key such as FUPsubs['sub2'].
FUPsubs[i].ShirtsCounts

Unnamed: 0,sub2FUP
schloss2.jpg,3
schloss3.jpg,13
schloss4.jpg,3
schloss5.jpg,6
schloss6.jpg,6
schloss7.jpg,10
schloss8.jpg,3
schloss9.jpg,9
schloss10.jpg,6
schloss11.jpg,1
