In [1]:
import sqlite3
import sys
sys.path.append('../python/')
from measures import loadLex, loadSpatialLex, loadMFTypes
from measures import distSim, KLDSim
from analysis import testSigF, testSigGP, compareGP
from analysis import compareG_KLD, compareR_KLD

In [2]:
# Excluded vocabulary: the non-word list followed by the map-word list.
non_words = loadLex('non_words.txt')
map_words = loadLex('map_words.txt')
neg = non_words + map_words

# loadSpatialLex() hands back three values; they feed the 'types', 'pos'
# and 'lem' settings of the spatial-term analyses further down.
spatial_lex = loadSpatialLex()
(spl_lem_types, spl_all_types, spl_lem) = spatial_lex

# Type list for the "most frequent terms" analyses, computed given `neg`.
mf_types = loadMFTypes(neg)
mf_types

['e',
 'ken',
 'okey',
 'az',
 'shel',
 'yesh',
 'lo',
 'ani',
 'ze',
 'et',
 'ad',
 'li',
 'ata',
 'mitaxat',
 'hu',
 'at',
 'axshav',
 'keilu',
 'naxon',
 'aval',
 'mm',
 'a',
 'veaz',
 'meal',
 'lax']

## Analysis per group of terms and measures
1. all measures operate at session level except where noted
2. results per group of terms:
   * overall significance
   * significance per gender pair
   * differences between gender pairs
   * differences between individual genders (asymmetric measure KLDSim only)
   * differences between roles (task level, asymmetric measure KLDSim only)

### Spatial terms

In [3]:
# Spatial terms with distSim: overall significance first, then the two
# gender-pair reports (significance per pair, differences between pairs).
f = 'Spatial Terms, distSim'
kwargs = dict(
    types=spl_lem_types, typesID='ID_TYPES_SPL',
    neg=neg, negID='ID_NEG_STD',
    lem=spl_lem, lemID='ID_LEM_SPL',
)
df = testSigF(distSim, kwargs=kwargs, fs=[f + ', overall'])
# Each report is computed and printed before the next one is started.
for gp_test, suffix in ((testSigGP, ', per gp'), (compareGP, ', gp comp.')):
    print(gp_test(distSim, kwargs=kwargs, fs=[f + suffix]), end='\n\n')

Spatial Terms, distSim, overall 7.378786767779509e-06 +6.677950909276285 15

                                f  m fm
Spatial Terms, distSim, per gp  +  +  +

                                 ff vs. mm ff vs. fm mm vs. fm
Spatial Terms, distSim, gp comp.                              



In [4]:
# KLD on spatial terms; 'pos' is passed so that the fractions in the
# distributions add up to 1.
f = 'Spatial Terms, KLDSim'
kwargs = dict(
    types=spl_lem_types, typesID='ID_TYPES_SPL',
    pos=spl_all_types, posID='ID_POS_SPL',
    neg=neg, negID='ID_NEG_STD',
    lem=spl_lem, lemID='ID_LEM_SPL',
)
df = testSigF(KLDSim, kwargs=kwargs, fs=[f + ', overall'])
# Each report is computed and printed before the next one is started.
for gp_test, suffix in ((testSigGP, ', per gp'), (compareGP, ', gp comp.')):
    print(gp_test(KLDSim, kwargs=kwargs, fs=[f + suffix]), end='\n\n')

# The gender comparison uses the settings as they stand ...
dfG = compareG_KLD(f + ', m vs. f', kwargs=kwargs)
# ... whereas the role comparison is switched to task level beforehand.
kwargs.update(tsk_or_ses='tsk')
dfR = compareR_KLD(f + ', d vs. f', kwargs=kwargs)

Spatial Terms, KLDSim, overall 0.0020534884367687787 +3.719942854025464 15

                               f    m fm
Spatial Terms, KLDSim, per gp  +  (+)  +

                                ff vs. mm ff vs. fm mm vs. fm
Spatial Terms, KLDSim, gp comp.                              

Spatial Terms, KLDSim, m vs. f (0.8793821788535847)   30

Spatial Terms, KLDSim, d vs. f (0.5941862115899679)   62



### Most frequent terms

In [5]:
# Most frequent terms with distSim: overall significance first, then the
# two gender-pair reports (significance per pair, differences between pairs).
f = 'Frequent Terms, distSim'
kwargs = dict(
    types=mf_types, typesID='ID_TYPES_MF',
    neg=neg, negID='ID_NEG_STD',
)
df = testSigF(distSim, kwargs=kwargs, fs=[f + ', overall'])
# Each report is computed and printed before the next one is started.
for gp_test, suffix in ((testSigGP, ', per gp'), (compareGP, ', gp comp.')):
    print(gp_test(distSim, kwargs=kwargs, fs=[f + suffix]), end='\n\n')

Frequent Terms, distSim, overall 0.013940145353155778 +2.7826088569040803 15

                                 f  m fm
Frequent Terms, distSim, per gp  +  +   

                                  ff vs. mm ff vs. fm mm vs. fm
Frequent Terms, distSim, gp comp.                              



In [6]:
# KLD on the most frequent terms; 'pos' is passed so that the fractions in
# the distributions add up to 1 (here it is the same list as 'types').
f = 'Frequent Terms, KLDSim'
kwargs = dict(
    types=mf_types, typesID='ID_TYPES_MF',
    pos=mf_types, posID='ID_POS_MF',
    neg=neg, negID='ID_NEG_STD',
)
df = testSigF(KLDSim, kwargs=kwargs, fs=[f + ', overall'])
# Each report is computed and printed before the next one is started.
for gp_test, suffix in ((testSigGP, ', per gp'), (compareGP, ', gp comp.')):
    print(gp_test(KLDSim, kwargs=kwargs, fs=[f + suffix]), end='\n\n')

# The gender comparison uses the settings as they stand ...
dfG = compareG_KLD(f + ', m vs. f', kwargs=kwargs)
# ... whereas the role comparison is switched to task level beforehand.
kwargs.update(tsk_or_ses='tsk')
dfR = compareR_KLD(f + ', d vs. f', kwargs=kwargs)

Frequent Terms, KLDSim, overall 0.02320105307666085 +2.5276850639303547 15

                                f  m fm
Frequent Terms, KLDSim, per gp  +  +   

                                 ff vs. mm ff vs. fm mm vs. fm
Frequent Terms, KLDSim, gp comp.                   +         +

Frequent Terms, KLDSim, m vs. f (0.49945852462968565)   30

Frequent Terms, KLDSim, d vs. f (0.5225322849576497)   62



### All terms
(except non-words and map words)

In [7]:
# All terms with distSim: no 'types' restriction is passed, only the
# exclusion list `neg`.
f = 'All Terms, distSim'
kwargs = dict(neg=neg, negID='ID_NEG_STD')
df = testSigF(distSim, kwargs=kwargs, fs=[f + ', overall'])
# Each report is computed and printed before the next one is started.
for gp_test, suffix in ((testSigGP, ', per gp'), (compareGP, ', gp comp.')):
    print(gp_test(distSim, kwargs=kwargs, fs=[f + suffix]), end='\n\n')

All Terms, distSim, overall 1.035846940314289e-09 +13.310337652356731 15

                            f  m fm
All Terms, distSim, per gp  +  +  +

                             ff vs. mm ff vs. fm mm vs. fm
All Terms, distSim, gp comp.                              



In [8]:
# KLD on all terms; no 'pos' is needed because every term is used.
f = 'All Terms, KLDSim'
kwargs = dict(neg=neg, negID='ID_NEG_STD')
df = testSigF(KLDSim, kwargs=kwargs, fs=[f + ', overall'])
# Each report is computed and printed before the next one is started.
for gp_test, suffix in ((testSigGP, ', per gp'), (compareGP, ', gp comp.')):
    print(gp_test(KLDSim, kwargs=kwargs, fs=[f + suffix]), end='\n\n')

# The gender comparison uses the settings as they stand ...
dfG = compareG_KLD(f + ', m vs. f', kwargs=kwargs)
# ... whereas the role comparison is switched to task level beforehand.
kwargs.update(tsk_or_ses='tsk')
dfR = compareR_KLD(f + ', d vs. f', kwargs=kwargs)

All Terms, KLDSim, overall 5.123135488503783e-09 +11.851605204248008 15

                           f  m fm
All Terms, KLDSim, per gp  +  +  +

                            ff vs. mm ff vs. fm mm vs. fm
All Terms, KLDSim, gp comp.                   +         +

All Terms, KLDSim, m vs. f (0.7361838939646899)   30

All Terms, KLDSim, d vs. f (0.6284570742041331)   62



In [9]:
# entrainment on percentage of spatial terms overall
def spl_lem_gnl(t):
    """Map every spatial type onto the single label 'spl'.

    Parameters
    ----------
    t : the token/type to classify.

    Returns
    -------
    str
        'spl' if ``t`` is contained in ``spl_all_types``, otherwise the
        empty string.
    """
    # A named function instead of a lambda bound to a name (PEP 8), so the
    # callable carries a real __name__ and a docstring.
    return 'spl' if t in spl_all_types else ''


# With the lemmatizer above, the only type of interest is the label 'spl'.
kwargs = {
    'types': ['spl'], 'typesID': 'ID_TYPES_SPL_GNL',
    'neg': neg, 'negID': 'ID_NEG_STD',
    'lem': spl_lem_gnl, 'lemID': 'ID_LEM_SPL_GNL'
}
df = testSigF(distSim, kwargs=kwargs,
              fs=['Overall use of spatial terms, distSim'])

Overall use of spatial terms, distSim 0.0005839638550630952 +4.339176929620489 15

