In [1]:
import pandas as pd
import spacy
import numpy as np
from transform_data_by_response import transform_data_by_response
from fluency import calc_fluency
from flexibility_elaboration import calc_flexibility_and_elaboration, calc_flexibility_and_elaboration_multi_target
from originality import calc_originality

In [2]:
# Load the spaCy model package 'en_vectors_web_lg' once; the resulting `nlp` object
# is passed to every scoring helper below (calc_fluency, calc_flexibility_and_elaboration, ...).
# NOTE(review): the model package must be installed in the kernel's environment -- confirm
# that it is available for the spaCy version in use.
nlp = spacy.load('en_vectors_web_lg')


In [3]:
def z_score(array):
    """Standardize ``array`` to z-scores while ignoring NaN entries.

    Both the centre and the spread are computed with NaN-aware reductions
    (``np.nanmean`` / ``np.nanstd``), so a missing value stays NaN in the
    output but no longer turns every other score into NaN.

    Parameters
    ----------
    array : array-like (e.g. ``numpy.ndarray`` or ``pandas.Series``)
        Numeric values to standardize.

    Returns
    -------
    Same container type as ``array - scalar`` produces
        ``(array - mean) / std`` using the population standard deviation
        (``ddof=0``). If the non-NaN values are all identical the standard
        deviation is 0 and the result contains NaN/inf, as with any 0 division.
    """
    # The mean was already NaN-aware; np.std was not, which made the two
    # statistics disagree on plain ndarrays containing NaN.
    return (array - np.nanmean(array)) / np.nanstd(array)


# Unusual Uses

The variable `unusualUses_1` holds the responses for the target "pen", and `unusualUses_2` holds the responses for "megaphone".
We separate these for the fall and spring datasets.

In [2]:
# Target objects for the two unusual-uses prompts.
target_1 = u'pen'
target_2 = u'megaphone'

# Split each raw sheet into one table per prompt. The same ID column is used for
# both prompts; only the response column differs.
# NOTE(review): the delimiter is the two-character string '/n', not the newline
# escape '\n' -- confirm this matches how responses are separated in the CSVs.
fall17_uu_raw = pd.read_csv('data/20190919_fall17_unusualUses.csv')
fall17_uu_pen = transform_data_by_response(fall17_uu_raw, delimiter='/n', id_column='ID',
                                           response_column='unusualUses_1')
fall17_uu_meg = transform_data_by_response(fall17_uu_raw, delimiter='/n', id_column='ID',
                                           response_column='unusualUses_2')
spring18_uu_raw = pd.read_csv('data/20190919_spring18_unusualUses.csv')
spring18_uu_pen = transform_data_by_response(spring18_uu_raw, delimiter='/n', id_column='ID',
                                             response_column='unusualUses_1')
spring18_uu_meg = transform_data_by_response(spring18_uu_raw, delimiter='/n', id_column='ID',
                                             response_column='unusualUses_2')

# NOTE(review): this cell previously called calc_all_creativity(fall17_uu_pen, ...) and
# stored the result in `out_data`. That function is not imported or defined above and
# `out_data` was never read, so the call was dropped to keep the cell runnable on a
# fresh kernel. Restore it (with its import) if it is still needed.

# Result frames keyed by responseID / ID; the score columns are filled in by later cells.
fall17_pen_results = pd.DataFrame({'responseID': fall17_uu_pen.responseID, 'ID': fall17_uu_pen.ID})
spring18_pen_results = pd.DataFrame({'responseID': spring18_uu_pen.responseID, 'ID': spring18_uu_pen.ID})
fall17_meg_results = pd.DataFrame({'responseID': fall17_uu_meg.responseID, 'ID': fall17_uu_meg.ID})
# Fixed: the spring megaphone frame was built from the spring *pen* table.
spring18_meg_results = pd.DataFrame({'responseID': spring18_uu_meg.responseID, 'ID': spring18_uu_meg.ID})

In [4]:
# Score the fall-2017 "pen" responses: fluency from the transformed table,
# cleaned text / elaboration / flexibility relative to the target word, and
# originality from the raw response text.
fall17_pen_results['fluency'] = calc_fluency(fall17_uu_pen, nlp)
fall17_pen_results[['clean_response', 'elaboration', 'flexibility']] = (
    calc_flexibility_and_elaboration(list(fall17_uu_pen.response), target_1, nlp)
)
fall17_pen_results['originality'] = calc_originality(fall17_uu_pen.response)


correcting flexibility for word count...
bootstrapping at word count 1
bootstrapping at word count 2
bootstrapping at word count 3
bootstrapping at word count 4
bootstrapping at word count 6
bootstrapping at word count 5
bootstrapping at word count 7
bootstrapping at word count 8
bootstrapping at word count 0
bootstrapping at word count 9
bootstrapping at word count 10
bootstrapping at word count 12
bootstrapping at word count 11


In [5]:
# Score the fall-2017 "megaphone" responses with the same three helpers used
# for the pen prompt, this time against target_2.
fall17_meg_results['fluency'] = calc_fluency(fall17_uu_meg, nlp)
fall17_meg_results[['clean_response', 'elaboration', 'flexibility']] = (
    calc_flexibility_and_elaboration(list(fall17_uu_meg.response), target_2, nlp)
)
fall17_meg_results['originality'] = calc_originality(fall17_uu_meg.response)

correcting flexibility for word count...
bootstrapping at word count 1
bootstrapping at word count 2
bootstrapping at word count 3
bootstrapping at word count 4
bootstrapping at word count 6
bootstrapping at word count 5
bootstrapping at word count 7
bootstrapping at word count 9
bootstrapping at word count 10
bootstrapping at word count 0
bootstrapping at word count 8


In [6]:
# Standardize the response-level scores within each prompt (pen first, then megaphone).
for _frame in (fall17_pen_results, fall17_meg_results):
    for _metric in ('elaboration', 'flexibility', 'originality'):
        _frame['z_' + _metric] = z_score(_frame[_metric])

# One row per participant.
# NOTE(review): the ID list is taken from the pen results only, so a participant who
# answered only the megaphone prompt gets no row here -- confirm this is intended.
fall17_creativity = pd.DataFrame({'ID': fall17_pen_results.ID.unique()})

# (column prefix, response-level frame) for each prompt.
_tasks = (('uu_1_', fall17_pen_results), ('uu_2_', fall17_meg_results))

# Per-participant NaN-aware mean of each response-level column. Metrics form the
# outer loop so the output columns alternate uu_1_*, uu_2_* for each metric.
for _out_name, _score_col in (
    ('elaboration', 'z_elaboration'),
    ('flexibility', 'z_flexibility'),
    ('originality', 'z_originality'),
    ('raw_fluency', 'fluency'),
):
    for _prefix, _frame in _tasks:
        fall17_creativity[_prefix + _out_name] = fall17_creativity.apply(
            lambda row:
                np.nanmean(_frame.loc[_frame.ID == row.ID, _score_col]),
            axis=1
        )

# Fluency is standardized across participants, after the per-participant averaging.
for _prefix, _frame in _tasks:
    fall17_creativity[_prefix + 'fluency'] = z_score(fall17_creativity[_prefix + 'raw_fluency'])

# Composite score: unweighted row mean of the four standardized components.
for _prefix, _frame in _tasks:
    fall17_creativity[_prefix + 'creativity_score'] = fall17_creativity[
        [_prefix + 'elaboration', _prefix + 'flexibility',
         _prefix + 'originality', _prefix + 'fluency']
    ].mean(axis=1)

In [7]:
#fall17_creativity.to_csv('output/20191017_fall17_unusual_uses.csv', index=False)

# Social Creativity

## Task B

In [4]:
fall17_sc_B_raw = pd.read_csv('data/20190919_fall17_creativeB_long.csv')

# Build a per-trial key "<ID>_<trial>" so each trial is transformed as its own unit.
fall17_sc_B_raw['trialID'] = fall17_sc_B_raw['ID'] + '_' + fall17_sc_B_raw['trial'].map(str)

# NOTE(review): the delimiter is the literal two-character string '/n' -- confirm it
# matches the separator used in the CSV.
fall17_sc_B = transform_data_by_response(
    fall17_sc_B_raw,
    delimiter='/n',
    id_column='trialID',
)

# Attach each response's target word: the first 'word' value among the raw rows
# whose trialID equals the response's ID.
fall17_sc_B['target_word'] = fall17_sc_B.apply(
    lambda row: fall17_sc_B_raw.loc[fall17_sc_B_raw['trialID'] == row.ID, 'word'].tolist()[0],
    axis=1,
)

In [5]:
# Response-level result frame for task B, keyed by responseID and the per-trial ID.
fall17_sc_B_results = pd.DataFrame({
    'responseID': fall17_sc_B.responseID,
    'ID': fall17_sc_B.ID,
})
fall17_sc_B_results['fluency'] = calc_fluency(fall17_sc_B, nlp)
# Each response is scored against its own target word, hence the multi-target helper.
fall17_sc_B_results[['clean_response', 'elaboration', 'flexibility']] = (
    calc_flexibility_and_elaboration_multi_target(
        list(fall17_sc_B.response),
        list(fall17_sc_B.target_word),
        nlp,
    )
)
fall17_sc_B_results['originality'] = calc_originality(fall17_sc_B.response)

correcting flexibility for word count...
bootstrapping at word count 2
bootstrapping at word count 3
bootstrapping at word count 4
bootstrapping at word count 5
bootstrapping at word count 1
bootstrapping at word count 0
bootstrapping at word count 9
bootstrapping at word count 6
bootstrapping at word count 7
bootstrapping at word count 8
bootstrapping at word count 11
bootstrapping at word count 10
bootstrapping at word count 13
bootstrapping at word count 16


In [6]:
# Standardize the response-level scores across all task B responses.
for _metric in ('elaboration', 'flexibility', 'originality'):
    fall17_sc_B_results['z_' + _metric] = z_score(fall17_sc_B_results[_metric])

# One row per unique ID found in the response-level results.
fall17_sc_B_final = pd.DataFrame({'ID': fall17_sc_B_results.ID.unique()})

# NaN-aware mean of each response-level column over the rows sharing an ID.
for _out_name, _score_col in (
    ('elaboration', 'z_elaboration'),
    ('flexibility', 'z_flexibility'),
    ('originality', 'z_originality'),
    ('raw_fluency', 'fluency'),
):
    fall17_sc_B_final['sc_B_' + _out_name] = fall17_sc_B_final.apply(
        lambda row:
            np.nanmean(fall17_sc_B_results.loc[fall17_sc_B_results.ID == row.ID, _score_col]),
        axis=1
    )

# Fluency is standardized after aggregation, across the rows of the final frame.
fall17_sc_B_final['sc_B_fluency'] = z_score(fall17_sc_B_final['sc_B_raw_fluency'])

# Composite score: unweighted row mean of the four standardized components.
fall17_sc_B_final['sc_B_creativity_score'] = fall17_sc_B_final[
    ['sc_B_elaboration', 'sc_B_flexibility', 'sc_B_originality', 'sc_B_fluency']
].mean(axis=1)

In [7]:
#fall17_sc_B_final.to_csv('output/20191017_fall17_creativeB.csv', index=False)