From 141ca8c3bf1c5a79ec67f01ce75ddf0c5e8b71c7 Mon Sep 17 00:00:00 2001 From: Jordeen Chang Date: Sat, 28 Nov 2015 15:09:03 -0800 Subject: [PATCH 1/7] starting sentiment parser --- code/utils/sentiment.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 code/utils/sentiment.py diff --git a/code/utils/sentiment.py b/code/utils/sentiment.py new file mode 100644 index 0000000..174683b --- /dev/null +++ b/code/utils/sentiment.py @@ -0,0 +1,20 @@ +from senti_classifier import senti_classifier +import nltk +import csv +sentences = ['The movie was the worst movie', 'It was the worst acting by the actors'] +pos_score, neg_score = senti_classifier.polarity_scores(sentences) +print pos_score, neg_score +path_to_scene_csv = "../../ds113_study_description/stimulus/task001/annotations/german_audio_description.csv" + +def translate_file(filename): + with open(filename, 'rt') as csvfile: + reader = csv.DictReader( + csvfile, + fieldnames=['start', 'end', 'german-desc']) + for row in reader: + print row + +def get_sentiment(phrase): + print senti_classifier.polarity_scores(phrase) + +get_sentiment(["hello you are ugly and i hate you", "hello you are beautiful and i love you"]) From 165249505bad32aef9e8f9b84a370b52e0e5c871 Mon Sep 17 00:00:00 2001 From: Jordeen Chang Date: Sun, 29 Nov 2015 11:53:03 -0800 Subject: [PATCH 2/7] using textblob library --- code/utils/sentiment.py | 45 ++++++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/code/utils/sentiment.py b/code/utils/sentiment.py index 174683b..db387fc 100644 --- a/code/utils/sentiment.py +++ b/code/utils/sentiment.py @@ -1,20 +1,45 @@ -from senti_classifier import senti_classifier import nltk import csv -sentences = ['The movie was the worst movie', 'It was the worst acting by the actors'] -pos_score, neg_score = senti_classifier.polarity_scores(sentences) -print pos_score, neg_score +from textblob import TextBlob + path_to_scene_csv = "../../ds113_study_description/stimulus/task001/annotations/german_audio_description.csv" -def translate_file(filename): +def get_polarity_dict(filename): with open(filename, 'rt') as csvfile: reader = csv.DictReader( csvfile, - fieldnames=['start', 'end', 'german-desc']) + fieldnames=['start', 'end', 'german_desc']) for row in reader: - print row + start = float(row['start']) + end = float(row['end']) + blob = TextBlob(row['german_desc']) + translated_blob = blob.translate(to="en") + sentiment = get_sentiment(blob) + print sentiment + +def get_sentiment(blob): + sentences = blob.sentences + sentence_total = len(sentences) + sentiment_total = 0 + for sentence in sentences: + sentiment_total += sentence.sentiment.polarity + return float(sentiment_total) / float(sentence_total) + + + +# get_polarity_dict(path_to_scene_csv) + +# phrase = "Eine Computeranimation: Auf einen schroffen Berg mit schneebedeckter Flanke fliegt eine Reihe Sterne zu. Sie bilden einen Kranz um den Gipfel: 'Paramount'." +phrase = "you are ugly" + + +blob = TextBlob(phrase) +print blob.tags +print blob.noun_phrases +for sentence in blob.sentences: + print(sentence.sentiment.polarity) -def get_sentiment(phrase): - print senti_classifier.polarity_scores(phrase) +print blob.translate(to="en") -get_sentiment(["hello you are ugly and i hate you", "hello you are beautiful and i love you"]) +for sentence in blob.sentences: + print(sentence.sentiment.polarity) From 26c6b8940dc654e9b7f82087d52a4919d2646ef0 Mon Sep 17 00:00:00 2001 From: Jordeen Chang Date: Sun, 29 Nov 2015 16:16:43 -0800 Subject: [PATCH 3/7] using textblob --- code/utils/scene_slicer.py | 14 +++++++++----- code/utils/sentiment.py | 31 ++++++++++++++++--------------- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/code/utils/scene_slicer.py b/code/utils/scene_slicer.py index b7268b5..3405a9a 100644 --- a/code/utils/scene_slicer.py +++ b/code/utils/scene_slicer.py @@ -9,11 +9,11 @@ INT_IND = 2 EXT_IND = 3 -# with open('../../data/data_path.json', 'r') as fh: -# data_paths = json.load(fh) -# path_to_images = [] -# for i in range(8): -# path_to_images.append("../../" + data_paths['bold_dico_7Tad2grpbold7Tad']['sub1']['runs'][0]['path']) +with open('../../data/data_path.json', 'r') as fh: + data_paths = json.load(fh) +path_to_images = [] +for i in range(8): + path_to_images.append("../../" + data_paths['bold_dico_7Tad2grpbold7Tad']['sub1']['runs'][0]['path']) class SceneSlicer: @@ -89,3 +89,7 @@ def get_day_night(self, run_num, slice): is_day_slice = slice in scene_slices[DAY_IND] is_int_slice = slice in scene_slices[INT_IND] return (is_day_slice, is_int_slice) + + +ss = SceneSlicer(path_to_images) +print ss.get_day_night(0, 500) diff --git a/code/utils/sentiment.py b/code/utils/sentiment.py index db387fc..6d78e52 100644 --- a/code/utils/sentiment.py +++ b/code/utils/sentiment.py @@ -1,21 +1,22 @@ -import nltk import csv from textblob import TextBlob +from math import ceil, floor path_to_scene_csv = "../../ds113_study_description/stimulus/task001/annotations/german_audio_description.csv" def get_polarity_dict(filename): - with open(filename, 'rt') as csvfile: - reader = csv.DictReader( - csvfile, - fieldnames=['start', 'end', 'german_desc']) - for row in reader: - start = float(row['start']) - end = float(row['end']) - blob = TextBlob(row['german_desc']) - translated_blob = blob.translate(to="en") - sentiment = get_sentiment(blob) - print sentiment + with open(filename, 'rt') as csvfile: + reader = csv.DictReader( + csvfile, + fieldnames=['start', 'end', 'german_desc']) + for row in reader: + start = ceil(float(row['start'])) + end = floor(float(row['end'])) + blob = TextBlob(row['german_desc']) + print row['german_desc'] + translated_blob = blob.translate(to="en") + sentiment = get_sentiment(blob) + print sentiment def get_sentiment(blob): sentences = blob.sentences @@ -27,10 +28,10 @@ def get_sentiment(blob): -# get_polarity_dict(path_to_scene_csv) +get_polarity_dict(path_to_scene_csv) -# phrase = "Eine Computeranimation: Auf einen schroffen Berg mit schneebedeckter Flanke fliegt eine Reihe Sterne zu. Sie bilden einen Kranz um den Gipfel: 'Paramount'." -phrase = "you are ugly" +phrase = "Eine Computeranimation: Auf einen schroffen Berg mit schneebedeckter Flanke fliegt eine Reihe Sterne zu. Sie bilden einen Kranz um den Gipfel: 'Paramount'." +# phrase = "you are ugly" blob = TextBlob(phrase) From 61ff1c9d1685421c2ede2f48076ecf221dac4cfb Mon Sep 17 00:00:00 2001 From: Jordeen Chang Date: Sun, 29 Nov 2015 16:38:25 -0800 Subject: [PATCH 4/7] returning list of 0 and 1 instead of separate arrays --- code/utils/scene_slicer.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/code/utils/scene_slicer.py b/code/utils/scene_slicer.py index 3405a9a..b94f234 100644 --- a/code/utils/scene_slicer.py +++ b/code/utils/scene_slicer.py @@ -9,6 +9,9 @@ INT_IND = 2 EXT_IND = 3 +DAY_NIGHT_IND = 0 +INT_EXT_IND = 1 + with open('../../data/data_path.json', 'r') as fh: data_paths = json.load(fh) path_to_images = [] @@ -59,6 +62,10 @@ def get_scene_slices(self, run_num): night_slices = [] int_slices = [] ext_slices = [] + + day_night = [] + int_ext = [] + img = self.images[run_num] key_index = 0 @@ -74,22 +81,23 @@ def get_scene_slices(self, run_num): i * 2) + scene_start >= self.scene_keys[key_index + 1]: key_index += 1 curr_time = self.scene_keys[key_index] - day_slices.append(i) if self.scene_desc[curr_time][ - IS_DAY] else night_slices.append(i) - int_slices.append(i) if self.scene_desc[curr_time][ - IS_INT] else ext_slices.append(i) - self.scene_slices[run_num] = (day_slices, night_slices, int_slices, - ext_slices) + day_night.append(0) if self.scene_desc[curr_time][ + IS_DAY] else day_night.append(1) + int_ext.append(0) if self.scene_desc[curr_time][ + IS_INT] else int_ext.append(1) + print len(day_night) + print len(int_ext) + self.scene_slices[run_num] = (day_night, int_ext) return self.scene_slices[run_num] def get_day_night(self, run_num, slice): if not self.scene_slices[run_num]: self.get_scene_slices(run_num) scene_slices = self.scene_slices[run_num] - is_day_slice = slice in scene_slices[DAY_IND] - is_int_slice = slice in scene_slices[INT_IND] + is_day_slice = scene_slices[DAY_NIGHT_IND][slice] == 0 + is_int_slice = scene_slices[INT_EXT_IND][slice] == 0 return (is_day_slice, is_int_slice) ss = SceneSlicer(path_to_images) -print ss.get_day_night(0, 500) +print ss.get_scene_slices(0) From be5ea63d5acd25a1d48de0351e3fa1efa2050f49 Mon Sep 17 00:00:00 2001 From: Jordeen Chang Date: Sun, 29 Nov 2015 17:44:04 -0800 Subject: [PATCH 5/7] removing runs separation --- code/utils/scene_slicer.py | 64 ++++++++------------------- code/utils/tests/test_scene_slicer.py | 15 ++----- 2 files changed, 23 insertions(+), 56 deletions(-) diff --git a/code/utils/scene_slicer.py b/code/utils/scene_slicer.py index b94f234..53f6bd2 100644 --- a/code/utils/scene_slicer.py +++ b/code/utils/scene_slicer.py @@ -1,34 +1,28 @@ import nibabel as nib import csv +import numpy as np import json IS_DAY = 0 IS_INT = 1 -DAY_IND = 0 -NIGHT_IND = 1 -INT_IND = 2 -EXT_IND = 3 DAY_NIGHT_IND = 0 INT_EXT_IND = 1 -with open('../../data/data_path.json', 'r') as fh: - data_paths = json.load(fh) -path_to_images = [] -for i in range(8): - path_to_images.append("../../" + data_paths['bold_dico_7Tad2grpbold7Tad']['sub1']['runs'][0]['path']) +# with open('../../data/data_path.json', 'r') as fh: +# data_paths = json.load(fh) +# path_to_subject_image = "../../" + data_paths['bold_dico_7Tad2grpbold7Tad']['sub1']['runs'][0]['path'] class SceneSlicer: def __init__( self, - path_to_images, + path_to_subject_image, path_to_scene_csv="../../ds113_study_description/stimulus/task001/annotations/scenes.csv" ): - self.path_to_images = path_to_images self.path_to_scene_csv = path_to_scene_csv - self.images = [0] * len(path_to_images) - self.scene_slices = [0] * len(path_to_images) + self.image = nib.load(path_to_subject_image) + self.scene_slices = [] self.segment_duration = [902, 882, 876, 976, 924, 878, 1086, 673.4] self.scene_desc = {} self.scene_keys = [] @@ -45,38 +39,19 @@ def generate_scene_desc_dict(self): self.scene_keys = list(self.scene_desc.keys()) self.scene_keys.sort() - def get_image(self, run_num): - if self.images[run_num] == 0: - img = nib.load(self.path_to_images[run_num]) - self.images[run_num] = img - return self.images[run_num] - - def get_scene_slices(self, run_num): - if not self.images[run_num]: - self.get_image(run_num) + def get_scene_slices(self): if not self.scene_keys: self.generate_scene_desc_dict() - if not self.scene_slices[run_num]: - - day_slices = [] - night_slices = [] - int_slices = [] - ext_slices = [] - + if not self.scene_slices: day_night = [] int_ext = [] - - img = self.images[run_num] - key_index = 0 scene_start = 0 - for i in range(run_num): - scene_start += self.segment_duration[i] for i in range(len(self.scene_keys)): if self.scene_keys[i] > scene_start: key_index = i break - for i in range(img.shape[3]): + for i in range(self.image.shape[3]): if key_index + 1 < len(self.scene_keys) and ( i * 2) + scene_start >= self.scene_keys[key_index + 1]: key_index += 1 @@ -87,17 +62,16 @@ def get_scene_slices(self, run_num): IS_INT] else int_ext.append(1) print len(day_night) print len(int_ext) - self.scene_slices[run_num] = (day_night, int_ext) - return self.scene_slices[run_num] + self.scene_slices = (day_night, int_ext) + return self.scene_slices - def get_day_night(self, run_num, slice): - if not self.scene_slices[run_num]: - self.get_scene_slices(run_num) - scene_slices = self.scene_slices[run_num] - is_day_slice = scene_slices[DAY_NIGHT_IND][slice] == 0 - is_int_slice = scene_slices[INT_EXT_IND][slice] == 0 + def get_day_night(self, slice): + if not self.scene_slices: + self.get_scene_slices() + is_day_slice = self.scene_slices[DAY_NIGHT_IND][slice] == 0 + is_int_slice = self.scene_slices[INT_EXT_IND][slice] == 0 return (is_day_slice, is_int_slice) -ss = SceneSlicer(path_to_images) -print ss.get_scene_slices(0) +# ss = SceneSlicer(path_to_subject_image) +# print ss.get_scene_slices() diff --git a/code/utils/tests/test_scene_slicer.py b/code/utils/tests/test_scene_slicer.py index c37eb2f..08b376a 100644 --- a/code/utils/tests/test_scene_slicer.py +++ b/code/utils/tests/test_scene_slicer.py @@ -20,7 +20,7 @@ def test_prepare(): scenewriter = csv.writer(csvfile, delimiter=',', quotechar='"') scenewriter.writerow([17.0, "SAVANNAH", "DAY", "EXT"]) scenewriter.writerow([272.0, "DOCTORS OFFICE", "DAY", "INT"]) - ss = scene_slicer.SceneSlicer(['test_data.nii'], 'scene.csv') + ss = scene_slicer.SceneSlicer('test_data.nii', 'scene.csv') return ss @@ -47,22 +47,15 @@ def test_scene_slicer_dict(): delete_files() -def test_scene_slicer_image(): - ss = test_prepare() - ss.get_image(0) - assert ss.images[0] != 0 - delete_files() - - def test_scene_slicer_slices(): ss = test_prepare() - ss.get_scene_slices(0) - assert ss.scene_slices[0] != 0 + ss.get_scene_slices() + assert len(ss.scene_slices) != 0 delete_files() def test_scene_slicer_day_night(): ss = test_prepare() - scene_tup = ss.get_day_night(0, 0) + scene_tup = ss.get_day_night(0) assert scene_tup == (True, False) delete_files() From 432ac5c8cfeac91e070115a439951667e96795aa Mon Sep 17 00:00:00 2001 From: Jordeen Chang Date: Sun, 29 Nov 2015 18:19:32 -0800 Subject: [PATCH 6/7] converting german to ascii, handling translation exceptions --- code/utils/sentiment.py | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/code/utils/sentiment.py b/code/utils/sentiment.py index 6d78e52..5a45df8 100644 --- a/code/utils/sentiment.py +++ b/code/utils/sentiment.py @@ -1,9 +1,11 @@ import csv -from textblob import TextBlob +import textblob as tb from math import ceil, floor +import unicodedata path_to_scene_csv = "../../ds113_study_description/stimulus/task001/annotations/german_audio_description.csv" + def get_polarity_dict(filename): with open(filename, 'rt') as csvfile: reader = csv.DictReader( @@ -12,10 +14,15 @@ def get_polarity_dict(filename): for row in reader: start = ceil(float(row['start'])) end = floor(float(row['end'])) - blob = TextBlob(row['german_desc']) - print row['german_desc'] - translated_blob = blob.translate(to="en") - sentiment = get_sentiment(blob) + desc = row['german_desc'] + desc = unicode(desc, "utf-8") + desc = unicodedata.normalize('NFKD', desc).encode('ascii', 'ignore') + blob = tb.TextBlob(desc) + try: + translated_blob = blob.translate(from_lang="de", to="en") + except tb.exceptions.NotTranslated: + pass + sentiment = get_sentiment(translated_blob) print sentiment def get_sentiment(blob): @@ -29,18 +36,3 @@ def get_sentiment(blob): get_polarity_dict(path_to_scene_csv) - -phrase = "Eine Computeranimation: Auf einen schroffen Berg mit schneebedeckter Flanke fliegt eine Reihe Sterne zu. Sie bilden einen Kranz um den Gipfel: 'Paramount'." -# phrase = "you are ugly" - - -blob = TextBlob(phrase) -print blob.tags -print blob.noun_phrases -for sentence in blob.sentences: - print(sentence.sentiment.polarity) - -print blob.translate(to="en") - -for sentence in blob.sentences: - print(sentence.sentiment.polarity) From 0229ac51e57b8e9acd3c854ad87e8c173fefc09d Mon Sep 17 00:00:00 2001 From: Jordeen Chang Date: Sun, 29 Nov 2015 19:38:03 -0800 Subject: [PATCH 7/7] fixing travis problems --- code/utils/scene_slicer.py | 3 --- code/utils/sentiment.py | 1 - 2 files changed, 4 deletions(-) diff --git a/code/utils/scene_slicer.py b/code/utils/scene_slicer.py index 53f6bd2..cc98870 100644 --- a/code/utils/scene_slicer.py +++ b/code/utils/scene_slicer.py @@ -60,8 +60,6 @@ def get_scene_slices(self): IS_DAY] else day_night.append(1) int_ext.append(0) if self.scene_desc[curr_time][ IS_INT] else int_ext.append(1) - print len(day_night) - print len(int_ext) self.scene_slices = (day_night, int_ext) return self.scene_slices @@ -72,6 +70,5 @@ def get_day_night(self, slice): is_int_slice = self.scene_slices[INT_EXT_IND][slice] == 0 return (is_day_slice, is_int_slice) - # ss = SceneSlicer(path_to_subject_image) # print ss.get_scene_slices() diff --git a/code/utils/sentiment.py b/code/utils/sentiment.py index 5a45df8..1c03dcc 100644 --- a/code/utils/sentiment.py +++ b/code/utils/sentiment.py @@ -23,7 +23,6 @@ def get_polarity_dict(filename): except tb.exceptions.NotTranslated: pass sentiment = get_sentiment(translated_blob) - print sentiment def get_sentiment(blob): sentences = blob.sentences