In [1]:
# imports
import pandas as pd
import numpy as np
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import matplotlib.pyplot as plt
import seaborn as sns
import re
import collections

import plotly.graph_objs as go
import plotly.offline as py
from plotly.offline import init_notebook_mode,iplot
from plotly import tools
init_notebook_mode(connected = True)
import plotly.figure_factory as ff


import string
import spacy
import gensim
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer,TfidfVectorizer  
import nltk

from nltk.corpus import stopwords
nltk.download('stopwords')
from afinn import Afinn
import unicodedata

from nltk.tokenize import word_tokenize
from nltk import pos_tag
from nltk.corpus import wordnet



[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/cecilia/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [35]:
#load dataset
pickfu_r= pd.read_csv("pickfu_data0612.csv",index_col=0)
pickfu_r["Explanation"]= pickfu_r["Explanation"].str.lower()
pickfu_r.head()

Unnamed: 0,Gender Identity,Income Range,Racial Or Ethnic Identity,Education Level,Age Range,Favorite Mobile Gaming Genres,Mobile Gaming Frequency,Explanation
0,Male,$101k+,White,Bachelor's Degree,35-44,Racing; Arcade; Board; Adventure; Action,Daily,this character has the most sinister and deadl...
1,Male,$31-60k,Asian,Bachelor's Degree,35-44,Action; Arcade; Role Playing; Sports,Daily,d is the character design that i think fits th...
2,Male,$31-60k,White,Bachelor's Degree,25-34,Sports; Strategy; Arcade; Adventure; Action,Daily,the character in option b has the most matchin...
3,Male,$0-30k,Hispanic,Vocational Training,25-34,Strategy; Role Playing; Action,Never,this outfit seems the most fitting to his pers...
4,Male,$61-100k,Asian,Bachelor's Degree,21-24,Educational,Daily,the picture is best fit with the the descripti...


In [97]:
pickfu_r_text_review = pickfu_r["Explanation"].str.split(".")

In [98]:
pickfu_r_text_review

0       [this character has the most sinister and dead...
1       [d is the character design that i think fits t...
2       [the character in option b has the most matchi...
3       [this outfit seems the most fitting to his per...
4       [the picture is best fit with the the descript...
                              ...                        
4037    [option b is my choice for this type of charac...
4038               [c looks the best fleshed out drawing]
4039    [my top choice looks more like she is about to...
4040    [i really love option c because she appears li...
4041    [i think all of them look really good,   they'...
Name: Explanation, Length: 4042, dtype: object

In [99]:
import itertools

list2d = pickfu_r_text_review
merged = list(itertools.chain(*list2d))

In [100]:
merged

['this character has the most sinister and deadly look due to the weaponry, stance, and overall facial features',
 'd is the character design that i think fits the description best',
 ' this character has a bolder and more futuristic style',
 '',
 'the character in option b has the most matching description and i like the olive drab uniform',
 'this outfit seems the most fitting to his personality, as well as using a sniper rifle',
 '',
 'the picture is best fit with the the description inside most attractive sociality anixious  sensitive and the person is looking is like this beautiful and the dressing in this position is cute in this person',
 'the picture is best fit with the the description inside most attractive sociality anixious  sensitive and the person is looking is like this beautiful and the dressing in this position is cute in this person and critical movement is like this person',
 'picked in order of what fit a university professor best',
 '',
 ' although i could easily f

In [101]:
pickfu_r_split= pd.DataFrame(merged,columns=["Explanation"])
pickfu_r_split.head()

Unnamed: 0,Explanation
0,this character has the most sinister and deadl...
1,d is the character design that i think fits th...
2,this character has a bolder and more futurist...
3,
4,the character in option b has the most matchin...


### Computing sentiment score:
- according to comments that include target words
- rate all the comments and compute num of positives/ num of comments total

#### realistic

In [102]:
realistic = pickfu_r_split[pickfu_r_split['Explanation'].str.contains('realistic', regex=False, case=False, na=False)]
realistic.head()

Unnamed: 0,Explanation
129,he looks more realistic and i really like the...
302,i prefer the ones that seem more realistic and...
488,"i thin she also looks very realistic, which m..."
521,"this character seems more realistic, approacha..."
546,a is wearing super bright colors which just do...


In [103]:
realistic.shape

(413, 1)

In [104]:
# Define a function to clean the text
def clean(text):
# Removes all special characters and numericals leaving the alphabets
    text = re.sub('[^A-Za-z]+', ' ', text)
    return text

# Cleaning the text in the review column
realistic['Cleaned Reviews'] = realistic['Explanation'].apply(clean)

In [105]:
# POS tagger dictionary
pos_dict = {'J':wordnet.ADJ, 'V':wordnet.VERB, 'N':wordnet.NOUN, 'R':wordnet.ADV}

def token_stop_pos(text):
    tags = pos_tag(word_tokenize(text))
    newlist = []
    for word, tag in tags:
        if word.lower() not in set(stopwords.words('english')):
            newlist.append(tuple([word, pos_dict.get(tag[0])]))
    return newlist

realistic['POS tagged'] = realistic['Cleaned Reviews'].apply(token_stop_pos)

In [106]:
#lemma
from nltk.stem import WordNetLemmatizer
wordnet_lemmatizer = WordNetLemmatizer()

def lemmatize(pos_data):
    lemma_rew = " "
    for word, pos in pos_data:
        if not pos: 
            lemma = word
            lemma_rew = lemma_rew + " " + lemma
        else:  
            lemma = wordnet_lemmatizer.lemmatize(word, pos=pos)
            lemma_rew = lemma_rew + " " + lemma
    return lemma_rew
    
realistic['Lemma'] = realistic['POS tagged'].apply(lemmatize)

In [107]:
#text blob
from textblob import TextBlob

# function to calculate subjectivity 
def getSubjectivity(review):
    return TextBlob(review).sentiment.subjectivity

# function to calculate polarity
def getPolarity(review):
    return TextBlob(review).sentiment.polarity

# function to analyze the reviews
def analysis(score):
    if score < 0:
        return 'Negative'
    elif score == 0:
        return 'Neutral'
    else:
        return 'Positive'

In [108]:
realistic_data = pd.DataFrame(realistic[['Explanation', 'Lemma']])
realistic_data['Polarity'] = realistic_data['Lemma'].apply(getPolarity) 
realistic_data['Analysis'] = realistic_data['Polarity'].apply(analysis)

In [109]:
#VADER
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

# function to calculate vader sentiment  
def vadersentimentanalysis(review):
    vs = analyzer.polarity_scores(review)
    return vs['compound']

realistic_data['Vader Sentiment'] = realistic_data['Lemma'].apply(vadersentimentanalysis)
# function to analyse 
def vader_analysis(compound):
    if compound >= 0.5:
        return 'Positive'
    elif compound <= -0.5 :
        return 'Negative'
    else:
        return 'Neutral'
    
realistic_data['Vader Analysis'] = realistic_data['Vader Sentiment'].apply(vader_analysis)
realistic_data.head()

Unnamed: 0,Explanation,Lemma,Polarity,Analysis,Vader Sentiment,Vader Analysis
129,he looks more realistic and i really like the...,look realistic really like use bow arrow,0.183333,Positive,0.4201,Neutral
302,i prefer the ones that seem more realistic and...,prefer one seem realistic less top design,0.166667,Positive,0.1298,Neutral
488,"i thin she also looks very realistic, which m...",thin also look realistic make character beli...,0.088889,Positive,0.34,Neutral
521,"this character seems more realistic, approacha...",character seem realistic approachable fit ba...,-0.077778,Negative,0.3612,Neutral
546,a is wearing super bright colors which just do...,wear super bright color seem realistic think...,0.425,Positive,0.875,Positive


In [121]:
textblob_realistic = realistic_data['Analysis'].value_counts()
textblob_realistic.Positive/textblob_realistic.sum()*100

78.69249394673123

#### Futuristic

In [111]:
futuristic = pickfu_r_split[pickfu_r_split['Explanation'].str.contains('futuristic', regex=False, case=False, na=False)]
futuristic.head()

Unnamed: 0,Explanation
2,this character has a bolder and more futurist...
145,i think option d looks the like it fits the de...
673,"i hate fantasy, sexual, and futuristic stuff f..."
1382,"e and a tell us it's futuristic, as does d to..."
1458,"to start off, option d has a perfect futuristi..."


In [112]:
futuristic.shape

(39, 1)

In [113]:
# Define a function to clean the text
def clean(text):
# Removes all special characters and numericals leaving the alphabets
    text = re.sub('[^A-Za-z]+', ' ', text)
    return text

# Cleaning the text in the review column
futuristic['Cleaned Reviews'] = futuristic['Explanation'].apply(clean)

# POS tagger dictionary
pos_dict = {'J':wordnet.ADJ, 'V':wordnet.VERB, 'N':wordnet.NOUN, 'R':wordnet.ADV}

def token_stop_pos(text):
    tags = pos_tag(word_tokenize(text))
    newlist = []
    for word, tag in tags:
        if word.lower() not in set(stopwords.words('english')):
            newlist.append(tuple([word, pos_dict.get(tag[0])]))
    return newlist

futuristic['POS tagged'] = futuristic['Cleaned Reviews'].apply(token_stop_pos)

from nltk.stem import WordNetLemmatizer
wordnet_lemmatizer = WordNetLemmatizer()

def lemmatize(pos_data):
    lemma_rew = " "
    for word, pos in pos_data:
        if not pos: 
            lemma = word
            lemma_rew = lemma_rew + " " + lemma
        else:  
            lemma = wordnet_lemmatizer.lemmatize(word, pos=pos)
            lemma_rew = lemma_rew + " " + lemma
    return lemma_rew
    
futuristic['Lemma'] = futuristic['POS tagged'].apply(lemmatize)
futuristic.head()

Unnamed: 0,Explanation,Cleaned Reviews,POS tagged,Lemma
2,this character has a bolder and more futurist...,this character has a bolder and more futurist...,"[(character, n), (bolder, n), (futuristic, a),...",character bolder futuristic style
145,i think option d looks the like it fits the de...,i think option d looks the like it fits the de...,"[(think, v), (option, n), (looks, v), (like, N...",think option look like fit description sligh...
673,"i hate fantasy, sexual, and futuristic stuff f...",i hate fantasy sexual and futuristic stuff for...,"[(hate, v), (fantasy, a), (sexual, a), (futuri...",hate fantasy sexual futuristic stuff future ...
1382,"e and a tell us it's futuristic, as does d to...",e and a tell us it s futuristic as does d to ...,"[(e, n), (tell, n), (us, None), (futuristic, a...",e tell us futuristic less extent
1458,"to start off, option d has a perfect futuristi...",to start off option d has a perfect futuristic...,"[(start, v), (option, n), (perfect, a), (futur...",start option perfect futuristic title


In [114]:
futuristic_data = pd.DataFrame(futuristic[['Explanation', 'Lemma']])
futuristic_data['Polarity'] = futuristic_data['Lemma'].apply(getPolarity) 
futuristic_data['Analysis'] = futuristic_data['Polarity'].apply(analysis)
#vader
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

# function to calculate vader sentiment  
def vadersentimentanalysis(review):
    vs = analyzer.polarity_scores(review)
    return vs['compound']

futuristic_data['Vader Sentiment'] = futuristic_data['Lemma'].apply(vadersentimentanalysis)
# function to analyse 
def vader_analysis(compound):
    if compound >= 0.5:
        return 'Positive'
    elif compound <= -0.5 :
        return 'Negative'
    else:
        return 'Neutral'
    
futuristic_data['Vader Analysis'] = futuristic_data['Vader Sentiment'].apply(vader_analysis)
futuristic_data.head()

Unnamed: 0,Explanation,Lemma,Polarity,Analysis,Vader Sentiment,Vader Analysis
2,this character has a bolder and more futurist...,character bolder futuristic style,0.0,Neutral,0.296,Neutral
145,i think option d looks the like it fits the de...,think option look like fit description sligh...,0.196131,Positive,0.875,Positive
673,"i hate fantasy, sexual, and futuristic stuff f...",hate fantasy sexual futuristic stuff future ...,-0.1,Negative,-0.5719,Negative
1382,"e and a tell us it's futuristic, as does d to...",e tell us futuristic less extent,-0.166667,Negative,0.0,Neutral
1458,"to start off, option d has a perfect futuristi...",start option perfect futuristic title,1.0,Positive,0.5719,Positive


In [122]:
textblob_futuristic = futuristic_data['Analysis'].value_counts()
textblob_futuristic.Positive/textblob_futuristic.sum()*100

46.15384615384615

In [124]:
textblob_futuristic

Positive    18
Neutral     15
Negative     6
Name: Analysis, dtype: int64

#### Cartoon:
- "cartoony","cartoonish","cartoon" 

In [116]:
#cartoon_words= ['cartoony','cartoonish','cartoon']
cartoon = pickfu_r_split[(pickfu_r_split['Explanation'].str.contains("cartoony|cartoonish|cartoon"))]
cartoon.head()

Unnamed: 0,Explanation
1001,a is a little cartoonish compared to the oth...
1208,i like b the most because it gives off a carto...
1841,all of the other items either seem too carto...
1844,the drawing seems a bit cartoonish though
2044,the lower ranked options seemed to outlandish...


In [117]:
cartoon.shape

(189, 1)

In [118]:
# Define a function to clean the text
def clean(text):
# Removes all special characters and numericals leaving the alphabets
    text = re.sub('[^A-Za-z]+', ' ', text)
    return text

# Cleaning the text in the review column
cartoon['Cleaned Reviews'] = cartoon['Explanation'].apply(clean)

# POS tagger dictionary
pos_dict = {'J':wordnet.ADJ, 'V':wordnet.VERB, 'N':wordnet.NOUN, 'R':wordnet.ADV}

def token_stop_pos(text):
    tags = pos_tag(word_tokenize(text))
    newlist = []
    for word, tag in tags:
        if word.lower() not in set(stopwords.words('english')):
            newlist.append(tuple([word, pos_dict.get(tag[0])]))
    return newlist

cartoon['POS tagged'] = cartoon['Cleaned Reviews'].apply(token_stop_pos)

from nltk.stem import WordNetLemmatizer
wordnet_lemmatizer = WordNetLemmatizer()

def lemmatize(pos_data):
    lemma_rew = " "
    for word, pos in pos_data:
        if not pos: 
            lemma = word
            lemma_rew = lemma_rew + " " + lemma
        else:  
            lemma = wordnet_lemmatizer.lemmatize(word, pos=pos)
            lemma_rew = lemma_rew + " " + lemma
    return lemma_rew
    
cartoon['Lemma'] = cartoon['POS tagged'].apply(lemmatize)
cartoon.head()

Unnamed: 0,Explanation,Cleaned Reviews,POS tagged,Lemma
1001,a is a little cartoonish compared to the oth...,a is a little cartoonish compared to the othe...,"[(little, a), (cartoonish, a), (compared, v), ...",little cartoonish compare lady
1208,i like b the most because it gives off a carto...,i like b the most because it gives off a carto...,"[(like, None), (b, v), (gives, v), (cartoon, n...",like b give cartoon ish vibe
1841,all of the other items either seem too carto...,all of the other items either seem too cartoo...,"[(items, n), (either, None), (seem, v), (carto...",item either seem cartoonish unrealistic real...
1844,the drawing seems a bit cartoonish though,the drawing seems a bit cartoonish though,"[(drawing, n), (seems, v), (bit, n), (cartooni...",drawing seem bit cartoonish though
2044,the lower ranked options seemed to outlandish...,the lower ranked options seemed to outlandish...,"[(lower, a), (ranked, v), (options, n), (seeme...",low rank option seem outlandish cartoonish


In [119]:
cartoon_data = pd.DataFrame(cartoon[['Explanation', 'Lemma']])
cartoon_data['Polarity'] = cartoon_data['Lemma'].apply(getPolarity) 
cartoon_data['Analysis'] = cartoon_data['Polarity'].apply(analysis)
#vader
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

# function to calculate vader sentiment  
def vadersentimentanalysis(review):
    vs = analyzer.polarity_scores(review)
    return vs['compound']

cartoon_data['Vader Sentiment'] = cartoon_data['Lemma'].apply(vadersentimentanalysis)
# function to analyse 
def vader_analysis(compound):
    if compound >= 0.5:
        return 'Positive'
    elif compound <= -0.5 :
        return 'Negative'
    else:
        return 'Neutral'
    
cartoon_data['Vader Analysis'] = cartoon_data['Vader Sentiment'].apply(vader_analysis)
cartoon_data.head()

Unnamed: 0,Explanation,Lemma,Polarity,Analysis,Vader Sentiment,Vader Analysis
1001,a is a little cartoonish compared to the oth...,little cartoonish compare lady,-0.1875,Negative,0.0,Neutral
1208,i like b the most because it gives off a carto...,like b give cartoon ish vibe,0.0,Neutral,0.3612,Neutral
1841,all of the other items either seem too carto...,item either seem cartoonish unrealistic real...,-0.05,Negative,0.0,Neutral
1844,the drawing seems a bit cartoonish though,drawing seem bit cartoonish though,0.0,Neutral,0.0,Neutral
2044,the lower ranked options seemed to outlandish...,low rank option seem outlandish cartoonish,-0.4,Negative,-0.2732,Neutral


In [123]:
textblob_cartoon = cartoon_data['Analysis'].value_counts()
textblob_cartoon.Positive/textblob_cartoon.sum()*100

49.735449735449734

In [128]:
#df= pd.DataFrame(list(textblob_realistic),list(textblob_futuristic),list(textblob_cartoon))
d= {'realistic': list(textblob_realistic), 'futuristic': list(textblob_futuristic), 'cartoon': list(textblob_cartoon)}
df= pd.DataFrame(data=d)

In [129]:
df

Unnamed: 0,realistic,futuristic,cartoon
0,325,18,94
1,83,15,52
2,5,6,43


In [134]:
good_cartoon=cartoon_data[cartoon_data['Analysis']=='Positive']
good_cartoon.to_csv("good_cartoon.csv")