In [472]:
import pandas as pd 
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline 
import datetime, nltk, warnings
import itertools
from collections import Counter 
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import string 
from nltk.corpus import stopwords
import spacy 
from pprint import pprint 
from spacy.lang.en.stop_words import STOP_WORDS
from spacy.lang.en import English

In [383]:
def text_process(corpus):
    nopunc = [char for char in corpus if char not in string.punctuation]
    nopunc = ''.join(nopunc)
    return [word for word in nopunc.split(' ') if word.lower() not in stopwords.words('english')]


def text_process2(corpus):
    nodigit = ''.join([char for char in corpus if not char.isdigit()])
    nopunc = [char for char in nodigit if char not in string.punctuation]
    nopunc = ''.join(nopunc)
    return [word for word in nopunc.split(' ') if word.lower() not in stopwords.words('english')]


# spacy NER tokenizer
def spacy_ner(corpus):
    nlp = spacy.load('en_core_web_sm')
    doc = nlp(corpus)
    return [ent.text for ent in doc.ents if ent.label_ == 'PRODUCT' or ent.label_ == 'ORG']
    # return [Counter(lst).most_common(15)]

def basic_tokens(corpus):
    nopunc = [char for char in corpus if char not in string.punctuation]
    nopunc = ''.join(nopunc)
    return [word for word in nopunc.split()]


def quotes(corpus):
    words = [char for char in corpus if char not in string.punctuation]
    results = ', '.join(f'{w}' for w in words)
    return results 


def string_tokens(corpus):
    words = text_process(corpus)
    results = ', '.join(f'"{w}"' for w in words)
    return results 


def spacy_tokens(corpus):
    nlp = English()
    tokenizer = nlp.tokenizer 
    doc = tokenizer(corpus)
    return [word for word in doc if word.is_stop == False]

def spacy_ner(corpus):
    nlp = spacy.load('en_core_web_sm')
    doc = nlp(corpus)
    return [ent.text for ent in doc.ents if ent.label_ == 'PRODUCT' or ent.label_ == 'ORG']


def pos(corpus):
    nlp = spacy.load('en_core_web_sm')
    doc = nlp(corpus)
    return [spacy.explain(token.tag_) for token in doc]

In [348]:
buyers = pd.read_csv('chanel-250-chanel-buyer.csv')
nonbuyers = pd.read_csv('chanel-250-non-chanel-buyer.csv')
all = pd.read_csv('chanel-full-nlp_nn.csv')

In [349]:
pd.set_option('display.max_colwidth', 100)
pd.set_option('display.colheader_justify', 'center')
pd.options.display.max_columns = 100
pd.options.display.max_rows = 400 

In [350]:
buyers.head()

Unnamed: 0,chanel buyer,age,city,ethnicity,group,response
0,1,32,Anchorage,White,Under 40,A certain price point is definitely a major factor. Anything over $750 seems lux. Also exclusive...
1,1,35,Atlanta,White,Under 40,"I believe a luxury fashion brands are good investment pieces that offers quality, uniqueness and..."
2,1,40,Atlanta,African American,40-60,Craftsmanship and unique pieces
3,1,31,Austin,White,Under 40,"To me, a luxury brand is brand that has withstand the test of time and is iconic. Iconic in the ..."
4,1,32,Baltimore,White,Under 40,"The quality, service and relationship are the most important factors. Service not just at the ti..."


In [352]:
nonbuyers.head()

Unnamed: 0,chanel buyer,age,city,ethnicity,group,response
0,0,28,Atlanta,White,Under 40,"The quality and craftsmanship of the products that the brand offers, also the level of service a..."
1,0,37,Atlanta,White,Under 40,"It’s the quality, the customer service and the longevity of the products. I have things that i w..."
2,0,42,Atlanta,White,40-60,"Statement pieces that will be around for a long time, quality"
3,0,29,Atlanta,White,Under 40,The pieces are classic and withstand time. They are about quality and not just what is trendy at...
4,0,34,Atlanta,White,Under 40,"To me, a luxury fashion brand is lifestyle design through clothing, accessories & often other li..."


In [351]:
all.head()

Unnamed: 0,chanel buyer,age,city,ethnicity,group,response
0,1,32,Anchorage,White,Under 40,A certain price point is definitely a major factor. Anything over $750 seems lux. Also exclusive...
1,1,35,Atlanta,White,Under 40,"I believe a luxury fashion brands are good investment pieces that offers quality, uniqueness and..."
2,1,40,Atlanta,African American,40-60,Craftsmanship and unique pieces
3,0,28,Atlanta,White,Under 40,"The quality and craftsmanship of the products that the brand offers, also the level of service a..."
4,0,37,Atlanta,White,Under 40,"It’s the quality, the customer service and the longevity of the products. I have things that i w..."


In [356]:
analyzer = SentimentIntensityAnalyzer()

all['score'] = all['response'].apply(lambda response: analyzer.polarity_scores(response)['compound'])
all.sort_values(by=['score'], ascending=False, inplace=True)

buyers['score'] = buyers['response'].apply(lambda response: analyzer.polarity_scores(response)['compound'])
buyers.sort_values(by=['score'], ascending=False, inplace=True)

nonbuyers['score'] = nonbuyers['response'].apply(lambda response: analyzer.polarity_scores(response)['compound'])
nonbuyers.sort_values(by=['score'], ascending=False, inplace=True)

In [357]:
all.head()

Unnamed: 0,chanel buyer,age,city,ethnicity,group,response,score
232,0,50,San Diego,White,40-60,I feel that the luxury fashion brand elevates my wardrobe and how I feel when wearing it! I just...,0.9951
59,1,25,Houston,White,Under 40,For me personally what differentiates a luxury fashion brand are several factors. First and fore...,0.9887
117,0,44,Los Angeles,Asian,40-60,I strongly believe that the luxury brands are fully aware of their value of the “brand names” an...,0.9887
229,1,52,San Diego,White,40-60,"The differentiating qualities of a luxury fashion brand includes source materials, design, quali...",0.9831
246,0,44,Seattle,,40-60,"For me a luxury brand means superb quality, exquisite craftsmanship, items that stand the test o...",0.983


In [358]:
buyers.head()

Unnamed: 0,chanel buyer,age,city,ethnicity,group,response,score
21,1,25,Houston,White,Under 40,For me personally what differentiates a luxury fashion brand are several factors. First and fore...,0.9887
87,1,52,San Diego,White,40-60,"The differentiating qualities of a luxury fashion brand includes source materials, design, quali...",0.9831
43,1,24,Los Angeles,White,Under 40,Designer bags are truly timeless pieces that one can enjoy for the remainder of their life where...,0.9819
37,1,55,Los Angeles,Asian,40-60,"Design: each luxury fashion brand has timeless design. Chanel: four pocket design jacket, quilte...",0.979
51,1,52,Miami,White,40-60,"There is a reason why there is a cache attached to Luxury fashion--it's fit, quality and structu...",0.9702


In [359]:
nonbuyers.head()

Unnamed: 0,chanel buyer,age,city,ethnicity,group,response,score
144,0,50,San Diego,White,40-60,I feel that the luxury fashion brand elevates my wardrobe and how I feel when wearing it! I just...,0.9951
69,0,44,Los Angeles,Asian,40-60,I strongly believe that the luxury brands are fully aware of their value of the “brand names” an...,0.9887
151,0,44,Seattle,,40-60,"For me a luxury brand means superb quality, exquisite craftsmanship, items that stand the test o...",0.983
11,0,47,Boston,Asian,40-60,I think luxury brands are more about catering to classic and timeless designs. You can wear thes...,0.9822
71,0,36,Los Angeles,White,Under 40,A luxury fashion brand is something that lasts because it is made with great quality materials.T...,0.9724


In [363]:
all['length'] = all['response'].apply(len)
all['nltk_tokens'] = all['response'].apply(text_process2)
all['length_nltk'] = all['nltk_tokens'].apply(len)

buyers['length'] = buyers['response'].apply(len)
buyers['nltk_tokens'] = buyers['response'].apply(text_process2)
buyers['length_nltk'] = buyers['nltk_tokens'].apply(len)

nonbuyers['length'] = nonbuyers['response'].apply(len)
nonbuyers['nltk_tokens'] = nonbuyers['response'].apply(text_process2)
nonbuyers['length_nltk'] = nonbuyers['nltk_tokens'].apply(len)

In [494]:
all.head(10)

Unnamed: 0,chanel buyer,age,city,ethnicity,group,response,score,length,nltk_tokens,length_nltk
232,0,50,San Diego,White,40-60,I feel that the luxury fashion brand elevates my wardrobe and how I feel when wearing it! I just...,0.9951,513,"[feel, luxury, fashion, brand, elevates, wardrobe, feel, wearing, feel, better, Designers, spend...",47
59,1,25,Houston,White,Under 40,For me personally what differentiates a luxury fashion brand are several factors. First and fore...,0.9887,1276,"[personally, differentiates, luxury, fashion, brand, several, factors, First, foremost, quality,...",113
117,0,44,Los Angeles,Asian,40-60,I strongly believe that the luxury brands are fully aware of their value of the “brand names” an...,0.9887,629,"[strongly, believe, luxury, brands, fully, aware, value, “brand, names”, influences, society, ev...",55
229,1,52,San Diego,White,40-60,"The differentiating qualities of a luxury fashion brand includes source materials, design, quali...",0.9831,963,"[differentiating, qualities, luxury, fashion, brand, includes, source, materials, design, qualit...",91
246,0,44,Seattle,,40-60,"For me a luxury brand means superb quality, exquisite craftsmanship, items that stand the test o...",0.983,1063,"[luxury, brand, means, superb, quality, exquisite, craftsmanship, items, stand, test, time, some...",99
17,0,47,Boston,Asian,40-60,I think luxury brands are more about catering to classic and timeless designs. You can wear thes...,0.9822,842,"[think, luxury, brands, catering, classic, timeless, designs, wear, brands, anytime, almost, any...",82
92,1,24,Los Angeles,White,Under 40,Designer bags are truly timeless pieces that one can enjoy for the remainder of their life where...,0.9819,638,"[Designer, bags, truly, timeless, pieces, one, enjoy, remainder, life, whereas, regular, leather...",64
86,1,55,Los Angeles,Asian,40-60,"Design: each luxury fashion brand has timeless design. Chanel: four pocket design jacket, quilte...",0.979,794,"[Design, luxury, fashion, brand, timeless, design, Chanel, four, pocket, design, jacket, quilted...",75
126,1,52,Miami,White,40-60,"There is a reason why there is a cache attached to Luxury fashion--it's fit, quality and structu...",0.9769,599,"[reason, cache, attached, Luxury, fashionits, fit, quality, structure, find, far, superior, Also...",56
121,0,36,Los Angeles,White,Under 40,A luxury fashion brand is something that lasts because it is made with great quality materials.T...,0.9724,695,"[luxury, fashion, brand, something, lasts, made, great, quality, materialsThey, last, generation...",61


In [365]:
buyers.head()

Unnamed: 0,chanel buyer,age,city,ethnicity,group,response,score,length,nltk_tokens,length_nltk
21,1,25,Houston,White,Under 40,For me personally what differentiates a luxury fashion brand are several factors. First and fore...,0.9887,1280,"[personally, differentiates, luxury, fashion, brand, several, factors, First, foremost, quality,...",113
87,1,52,San Diego,White,40-60,"The differentiating qualities of a luxury fashion brand includes source materials, design, quali...",0.9831,963,"[differentiating, qualities, luxury, fashion, brand, includes, source, materials, design, qualit...",91
43,1,24,Los Angeles,White,Under 40,Designer bags are truly timeless pieces that one can enjoy for the remainder of their life where...,0.9819,638,"[Designer, bags, truly, timeless, pieces, one, enjoy, remainder, life, whereas, regular, leather...",64
37,1,55,Los Angeles,Asian,40-60,"Design: each luxury fashion brand has timeless design. Chanel: four pocket design jacket, quilte...",0.979,794,"[Design, luxury, fashion, brand, timeless, design, Chanel, four, pocket, design, jacket, quilted...",75
51,1,52,Miami,White,40-60,"There is a reason why there is a cache attached to Luxury fashion--it's fit, quality and structu...",0.9702,602,"[reason, cache, attached, Luxury, fashionits, fit, quality, structure, find, far, superior, Also...",56


In [366]:
nonbuyers.head()

Unnamed: 0,chanel buyer,age,city,ethnicity,group,response,score,length,nltk_tokens,length_nltk
144,0,50,San Diego,White,40-60,I feel that the luxury fashion brand elevates my wardrobe and how I feel when wearing it! I just...,0.9951,513,"[feel, luxury, fashion, brand, elevates, wardrobe, feel, wearing, feel, better, Designers, spend...",47
69,0,44,Los Angeles,Asian,40-60,I strongly believe that the luxury brands are fully aware of their value of the “brand names” an...,0.9887,629,"[strongly, believe, luxury, brands, fully, aware, value, “brand, names”, influences, society, ev...",55
151,0,44,Seattle,,40-60,"For me a luxury brand means superb quality, exquisite craftsmanship, items that stand the test o...",0.983,1063,"[luxury, brand, means, superb, quality, exquisite, craftsmanship, items, stand, test, time, some...",99
11,0,47,Boston,Asian,40-60,I think luxury brands are more about catering to classic and timeless designs. You can wear thes...,0.9822,842,"[think, luxury, brands, catering, classic, timeless, designs, wear, brands, anytime, almost, any...",82
71,0,36,Los Angeles,White,Under 40,A luxury fashion brand is something that lasts because it is made with great quality materials.T...,0.9724,695,"[luxury, fashion, brand, something, lasts, made, great, quality, materialsThey, last, generation...",61


In [384]:
buyers['string2_tokens'] = buyers['nltk_tokens'].apply(quotes)

In [385]:
buyers.head()

Unnamed: 0,chanel buyer,age,city,ethnicity,group,response,score,length,nltk_tokens,length_nltk,string_tokens,string2_tokens
21,1,25,Houston,White,Under 40,For me personally what differentiates a luxury fashion brand are several factors. First and fore...,0.9887,1280,"[personally, differentiates, luxury, fashion, brand, several, factors, First, foremost, quality,...",113,"""personally"", ""differentiates"", ""luxury"", ""fashion"", ""brand"", ""several"", ""factors"", ""First"", ""fo...","personally, differentiates, luxury, fashion, brand, several, factors, First, foremost, quality, ..."
87,1,52,San Diego,White,40-60,"The differentiating qualities of a luxury fashion brand includes source materials, design, quali...",0.9831,963,"[differentiating, qualities, luxury, fashion, brand, includes, source, materials, design, qualit...",91,"""differentiating"", ""qualities"", ""luxury"", ""fashion"", ""brand"", ""includes"", ""source"", ""materials"",...","differentiating, qualities, luxury, fashion, brand, includes, source, materials, design, quality..."
43,1,24,Los Angeles,White,Under 40,Designer bags are truly timeless pieces that one can enjoy for the remainder of their life where...,0.9819,638,"[Designer, bags, truly, timeless, pieces, one, enjoy, remainder, life, whereas, regular, leather...",64,"""Designer"", ""bags"", ""truly"", ""timeless"", ""pieces"", ""one"", ""enjoy"", ""remainder"", ""life"", ""whereas...","Designer, bags, truly, timeless, pieces, one, enjoy, remainder, life, whereas, regular, leather,..."
37,1,55,Los Angeles,Asian,40-60,"Design: each luxury fashion brand has timeless design. Chanel: four pocket design jacket, quilte...",0.979,794,"[Design, luxury, fashion, brand, timeless, design, Chanel, four, pocket, design, jacket, quilted...",75,"""Design"", ""luxury"", ""fashion"", ""brand"", ""timeless"", ""design"", ""Chanel"", ""four"", ""pocket"", ""desig...","Design, luxury, fashion, brand, timeless, design, Chanel, four, pocket, design, jacket, quilted,..."
51,1,52,Miami,White,40-60,"There is a reason why there is a cache attached to Luxury fashion--it's fit, quality and structu...",0.9702,602,"[reason, cache, attached, Luxury, fashionits, fit, quality, structure, find, far, superior, Also...",56,"""reason"", ""cache"", ""attached"", ""Luxury"", ""fashionits"", ""fit"", ""quality"", ""structure"", ""find"", ""f...","reason, cache, attached, Luxury, fashionits, fit, quality, structure, find, far, superior, Also,..."


In [399]:
# nlp = spacy.load("en_core_web_sm")
# doc = nlp(buyers['string2_tokens'][0])

# for token in doc:
#        if token.text != ',':
#            print(f'    {token.text:{15}} -->   {spacy.explain(token.tag_).upper()}')
#        else:
#            pass

In [400]:
# from spacy import displacy

# doc = nlp(buyers['string2_tokens'][0])
# displacy.render(doc, style="ent")

In [477]:
items = pd.read_csv('items.csv')

In [478]:
items.head(10)

Unnamed: 0,items
0,Blouse
1,Blouse
2,Blouse
3,Blouse
4,Blouse
5,Blouse
6,Boots
7,cuffed
8,Dress
9,Dress


In [483]:
top_items = Counter(items['items'])

In [487]:
items = top_items.most_common(14)
items 

[('Handbag', 226),
 ('Shoes', 158),
 ('Jacket', 98),
 ('Dress', 54),
 ('Sweater', 40),
 ('T Shirt', 25),
 ('Sweatshirt', 15),
 ('Shirt', 10),
 ('Blouse', 6),
 ('Skirt', 6),
 ('Pant', 5),
 ('Pant ', 5),
 ('Tank', 2),
 ('Boots', 1)]

In [489]:
topitems = pd.DataFrame.from_records(items, columns=['items', 'counts'])
topitems.head()

Unnamed: 0,items,counts
0,Handbag,226
1,Shoes,158
2,Jacket,98
3,Dress,54
4,Sweater,40


In [493]:
fig_pie_items = px.pie(topitems, values='counts', names='items', 
                    title='Top 25 Items uploaded by Participants', 
                    template='seaborn', color='counts', opacity=0.8, 
                    color_discrete_sequence=px.colors.sequential.RdBu_r,
                    height=800)
fig_pie_items.show()

In [None]:
fig_pie_c = px.pie(top25, values='counts', names='brands', 
                    title='Top 25 Brands uploaded by Participants', 
                    template='seaborn', color='counts', opacity=0.8, 
                    color_discrete_sequence=px.colors.sequential.tempo_r,
                    height=800)
fig_pie_c.show()

In [506]:
fig_pie_15 = px.pie(tp15, values='counts', names='brands', 
                    title='Top 15 Brands uploaded by Participants', 
                    template='seaborn', color='counts', opacity=0.8, 
                    color_discrete_sequence=px.colors.sequential.RdBu_r,
                    height=800)
fig_pie_15.show()

In [318]:
top25 = pd.DataFrame.from_records(counts, columns=['brands', 'counts'])
top25.head()

Unnamed: 0,brands,counts
0,Chanel,121
1,Gucci,73
2,Hermes,63
3,Burberry,45
4,Louis Vuitton,40


In [496]:
tp15 = pd.read_csv('chanel-top15.csv')
tp15.head(15)

Unnamed: 0,brands,counts
0,Chanel,121
1,Gucci,73
2,Hermes,63
3,Burberry,45
4,Louis Vuitton,40
5,Dior,32
6,YSL,28
7,Prada,23
8,Fendi,22
9,Valentino,18


In [500]:
tp25 = pd.read_csv('uploaded-brands.csv')
tp25.head(15)

Unnamed: 0,brands
0,Alaia
1,Alexander McQueen
2,Alexander McQueen
3,Aquazzura
4,Armani
5,Armani
6,Armani
7,Armani
8,Armani
9,Attico


In [501]:
top_25 = Counter(tp25['brands'])
top_25 = top_25.most_common()
top_25

[('Chanel', 121),
 ('Gucci', 73),
 ('Hermes', 63),
 ('Burberry', 45),
 ('Louis Vuitton', 40),
 ('Dior', 32),
 ('YSL', 28),
 ('Prada', 23),
 ('Fendi', 22),
 ('Valentino', 18),
 ('Dolce & Gabbana', 16),
 ('Balenciaga', 15),
 ('Balmain', 15),
 ('Versace', 10),
 ('Bottega Veneta', 8),
 ('Celine', 8),
 ('Manolo Blahnik', 8),
 ('Armani', 7),
 ('Stella McCartney', 5),
 ('Brunello Cucinelli', 4),
 ('Miu Miu', 4),
 ('The Row', 4),
 ('Givenchy', 3),
 ('Jimmy Choo', 3),
 ('Louboutin ', 3),
 ('YSL ', 3),
 ('Balenciago', 2),
 ('Chloe', 2),
 ('Christian Louboutin', 2),
 ('Max Mara', 2),
 ('Tom Ford', 2),
 ('Alaia', 1),
 ('Alexander McQueen', 1),
 ('Alexander McQueen ', 1),
 ('Aquazzura', 1),
 ('Attico', 1),
 ('Bailmain', 1),
 ('Balenciaga ', 1),
 ('Balmain ', 1),
 ('Brunelleschi', 1),
 ('Brunello Cuccinelli', 1),
 ('Celine ', 1),
 ('Chael', 1),
 ('Chrome Heart', 1),
 ('Dolce & Gabanna', 1),
 ('Ferragamo ', 1),
 ('Giannvito Rossi', 1),
 ('Gibenchy', 1),
 ('Givenchy ', 1),
 ('Goyard', 1),
 ('Isabel Ma

In [503]:
t25 = pd.DataFrame.from_records(top_25, columns=['brands', 'counts'])
t25.head()

Unnamed: 0,brands,counts
0,Chanel,121
1,Gucci,73
2,Hermes,63
3,Burberry,45
4,Louis Vuitton,40


In [403]:
top_cities = Counter(buyers['city'])
top_cities = top_cities.most_common()
top_cities

[('Los Angeles', 26),
 ('New York', 20),
 ('Miami', 10),
 ('Chicago', 9),
 ('San Francisco', 4),
 ('New York ', 4),
 ('Houston', 3),
 ('Philadelphia', 3),
 ('Honolulu', 2),
 ('Atlanta', 2),
 ('Seattle', 2),
 ('San Diego', 1),
 ('Anchorage', 1),
 ('Austin', 1),
 ('Baltimore', 1),
 ('Santa Monica', 1),
 ('Charlotte', 1),
 ('Las Vegas', 1),
 ('Dallas', 1),
 ('Boston', 1),
 ('Washington D.C.', 1),
 ('Denver', 1)]

In [404]:
topcities = pd.DataFrame.from_records(top_cities, columns=['city', 'counts'])
topcities.head()

Unnamed: 0,city,counts
0,Los Angeles,26
1,New York,20
2,Miami,10
3,Chicago,9
4,San Francisco,4


### Top 25 brands uploaded by participants

In [504]:
fig_pie_c = px.pie(t25, values='counts', names='brands', 
                    title='Top 25 Brands uploaded by Participants', 
                    template='seaborn', color='counts', opacity=0.8, 
                    color_discrete_sequence=px.colors.sequential.RdBu,
                    height=800)
fig_pie_c.show()

In [408]:
fig_pie_city = px.pie(topcities, values='counts', names='city', labels='city',
                    title='Top Cities of Participants', 
                    template='seaborn', color='counts', opacity=0.8, 
                    color_discrete_sequence=px.colors.sequential.PuRd,
                    height=800)
fig_pie_city.show()

In [347]:
bar_fig = px.bar(top25, x='brands', y='counts', 
                 hover_data=['brands', 'counts'], color='counts', opacity=0.8, color_discrete_sequence=px.colors.sequential.RdBu, 
                 labels={'brand':'brands owned'}, 
                 height=800, template='seaborn', title='Top 25 Brands uploaded by Participants')
bar_fig.show()

In [186]:
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

In [187]:
import cufflinks as cf

In [191]:
import numpy as np 

In [192]:
from plotly.offline import download_plotlyjs,init_notebook_mode,plot,iplot
init_notebook_mode(connected=True)
cf.go_offline()

#To display the plots
%matplotlib inline

In [235]:
top10 = pd.read_csv('top10.csv')

In [236]:
top10.iplot(kind = 'bar', y = 'counts', x = 'brands')

In [206]:
import plotly.express as px
data = top10

In [None]:
top25 = pd.DataFrame.from_records()

In [244]:
import plotly.graph_objects as go
import numpy as np
np.random.seed(1)

In [251]:
N = 10
colors = np.random.rand(N)
x = top10['brands']
y = top10['counts']
sz = np.random.rand(N) * 30

In [257]:
for template in ["plotly", "plotly_white", "plotly_dark", "ggplot2", "seaborn", "simple_white", "none"]:
    fig = px.bar(top10, 
                 x='brands', y='counts', 
                 hover_data=['brands', 'counts'], color='counts', opacity=0.6, color_continuous_scale="Viridis", 
                 labels={'brand':'brands owned'}, 
                 height=700, template='plotly', title='Top 10 Brands uploaded by Participants')
fig.show()

In [277]:
nonbuyers.sort_values(by=['length'], ascending=False, inplace=True)

In [279]:
nonbuyers['length'].mean()

217.67948717948718

In [290]:
print('Chanel buyers - stats on response "length"\n')
print(df['length'].describe())

Chanel buyers - stats on response "length"

count      96.000000
mean      270.739583
std       240.041367
min        24.000000
25%        98.750000
50%       212.500000
75%       371.250000
max      1316.000000
Name: length, dtype: float64


In [291]:
print('non-Chanel buyers - stats on response "length"\n')
print(nonbuyers['length'].describe())

non-Chanel buyers - stats on response "length"

count     156.000000
mean      217.679487
std       181.211549
min         9.000000
25%        95.500000
50%       182.500000
75%       285.500000
max      1063.000000
Name: length, dtype: float64


In [295]:
print('Chanel buyers - stats on response "sentiment"\n')
print(df['sentiment'].describe())

Chanel buyers - stats on response "sentiment"

count    96.000000
mean      0.466669
std       0.409417
min      -0.340000
25%       0.000000
50%       0.564650
75%       0.864450
max       0.988700
Name: sentiment, dtype: float64


In [293]:
print('non-Chanel buyers - stats on response "sentiment"\n')
print(nonbuyers['sentiment'].describe())

non-Chanel buyers - stats on response "sentiment"

count    156.000000
mean       0.423355
std        0.369471
min       -0.372400
25%        0.000000
50%        0.440400
75%        0.785150
max        0.995100
Name: sentiment, dtype: float64


In [303]:
print(df['age'].describe())

count    96.000000
mean     40.145833
std       9.353839
min      24.000000
25%      32.750000
50%      39.500000
75%      47.000000
max      61.000000
Name: age, dtype: float64


In [304]:
print(nonbuyers['age'].describe())

count    156.000000
mean      39.096154
std        9.182577
min       23.000000
25%       32.000000
50%       39.000000
75%       45.000000
max       60.000000
Name: age, dtype: float64


In [298]:
buyer_age = df["age"].iplot(kind="histogram", bins=20, theme="white", title="Ages of Chanel Buyers",xTitle='Ages', yTitle='Count')

In [299]:
non_buyer_age = nonbuyers["age"].iplot(kind="histogram", bins=20, theme="white", title="Ages of non-Chanel Buyers",xTitle='Ages', yTitle='Count')

In [None]:
for template in ["plotly", "plotly_white", "plotly_dark", "ggplot2", "seaborn", "simple_white", "none"]:
    fig_hist = px.histogram(df, 
                 x='ages', y='counts', 
                 hover_data=['brands', 'counts'], color='counts', opacity=0.6, color_continuous_scale="Viridis", 
                 labels={'brand':'brands owned'}, 
                 height=700, template='plotly', title='Top 10 Brands uploaded by Participants')
fig.show()

In [300]:
df.head()

Unnamed: 0,chanel buyer,age,city,ethnicity,group,response,length,sentiment,nltk_tokens,length_nltk
49,1,54,Los Angeles,White,40-60,Exclusivity - the item I buy won't be easy to find or for others to find. Quality - the construc...,1316,0.9652,"[Exclusivity, , item, buy, wont, easy, find, others, find, Quality, , construction, finest, made...",119
21,1,25,Houston,White,Under 40,For me personally what differentiates a luxury fashion brand are several factors. First and fore...,1280,0.9887,"[personally, differentiates, luxury, fashion, brand, several, factors, First, foremost, quality,...",113
87,1,52,San Diego,White,40-60,"The differentiating qualities of a luxury fashion brand includes source materials, design, quali...",963,0.9831,"[differentiating, qualities, luxury, fashion, brand, includes, source, materials, design, qualit...",91
37,1,55,Los Angeles,Asian,40-60,"Design: each luxury fashion brand has timeless design. Chanel: four pocket design jacket, quilte...",794,0.979,"[Design, luxury, fashion, brand, timeless, design, Chanel, four, pocket, design, jacket, quilted...",75
17,1,40,Denver,White,40-60,IMO the 3 major fashion conglomerates that produce 90% of the Luxury market share factories. The...,662,-0.0043,"[IMO, , major, fashion, conglomerates, produce, , Luxury, market, share, factories, buy, product...",67


In [302]:
fig_hist = px.histogram(df, x="age")
fig_hist.show()