## Brand advice/insights based on the analysis of social media conversations on Edmunds.com

In [1]:
import pandas as pd
from nltk.tokenize import word_tokenize
from re import sub

In [2]:
#Added a header row to the csv to match prior formatting
# Load the forum posts used for the brand-mention and lift sections; the cells
# below read the post text from the 'Mention' column of this frame.
# NOTE(review): the file name suggests model names were already replaced by
# brand names upstream -- confirm against the preprocessing step.
edmunds = pd.read_csv("Brands Replaced.csv")

In [3]:
edmunds.head()

Unnamed: 0.1,Unnamed: 0,Mention
0,0,to take delivery of a 2006 buick it wouldn't s...
1,1,just talking with my mother and she said twice...
2,2,for one have not heard of any electrical issue...
3,3,a guess my 2000 buick buick had a problem with...
4,4,three occasions my 2005 buick theft deterrent ...


In [4]:
edmunds.shape

(18132, 2)

In [5]:
# Lower-case brand names searched for in the posts (kept in alphabetical order).
# Despite the variable name these are makes/brands, not individual car models.
car_models = [
    'acura', 'audi', 'bmw', 'buick', 'cadillac', 'chevrolet', 'chrysler',
    'dodge', 'ford', 'honda', 'hyundai', 'infiniti', 'kia', 'lincoln',
    'mazda', 'mercedes', 'mercury', 'mitsubishi', 'nissan', 'pontiac',
    'saturn', 'subaru', 'suzuki', 'toyota', 'volkswagen', 'volvo',
]

### Most Mentioned Models

In [6]:
#Gathering all posts and counting mentions of models
# For every post, record each brand it mentions at most once, so that a later
# value_counts() over `mentions` gives "number of posts mentioning the brand"
# rather than the raw number of occurrences.
mentions = []
# Iterate the column directly: DataFrame.ix has been removed from pandas, and
# row-by-row label lookups are unnecessary here.
for post in edmunds['Mention']:
    # Replace every non-letter with a space, lowercase, then tokenize.
    # str() turns missing values (NaN) into the harmless token 'nan'.
    tokens = word_tokenize(sub('[^a-zA-Z]', ' ', str(post)).lower())
    brands_in_post = []
    for token in tokens:
        # De-duplicate within the post while keeping first-seen order
        if token in car_models and token not in brands_in_post:
            brands_in_post.append(token)
    mentions.extend(brands_in_post)

In [7]:
# Rank the brands by how many posts mention them (value_counts sorts in
# descending order) and display the ten leaders.
most_mentioned_models = pd.Series(mentions).value_counts()
most_mentioned_models.head(10)

toyota        1711
honda         1592
hyundai       1267
chevrolet     1244
nissan         790
ford           562
volkswagen     523
bmw            513
chrysler       473
buick          470
dtype: int64

### Lift

In [8]:
#Generating dataframe for lift calculations
# Result: one row per post and one 0/1 column per brand in `car_models`
# (1 = the brand name occurs as a token in that post).
#
# Each post is tokenized exactly once (instead of once per brand) and stored
# as a set, so the per-brand membership test is cheap.
post_token_sets = [
    set(word_tokenize(sub('[^a-zA-Z]', ' ', str(post)).lower()))
    for post in edmunds['Mention']
]

mentions_df = pd.DataFrame(index=range(len(edmunds)))
for m in car_models:
    # Build the indicator column directly; the global `mentions` list from the
    # "Most Mentioned Models" section is deliberately left untouched so that
    # section can still be re-run after this cell.
    mentions_df[m] = [1 if m in tokens else 0 for tokens in post_token_sets]

In [9]:
mentions_df.shape

(18132, 26)

In [10]:
mentions_df.head()

Unnamed: 0,acura,audi,bmw,buick,cadillac,chevrolet,chrysler,dodge,ford,honda,...,mercury,mitsubishi,nissan,pontiac,saturn,subaru,suzuki,toyota,volkswagen,volvo
0,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [11]:
#Write to csv for processing using 'lift calculations.R'
# index=False leaves the row index out of the file, so it contains only the
# header row plus the 0/1 brand indicator columns of mentions_df.
mentions_df.to_csv('mentions_binary.csv', header=True, index=False)

### Most Mentioned Attributes

In [12]:
# Rebind `edmunds` to the attribute-replaced version of the posts. The cells
# from here on read the post text from the 'Merge' column (the earlier cells
# read 'Mention' from the previous frame bound to this same name).
edmunds = pd.read_csv("Attributes Replaced.csv")
edmunds.head()

Unnamed: 0.1,Unnamed: 0,Merge
0,0,to take delivery of a 2006 buick it wouldn't s...
1,1,just talking with my mother and she said twice...
2,2,for one have not heard of any electrical issue...
3,3,a guess my 2000 buick buick had a problem with...
4,4,three occasions my 2005 buick theft deterrent ...


In [13]:
# Attribute vocabulary looked up in the tokenized posts.
# NOTE(review): the multi-word entries (e.g. 'bestlooking design') are compared
# against single tokens below and so cannot match as written -- confirm intent.
model_attributes = [
    'availability', 'bestlooking design', 'betterlooking design', 'brand',
    'dealer', 'design', 'ease', 'economy', 'efficiency', 'engine',
    'experience', 'exterior', 'features', 'functionality',
    'goodlooking design', 'hybrid', 'interior', 'noise', 'part',
    'performance', 'price', 'reliability', 'safety', 'seating', 'service',
    'size', 'stylish design', 'transmission', 'warranty', 'wheel',
]

In [14]:
#5 most frequently mentioned attributes
# For every post, record each attribute it mentions at most once, so the
# counts below are "number of posts mentioning the attribute".
attributes = []
# Iterate the column directly: DataFrame.ix has been removed from pandas.
for post in edmunds['Merge']:
    # Replace every non-letter with a space, lowercase, then tokenize
    tokens = word_tokenize(sub('[^a-zA-Z]', ' ', str(post)).lower())
    attributes_in_post = []
    for token in tokens:
        # De-duplicate within the post while keeping first-seen order
        if token in model_attributes and token not in attributes_in_post:
            attributes_in_post.append(token)
    attributes.extend(attributes_in_post)

most_mentioned_attributes = pd.Series(attributes).value_counts()
most_mentioned_attributes[:5]

performance    4575
engine         4454
dealer         3590
economy        3129
exterior       2992
dtype: int64

### Most Mentioned Attributes for Top Five Models

In [15]:
#get top attributes per model
def get_attributes(model, attribute_names=None, top_n=10):
    """Print and return the attributes mentioned in the most posts.

    Each post contributes at most one count per attribute, no matter how many
    times the attribute word occurs in it.

    Parameters
    ----------
    model : iterable of token lists
        One list of lower-cased word tokens per post.
    attribute_names : iterable of str, optional
        Attribute words to look for. Defaults to the notebook's standard
        attribute vocabulary.
    top_n : int, optional
        Number of leading attributes to report (default 10).

    Returns
    -------
    pandas.Series
        Post counts indexed by attribute, sorted in descending order and
        truncated to ``top_n`` entries. Empty when nothing matched. The
        relative order of attributes with equal counts is not specified.
    """
    if attribute_names is None:
        # NOTE(review): multi-word entries cannot match a single token.
        attribute_names = ['availability','bestlooking design','betterlooking design','brand','dealer','design','ease','economy','efficiency','engine','experience','exterior','features','functionality','goodlooking design','hybrid','interior','noise','part','performance','price','reliability','safety','seating','service','size','stylish design','transmission','warranty','wheel']
    attribute_set = set(attribute_names)

    attributes = []
    for tokens in model:
        found_in_post = []
        for token in tokens:
            # Keep each attribute once per post, in first-seen order
            if token in attribute_set and token not in found_in_post:
                found_in_post.append(token)
        attributes.extend(found_in_post)

    # dtype=object keeps the empty case well-defined (no dtype inference).
    top_attributes = pd.Series(attributes, dtype=object).value_counts()[:top_n]
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('Most Mentioned Attributes:')
    print(top_attributes)
    return top_attributes

In [16]:
#top attributes for top five models
top_five_models = ['toyota','honda','hyundai','chevrolet','nissan']

# Tokenize every post once up front (non-letters -> spaces, lowercased)
# instead of re-tokenizing the whole corpus for each brand.
# Iterating the column replaces the removed DataFrame.ix accessor.
attribute_post_tokens = [
    word_tokenize(sub('[^a-zA-Z]', ' ', str(post)).lower())
    for post in edmunds['Merge']
]

for brand in top_five_models:
    # Posts that mention this brand at least once (each post included once)
    car_model = [tokens for tokens in attribute_post_tokens if brand in tokens]
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(brand)
    get_attributes(car_model)
    print('\n')

toyota
Most Mentioned Attributes:
performance     523
economy         504
engine          494
dealer          422
exterior        329
efficiency      279
design          270
wheel           212
transmission    199
features        184
dtype: int64


honda
Most Mentioned Attributes:
performance     632
engine          549
economy         488
efficiency      471
dealer          322
exterior        305
design          276
transmission    258
wheel           190
size            167
dtype: int64


hyundai
Most Mentioned Attributes:
performance     401
engine          326
dealer          307
economy         295
exterior        240
wheel           215
efficiency      209
design          189
features        165
transmission    129
dtype: int64


chevrolet
Most Mentioned Attributes:
engine          481
performance     442
dealer          306
efficiency      304
exterior        263
economy         257
design          174
transmission    173
wheel           172
features        138
dtype: int64


n

### Most Aspirational Brand

In [17]:
# Rebind `edmunds` to the aspiration-replaced version of the posts; the cells
# below read the post text from its 'Merge' column.
edmunds = pd.read_csv("Aspirations Replaced.csv")
# NOTE(review): get_aspirations does not read these two module-level names
# (it uses its own local term list), so they appear to be unused here.
aspirations = []
model_aspirations = ['aspirational']

In [18]:
#get number of aspirational posts by model
def get_aspirations(model):
    """Print and return the number of posts containing an aspirational term.

    Parameters
    ----------
    model : iterable of token lists
        One list of lower-cased word tokens per post.

    Returns
    -------
    int
        Number of posts in which the token 'aspirational' occurs at least
        once; 0 when no post contains it (including empty input).
    """
    model_aspirations = ['aspirational']
    # Count posts, not occurrences: a post with several hits still counts once.
    # Counting directly avoids indexing into an empty value_counts() result,
    # which raised when a brand had no aspirational posts at all.
    count = sum(
        1 for tokens in model
        if any(token in model_aspirations for token in tokens)
    )
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('Number of Aspirational Mentions: ' + str(count))
    return count

In [19]:
#Count aspirational mentions for each model
# Tokenize every post once up front (non-letters -> spaces, lowercased)
# instead of re-tokenizing the whole corpus for each brand.
# Iterating the column replaces the removed DataFrame.ix accessor.
aspiration_post_tokens = [
    word_tokenize(sub('[^a-zA-Z]', ' ', str(post)).lower())
    for post in edmunds['Merge']
]

for brand in car_models:
    # Posts that mention this brand at least once (each post included once)
    car_model = [tokens for tokens in aspiration_post_tokens if brand in tokens]
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(brand)
    print('Number of Mentions: ' + str(len(car_model)))
    get_aspirations(car_model)
    print('\n')

acura
Number of Mentions: 405
Number of Aspirational Mentions: 60


audi
Number of Mentions: 428
Number of Aspirational Mentions: 63


bmw
Number of Mentions: 513
Number of Aspirational Mentions: 89


buick
Number of Mentions: 470
Number of Aspirational Mentions: 53


cadillac
Number of Mentions: 197
Number of Aspirational Mentions: 33


chevrolet
Number of Mentions: 1244
Number of Aspirational Mentions: 136


chrysler
Number of Mentions: 473
Number of Aspirational Mentions: 67


dodge
Number of Mentions: 223
Number of Aspirational Mentions: 26


ford
Number of Mentions: 562
Number of Aspirational Mentions: 83


honda
Number of Mentions: 1592
Number of Aspirational Mentions: 206


hyundai
Number of Mentions: 1267
Number of Aspirational Mentions: 136


infiniti
Number of Mentions: 271
Number of Aspirational Mentions: 37


kia
Number of Mentions: 234
Number of Aspirational Mentions: 23


lincoln
Number of Mentions: 237
Number of Aspirational Mentions: 30


mazda
Number of Mentions: 365
N