# Workflow for Analyzing Common Words in Negative Home Depot Store Reviews
### Created by Eric Nutt¶

In [2]:
# Import necessary packages
import pandas as pd
import nltk
from collections import Counter
from nltk.corpus import stopwords
from nltk.collocations import *

stop = stopwords.words('english')

## Define function for reading Google Maps store reviews .csv into a simple pandas dataframe.

In [3]:
# Define function for importing csv to pandas dataframe
def import_review_csv(csv_path):
    """Imports a csv of reviews, creates a dataframe, adds year column and indexes on it
    
    Paramaters
    ----------
    file_path : string
        Path to file
        
    Returns
    -------
    returned_data : dataframe
        Dataframe created from loaded csv
    """
    # Read csv as dataframe
    df = pd.read_csv(csv_path)
    # Convert date coloumn to datetime
    df['review_datetime_utc'] = pd.to_datetime(df['review_datetime_utc'])
    # Remove timestamp from Date and store it in a new column
    df['Date'] = df['review_datetime_utc'].dt.date
    # Set the index to the DATE column
    df2 = df.set_index('Date')
    # Keep only author_title, review_text, review_rating, review_datetime_utc
    df3 = df2[['author_title','review_text','review_rating']]
    # Drop rows with NaN values (not comment left with rating)
    df4 = df3.dropna(axis=0, how='any', thresh=None, subset=None, inplace=False)
    # Convert index to datetime
    df4.index = pd.to_datetime(df4.index)
    # Parse dates for year index
    df5 = df4.sort_index()
    return df5

### Apply workflow to the Home Depot store in Dripping Springs, TX.

In [4]:
# Define path to csv reviews for Home Depot Dripping Springs
home_depot_path = '/Users/ericnutt/Desktop/data_analytics_projects/home_depot/home_depot_drpspr_tx_reviews.csv'

home_depot_df = import_review_csv(home_depot_path)
home_depot_df

Unnamed: 0_level_0,author_title,review_text,review_rating
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014-03-10,Dennis Steele,When I ask a service desk person if their larg...,1
2015-09-21,Tony Ciccone,09/21/2015\nThe people at the Dripping Springs...,1
2016-09-22,Victor Hugo Martínez,Bad service in contractor checkout,1
2017-08-05,Honky 14/88,"Trash store, trash employees, and the LP at th...",1
2017-10-01,B Chase1 Slave,Didn't have what I needed this trip...out of 1...,1
2018-05-27,John Pena,Return of leaky hose. Poor experience. Took ...,1
2018-06-09,Elaine Botha,No customer service. No pride in doing one's j...,1
2018-09-10,Gennie Howard,No veteran's discount!,1
2018-09-16,Vishous Mayo,"Horrible,",1
2019-06-06,Brandon Thomas,DO NOT BUY COUNTERTOPS FROM HOME DEPOT!!!!! Th...,1


## Define function for generating common words list.
This function will generate a list of the most common words (ommiting stopwords) with a word count. I've applied this function to the dataframe for the Home Depot store in Dripping Springs, TX.

In [5]:
# Write function to get most common words without stop words

# Define function for getting most common meaningful words
def most_common_words(store_df):
    """Reads review text column and gets most common words without stopwords
    
    Paramaters
    ----------
    store_df : pandas dataframe
        store dataframe
        
    Returns
    -------
    returned_data : list
        list of most common words in review text
    """
    # Exclude stopwords with Python's list comprehension and pandas.DataFrame.apply.
    store_df['review_without_stopwords'] = store_df['review_text'].apply(
        lambda x: ' '.join([word for word in x.split() if word not in (stop)]))
    # Get most common words from review text
    common_words = Counter(" ".join(store_df["review_without_stopwords"].str.lower()).split()).most_common(100)
    return common_words

In [7]:
# Test function on Home Depot df
most_common_words(home_depot_df)

[('i', 56),
 ('home', 24),
 ('depot', 16),
 ('the', 11),
 ('would', 11),
 ('never', 10),
 ('like', 10),
 ('store', 9),
 ('service', 8),
 ("i've", 8),
 ('people', 8),
 ('help', 7),
 ('delivery', 7),
 ('rude', 7),
 ('one', 7),
 ('time', 7),
 ('way', 7),
 ('put', 6),
 ('call', 6),
 ('they', 6),
 ('it', 6),
 ('desk', 5),
 ('person', 5),
 ('go', 5),
 ('back', 5),
 ('buy', 5),
 ('bad', 5),
 ('customer', 5),
 ('find', 5),
 ('drive', 5),
 ('bags', 5),
 ('ask', 4),
 ('need', 4),
 ('it.', 4),
 ('dripping', 4),
 ('ever', 4),
 ('times', 4),
 ('take', 4),
 ('check', 4),
 ('took', 4),
 ('no', 4),
 ('worst', 4),
 ('went', 4),
 ('know', 4),
 ('store.', 4),
 ('asked', 4),
 ('trip', 4),
 ('looking', 4),
 ('management', 4),
 ('lowes', 4),
 ('and', 4),
 ('bee', 4),
 ('hd', 4),
 ('wire', 4),
 ('clips', 4),
 ('tool', 3),
 ('could', 3),
 ('told', 3),
 ('five', 3),
 ('refrigerator', 3),
 ('called', 3),
 ('anything', 3),
 ('service.', 3),
 ('return', 3),
 ('poor', 3),
 ('not', 3),
 ('this', 3),
 ('since', 3),


## Summary & Conclusions
The workflow above reads a .csv of google maps store reviews into a dataframe with the columns: date of review, review author, review text, and review rating (stars). A function is then applied to that new dataframe to get the most common words (ommiting stopwords) for a more efficient analysis of the review text.

#### Dripping Springs, Home Depot
"Service", "rude", and "help" show up a combined total of 22 times in 37 one-star reviews. Home Depot's inverted pyramid structure puts customers at the top, and great customer service is one of the Home Depot's core values. I myslef have experienced exemplary customer service whenever I go into a Home Depot store, and I know that instances of employees being rude to customers is the exception not the norm. However, I believe any and all customer concerns should be given their due diligence, so one might look further into these reviews. Again, I expect these reviews to be the exception, as the avergae rating for the Dripping Springs store is high at 4.2/5 out of over 500 ratings. That being said, small problems have a habit of becoming larger ones if left unchecked.