# Customer Success Challenge
## Importing libraries


In [1]:
# Data and plotting libraries used throughout the notebook.
import pandas as pd 
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
# Show up to 100 columns when a wide DataFrame is displayed.
pd.set_option('display.max_columns', 100)
# Route DataFrame.plot(...) calls through plotly.
pd.options.plotting.backend='plotly'
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# so pandas warnings raised by later cells will not be visible.
warnings.filterwarnings('ignore')

## Reading the excel worksheet and exploring the data format


In [2]:
# Open the workbook, then read the 'Excel Export' sheet into a DataFrame.
xls_file = pd.ExcelFile(
    'Customer Data Analyst - Skills Test (BlueMountain).xls'
)
customer_data = pd.read_excel(xls_file, sheet_name='Excel Export')
# Preview the first rows to check the column layout.
customer_data.head()

Unnamed: 0,Id,Name,Href,_ExternalId,Receipt No,Customer No,Shop Date,Store,NPS Label,NPS Score,Likelihood To Shop Again,Customer Satisfaction Score,Category,Question,Comment,Total Spend,Age,Age Group,Gender,Price,Availability,Service,Information,Requested Callback,Time Of Day,Region,Division,Comment Tags
0,0,Detractor,,10,40323,9902292,2014-10-02T11:00:00,Bankstown,Detractor,5,6,5,Product Quality Comments,Product Quality Dissatisfaction Comments,Some of the garments I was interested in most were dirty. I was dissatisfied with this seeing as though I was purchasing gifts for newborns.,$887.75,44,40-49,Female,Worse,Worse,Worse,Worse,No,11am,Region 2,Apparel,"Cleanliness, Buy, Apparel, Purchase, Quality, unhappy,"
1,2,Neutral,,10001,87232,6548111,2014-10-02T11:00:00,Fremantle,Neutral,8,8,8,Satisfaction Comments,Satisfaction Comments,"Shop nicely arranged could find what I wanted easily, happy friendly staff",$441.57,71,60+,Female,Worse,Worse,Worse,Worse,Yes,11am,Region 2,Other,"Staff, Friendly, Happy, Service, Shopping,"
2,3,Neutral,,10005,91864,6332969,2014-10-02T11:00:00,Chadstone,Neutral,8,8,8,Satisfaction Comments,Satisfaction Comments,you had the item I wanted at a reasonable price,$327.09,56,50-59,Male,Worse,Better,Same,Worse,No,11am,Region 3,House,"Staff, Item, Price, Range,"
3,5,Promoter,,10009,72154,1125720,2014-10-02T11:00:00,Perth CBD,Promoter,9,10,8,Satisfaction Comments,Satisfaction Comments,Happy and helpfull,$848.26,47,40-49,Female,Worse,Better,Worse,Worse,Yes,11am,Region 4,Other,"Happy, Service,"
4,6,Promoter,,10010,71470,1495545,2014-10-02T17:00:00,Katanning,Promoter,9,9,8,Satisfaction Comments,Satisfaction Comments,Whenever I asked for help they patiently assist me.,$304.24,66,60+,Female,Same,Better,Better,Worse,Yes,5pm,Region 3,House,Staff


In [3]:
# List the dtype pandas inferred for each column.
customer_data.dtypes

Id                               int64
Name                            object
Href                           float64
_ExternalId                      int64
Receipt No                       int64
Customer No                      int64
Shop Date                       object
Store                           object
NPS Label                       object
NPS Score                        int64
Likelihood To Shop Again         int64
Customer Satisfaction Score      int64
Category                        object
Question                        object
Comment                         object
Total Spend                     object
Age                              int64
Age Group                       object
Gender                          object
Price                           object
Availability                    object
Service                         object
Information                     object
Requested Callback              object
Time Of Day                     object
Region                   

# Stats on Data: 
## The stats below tell us a lot about the data. A few of the immediate key findings are: 
1. Looking at the likelihood of customers to return and shop again, the average is 7.5 out of 10. Some customers gave a rating of 0, which means they would not shop again. 
2. The Customer Satisfaction Score averages about 7 out of 10; the maximum score recorded in the data is 8 and the minimum is 4. 
3. The average age of shoppers is 48; the oldest customer who shopped in store is 77 and the youngest is 18. 

In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
customer_data.describe()


Unnamed: 0,Id,Href,_ExternalId,Receipt No,Customer No,NPS Score,Likelihood To Shop Again,Customer Satisfaction Score,Age
count,3212.0,0.0,3212.0,3212.0,3212.0,3212.0,3212.0,3212.0,3212.0
mean,2100.769303,,7768.525841,50549.420299,5114535.0,7.547634,7.555417,7.179016,48.066936
std,1228.711205,,4111.82507,28367.98487,2913550.0,1.221334,2.101973,1.263737,17.328387
min,0.0,,3.0,1010.0,101976.0,5.0,0.0,4.0,18.0
25%,1019.75,,4722.0,25987.75,2568726.0,7.0,6.0,6.0,33.0
50%,2081.5,,8012.0,50039.5,5109123.0,8.0,8.0,8.0,48.0
75%,3171.25,,11072.5,74933.75,7639817.0,8.0,9.0,8.0,64.0
max,4250.0,,14829.0,100979.0,10089060.0,9.0,10.0,8.0,77.0


# Branch wise analysis: 

In [5]:
# Keep only the columns used by the per-store (branch) analysis below.
branch_data = customer_data[[
    'Store',
    'Likelihood To Shop Again',
    'Customer Satisfaction Score',
    'Total Spend',
    'Age',
    'Gender',
    'Price',
    'Availability',
    'Service',
    'Information',
    'Time Of Day',
    'Division',
    'Requested Callback',
]]

## There are a lot of branches (261, in fact). Let's visualise each of the variables with respect to branch to find out more.

In [6]:
# Distinct store names, in order of first appearance in the data.
branch_data['Store'].unique()

array(['Bankstown', 'Fremantle', 'Chadstone', 'Perth CBD', 'Katanning',
       'Ocean Shores', 'Edwardstown', 'Helensvale', 'Hoppers Crossing',
       'Albany', 'Kadina', 'Castlemaine', 'Glenorchy', 'Tea Tree Plaza',
       'Campbelltown', 'Seymour', 'Bullcreek', 'Warrawong', 'Epping',
       'Brisbane CBD', 'Yass', '5224-Unknown', 'Ararat', 'Golden Grove',
       'Charlestown', 'Mt. Gravatt', 'Plenty Valley', 'Northland',
       'Knox City', 'Gladstone', 'Mornington', 'Reynella', 'Penrith',
       'Point Cook', 'Carlingford', 'Forest Hill', 'Waurn Ponds',
       'Morisset', 'Sefton Park', 'Meadow Springs', 'Hornsby', 'Nowra',
       'Cannot find store', 'Innaloo', 'Ballarat', 'Strathpine',
       'Warringah Mall', 'Rockhampton', 'Stanthorpe', 'Ringwood',
       'Buranda', 'Dandenong', 'Eastgardens', 'Camberwell', 'Hobart',
       'Burwood', 'Pakenham', 'Warrnambool', 'Chatswood', 'Rockingham',
       'Cairns', 'Armadale', 'Capalaba', 'Bega', 'Port Macquarie',
       'Glen Waverley', '

In [7]:
# Mean 'Likelihood To Shop Again' per store, best-rated stores first.
branch_likelihood = (
    branch_data[['Store', 'Likelihood To Shop Again']]
    .groupby('Store')
    .mean()
    .sort_values(by=['Likelihood To Shop Again'], ascending=False)
)


## Store vs Likelihood to Shop Again: 
1. 17 stores have a Likelihood score of 10. 
2. 18 stores have a Likelihood score less than 6. 
3. Rest of the stores have a score ranging between 6 to 9. 


In [8]:
# Bar chart of the per-store averages (rendered by the plotly plotting backend set above).
branch_likelihood.plot(kind = 'bar', title = 'Average rating of Likelihood to Shop Again per branch')


In [9]:
# Stores whose average likelihood score is exactly 10: print the count, then the rows.
top_likelihood = branch_likelihood[branch_likelihood['Likelihood To Shop Again'] == 10]
print(len(top_likelihood), top_likelihood)

17               Likelihood To Shop Again
Store                                 
Portland                          10.0
Taree                             10.0
Ocean Grove                       10.0
Dubbo                             10.0
Glenquarie                        10.0
Glenn Innes                       10.0
Mowbray                           10.0
Sarina                            10.0
Shepparton                        10.0
Moree                             10.0
Gilgandra                         10.0
Deniliquin                        10.0
6545-Unknown                      10.0
Windsor                           10.0
Whyalla                           10.0
6386-Unknown                      10.0
5224-Unknown                      10.0


In [10]:
# Stores whose average likelihood score is below 6: print the count, then the rows.
low_likelihood = branch_likelihood[branch_likelihood['Likelihood To Shop Again'] < 6]
print(len(low_likelihood), low_likelihood)

18                    Likelihood To Shop Again
Store                                      
Glendale                           5.727273
Warragul                           5.666667
Southport                          5.615385
Wagga Wagga                        5.500000
Cannot find store                  5.333333
Strathpine                         5.230769
Yeppoon                            5.181818
Katherine                          5.000000
Cannonvale                         5.000000
Forbes                             5.000000
Narracoorte                        5.000000
Ballina                            5.000000
Gladstone                          4.500000
Kalgoorlie                         4.444444
Kippa Ring                         4.400000
Victor Harbor                      4.285714
Yarrawonga                         4.000000
5000-Unknown                       3.000000


# Store VS Customer Satisfaction 
## From the previous stats analysis, we know that the highest customer satisfaction recorded was 8.
1. 167 stores out of 261 (64%) have an average Customer Satisfaction Score higher than 7. 
2. 80 stores out of 261 (31%) have an average Customer Satisfaction Score of 8, which is the highest score recorded. 
3. 15 stores have an average Customer Satisfaction Score lower than 6. 

In [11]:
# Mean 'Customer Satisfaction Score' per store, most satisfied stores first.
branch_customerSatisfaction = (
    branch_data[['Store', 'Customer Satisfaction Score']]
    .groupby('Store')
    .mean()
    .sort_values(by=['Customer Satisfaction Score'], ascending=False)
)

In [12]:
# Bar chart of the per-store averages (rendered by the plotly plotting backend set above).
branch_customerSatisfaction.plot(kind = 'bar', title = 'Average rating of Customer Satisfaction per branch')

## Stores with Customer Satisfaction Score greater than 7

In [13]:
# Stores with an average satisfaction score above 7: print the count, then the rows.
satisfied_stores = branch_customerSatisfaction[branch_customerSatisfaction['Customer Satisfaction Score'] > 7]
print(len(satisfied_stores), satisfied_stores)

167              Customer Satisfaction Score
Store                                   
Hervey Bay                      8.000000
Mt. Ommaney                     8.000000
NT Outlet                       8.000000
Epping                          8.000000
Narracoorte                     8.000000
...                                  ...
Yeppoon                         7.090909
Charlestown                     7.074074
Tuggerah                        7.064516
Penrith                         7.058824
Bondi                           7.035714

[167 rows x 1 columns]


## Stores with the highest Customer Satisfaction Score recorded.

In [14]:
# Stores with an average satisfaction score of exactly 8: print the count, then the rows.
max_satisfaction_stores = branch_customerSatisfaction[branch_customerSatisfaction['Customer Satisfaction Score'] == 8]
print(len(max_satisfaction_stores), max_satisfaction_stores)

80              Customer Satisfaction Score
Store                                   
Hervey Bay                           8.0
Mt. Ommaney                          8.0
NT Outlet                            8.0
Epping                               8.0
Narracoorte                          8.0
...                                  ...
Sarina                               8.0
Broome                               8.0
Busselton                            8.0
Ulladulla                            8.0
Runaway Bay                          8.0

[80 rows x 1 columns]


## Stores with Customer Satisfaction Score lower than 6.

In [15]:
# Stores with an average satisfaction score below 6: print the count, then the rows.
unsatisfied_stores = branch_customerSatisfaction[branch_customerSatisfaction['Customer Satisfaction Score'] < 6]
print(len(unsatisfied_stores), unsatisfied_stores)

15             Customer Satisfaction Score
Store                                  
Gympie                         5.750000
Benalla                        5.750000
Morisset                       5.666667
Dalby                          5.642857
Kalgoorlie                     5.555556
Bowen                          5.000000
Esperance                      5.000000
Forbes                         5.000000
Gladstone                      5.000000
Ballina                        5.000000
Ingham                         5.000000
Portland                       5.000000
Echuca                         5.000000
Camden                         4.400000
Bass Hill                      4.000000


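# Exploring low customer satisfaction responses and their likelihood to shop again:
## For this we keep only the individual responses with a Customer Satisfaction Score lower than 6 and average them per store. Note that this covers every store with at least one such response (131 stores, see the summary below), not only the 15 stores whose average score is lower than 6. 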

In [16]:
# Keep only the individual responses with a satisfaction score below 6, then
# average both scores per store and order by the satisfaction average.
low_cs_rows = branch_data['Customer Satisfaction Score'] < 6
branch_likelihood_cs = (
    branch_data.loc[low_cs_rows, ['Store', 'Likelihood To Shop Again', 'Customer Satisfaction Score']]
    .groupby('Store')
    .mean()
    .reset_index()
    .sort_values(by=['Customer Satisfaction Score'], ascending=False)
)

In [17]:
# Summary statistics of the per-store averages computed from the low-satisfaction responses.
branch_likelihood_cs.describe()

Unnamed: 0,Likelihood To Shop Again,Customer Satisfaction Score
count,131.0,131.0
mean,6.269779,4.826732
std,2.129532,0.320148
min,1.0,4.0
25%,5.0,4.690476
50%,6.2,5.0
75%,7.732143,5.0
max,10.0,5.0


## What does the plot say?
### From the plot, seems like most stores have Likelihood score of 5 and above, even though the customer satisfaction score is low. 

In [18]:
# One bar per store: height is the average likelihood, colour is the average satisfaction.
px.bar(
    branch_likelihood_cs,
    x='Store',
    y='Likelihood To Shop Again',
    color='Customer Satisfaction Score',
    color_continuous_scale=px.colors.diverging.BrBG,
    title="Customer Satisfaction VS Likehihood to Shop Again(Store-Wise)",
)

## Let's take a closer look at the stores that have a Likelihood score lower than 5 for a more detailed analysis.
### This plot suggests that there is a correlation between customer satisfaction score and likelihood to shop again. 
#### Stores Wagga Wagga, Blacktown, Tweed Heads and Kalgoorlie have a Likelihood score of 3 with a customer satisfaction score of 4. 
#### Stores Southport, Earlville, Belconnen and Charlestown have a decent customer satisfaction score of 5, yet their likelihood to shop again remains at 1. The same goes for The Willows and Strathpine, which have similar customer satisfaction scores but a likelihood score of 2. 

In [19]:
# Same chart as above, restricted to stores whose average likelihood is below 5.
px.bar(
    branch_likelihood_cs.loc[branch_likelihood_cs['Likelihood To Shop Again'].lt(5)],
    x='Store',
    y='Likelihood To Shop Again',
    color='Customer Satisfaction Score',
    color_continuous_scale=px.colors.diverging.BrBG,
    title="Customer Satisfaction VS Likehihood to Shop Again(Store-Wise)",
)

## Branch-Wise Revenue Generation(Customer Expenditure)
### We'll look at the total revenue each branch generated to find out which branch generates the most revenue. This also follows up on the customer satisfaction score analysis by showing whether a branch's revenue depends on its customers' satisfaction. 

In [20]:
# Parse the currency strings (e.g. "$887.75") into floats so they can be summed.
# .copy() makes branch_spend an independent frame before a column is assigned to it.
branch_spend = branch_data[['Store', 'Total Spend']].copy()
# Remove '$' and thousands separators: str.strip('$') would leave "1,234.56"
# untouched, and pd.to_numeric raises on such a string.
branch_spend['Total Spend'] = pd.to_numeric(
    branch_spend['Total Spend'].str.replace(r'[$,]', '', regex=True)
)

In [21]:
# Total spend per store, highest-revenue stores first.
branch_spend = (
    branch_spend
    .groupby('Store')
    .sum()
    .reset_index()
    .sort_values(by=['Total Spend'], ascending=False)
)

## From the plot below, we can see the top revenue-generating stores as well as the lowest revenue-generating stores. 
### But we might wonder whether customer satisfaction really matters when it comes to revenue generation. Well, let's find out. 

In [22]:
# Revenue per store, in the descending order produced by the aggregation above.
px.bar(
    branch_spend,
    x='Store',
    y='Total Spend',
    title='Revenue Generation(Total Customer Spendings) across branches',
)

In [23]:
# Attach each store's average satisfaction score to its revenue row.
# 'Store' is a column of branch_spend and the index name of
# branch_customerSatisfaction; a left join keeps every store of branch_spend.
customer_spend_satisfaction = branch_spend.merge(
    branch_customerSatisfaction,
    how='left',
    on='Store',
)

### The graph below sparks some crucial insights. 
### The stores to the left, which generate more than 5k in revenue, all have a customer satisfaction score higher than 5. 
### Conversely, every store with a customer satisfaction score of 5 or less generates 5k or below. The stores to the very right, which generated less than 1k in revenue, do have customer satisfaction scores higher than 7; this is probably because very few customers visited those stores, or only a handful took part in rating their satisfaction. 

In [24]:
# Revenue per store, coloured by the store's average satisfaction score.
px.bar(
    customer_spend_satisfaction,
    x='Store',
    y='Total Spend',
    color='Customer Satisfaction Score',
    color_continuous_scale=px.colors.diverging.BrBG,
    title='Revenue Generation(Total Customer Spendings) VS Customer Satisfaction across branches',
)

## Let's visualize the same plot, but only for the stores that generated 5k or less in revenue. 
### There are a total of 126 stores that generated 5k or less in revenue. However, only 10 of them have a customer satisfaction score of 5 or less: Camden, Gladstone, Forbes, Bowen, Ballina, Portland, Ingham, Echuca, Esperance and Bass Hill. 


In [25]:
# Same chart, restricted to stores with 5k or less in revenue.
# The cut-off is '<= 5000' so it is the exact complement of the '> 5000' group;
# '< 5001' would also admit totals between 5000 and 5001.
px.bar(
    customer_spend_satisfaction[customer_spend_satisfaction['Total Spend'] <= 5000],
    x='Store',
    y='Total Spend',
    color='Customer Satisfaction Score',
    color_continuous_scale=px.colors.diverging.BrBG,
    title='Revenue Generation(Total Customer Spendings) VS Customer Satisfaction across branches',
)

In [26]:
# Count the stores on each side of the 5k revenue mark.
# The two conditions are complementary ('> 5000' vs '<= 5000'); with '< 5001'
# a store totalling e.g. 5000.50 would be counted in both groups.
stores_over_5k = customer_spend_satisfaction[customer_spend_satisfaction['Total Spend'] > 5000]
stores_up_to_5k = customer_spend_satisfaction[customer_spend_satisfaction['Total Spend'] <= 5000]
print('Number of stores that generated more than 5k in revenue: ',len(stores_over_5k),'\n Number of stores that generated 5k or less in revenue: ',len(stores_up_to_5k))

Number of stores that generated more than 5k in revenue:  135 
 Number of stores that generated 5k or less in revenue:  126


In [27]:
# Stores with 5k or less in revenue AND an average satisfaction score of 5 or less.
# '<= 5000' matches the "5k or less" wording; '< 5001' would also admit totals
# between 5000 and 5001.
customer_spend_satisfaction[
    (customer_spend_satisfaction['Total Spend'] <= 5000)
    & (customer_spend_satisfaction['Customer Satisfaction Score'] <= 5)
]

Unnamed: 0,Store,Total Spend,Customer Satisfaction Score
161,Camden,3092.13,4.4
187,Gladstone,2125.79,5.0
196,Forbes,1853.36,5.0
203,Bowen,1662.81,5.0
214,Ballina,1406.35,5.0
221,Portland,1187.34,5.0
224,Ingham,1143.15,5.0
226,Echuca,1090.19,5.0
238,Esperance,923.53,5.0
251,Bass Hill,546.05,4.0


## Analysing Pricing reviews among the branches
### From the plot below, a crucial insight can be noted: customers rate the pricing as worse than competitors more often than as the same or better. 
### This is something that has to be addressed to improve customer satisfaction. 

In [28]:
# Count, per store, how often each 'Price' answer was given:
# one indicator column per answer value, summed within each store.
branch_pricing = (
    pd.concat([branch_data[['Store']], pd.get_dummies(branch_data['Price'])], axis=1)
    .groupby('Store')
    .sum()
    .reset_index()
)

In [29]:
# Stacked bars per store, one trace per answer in the legend order Same / Worse / Better.
pricing_traces = [
    go.Bar(name=answer, x=branch_pricing['Store'], y=branch_pricing[answer])
    for answer in ('Same', 'Worse', 'Better')
]
fig = go.Figure(data=pricing_traces)
fig.update_layout(title='Pricing across Stores', barmode='stack')
fig.show()

## Analysing Availability of Products among branches 
### The availability of products is in a similar situation to pricing: customers rate the availability as worse than competitors more often than as better or the same.

In [30]:
# Count, per store, how often each 'Availability' answer was given.
availability_flags = pd.get_dummies(branch_data['Availability'])
branch_availability = (
    pd.concat([branch_data[['Store']], availability_flags], axis=1)
    .groupby('Store')
    .sum()
    .reset_index()
)

In [31]:
# Stacked bars per store, one trace per answer in the legend order Same / Worse / Better.
availability_traces = [
    go.Bar(name=answer, x=branch_availability['Store'], y=branch_availability[answer])
    for answer in ('Same', 'Worse', 'Better')
]
fig = go.Figure(data=availability_traces)
fig.update_layout(title='Availability across Stores', barmode='stack')
fig.show()

## Analysing Customer Service across Stores
### No surprise here. If the pricing and availability are worse, it is expected that customers find it annoying and aren't happy with the customer service being provided. Accordingly, customers rate the customer service as worse than competitors more often than as better or the same. 

In [32]:
# Count, per store, how often each 'Service' answer was given.
service_flags = pd.get_dummies(branch_data['Service'])
branch_service = (
    pd.concat([branch_data['Store'], service_flags], axis=1)
    .groupby('Store')
    .sum()
    .reset_index()
)

In [33]:
# Stacked bars per store, one trace per answer in the legend order Same / Worse / Better.
service_traces = [
    go.Bar(name=answer, x=branch_service['Store'], y=branch_service[answer])
    for answer in ('Same', 'Worse', 'Better')
]
fig = go.Figure(data=service_traces)
fig.update_layout(title='Customer Service across Stores', barmode='stack')
fig.show()

## Analysing access to Information across Stores
### As expected, in most cases customers rate access to information as worse than competitors, across almost all the stores. 

In [34]:
# Count, per store, how often each 'Information' answer was given.
information_flags = pd.get_dummies(branch_data['Information'])
branch_information = (
    pd.concat([branch_data['Store'], information_flags], axis=1)
    .groupby('Store')
    .sum()
    .reset_index()
)

In [35]:
# Stacked bars per store, one trace per answer in the legend order Same / Worse / Better.
information_traces = [
    go.Bar(name=answer, x=branch_information['Store'], y=branch_information[answer])
    for answer in ('Same', 'Worse', 'Better')
]
fig = go.Figure(data=information_traces)
fig.update_layout(title='Access to Information across Stores', barmode='stack')
fig.show()

## Analyzing Sales of Division across branches
### Sales are spread almost evenly among the House, Baby and Other divisions, with Apparel varying across stores. 
### Products that belong to the House, Baby and Other divisions seem to generate more revenue than Apparel. 

In [36]:
# Revenue per store, split by product division.
# Remove '$' and thousands separators before converting: str.strip('$') would
# leave "1,234.56" untouched, and pd.to_numeric raises on such a string.
division_spend = pd.to_numeric(
    branch_data['Total Spend'].str.replace(r'[$,]', '', regex=True)
)
# One indicator column per division, scaled row-wise by the amount spent, so the
# per-store sum below gives revenue by division. This uses whichever division
# labels are present in the data instead of four hard-coded column names.
division_revenue = pd.get_dummies(branch_data['Division']).mul(division_spend, axis=0)
branch_division = (
    pd.concat([branch_data[['Store']], division_spend, division_revenue], axis=1)
    .groupby('Store')
    .sum()
    .reset_index()
)

In [37]:
# Stacked bars per store, one trace per division in the legend order
# Apparel / Baby / House / Other.
division_traces = [
    go.Bar(name=division, x=branch_division['Store'], y=branch_division[division])
    for division in ('Apparel', 'Baby', 'House', 'Other')
]
fig = go.Figure(data=division_traces)
fig.update_layout(title='Sales of Products by Division across Stores', barmode='stack')
fig.show()

### Let's find out how customers find the availability of the products, this time based on divisions

In [38]:
# Count, per division, how often each 'Availability' answer was given.
division_availability = (
    pd.concat(
        [branch_data['Division'], pd.get_dummies(branch_data['Availability'])],
        axis=1,
    )
    .groupby('Division')
    .sum()
    .reset_index()
)

### Availability across all divisions seems to be a major issue and needs to be improved in order to generate more revenue and positive feedback from the customers. 
### From the plot, it is evident that customers mostly look for apparel but often find its availability worse compared to the competitors. 

In [53]:
# Grouped bars per division, one trace per answer in the legend order Same / Worse / Better.
division_availability_traces = [
    go.Bar(name=answer, x=division_availability['Division'], y=division_availability[answer])
    for answer in ('Same', 'Worse', 'Better')
]
fig = go.Figure(data=division_availability_traces)
fig.update_layout(title='Availability of Products across Divisons', barmode='group')
fig.show()

## Finally, let's analyze how customers rate the service and information, segmented by their age groups. This could give an insight into which age group needs more focus. 

In [40]:
def get_age_data(x, data=None):
    """Count how often each value of column ``x`` occurs within every age group.

    Parameters
    ----------
    x : str
        Name of the categorical column to tabulate (e.g. 'Service').
    data : pandas.DataFrame, optional
        Frame holding an 'Age Group' column and column ``x``. When omitted,
        the notebook-level ``customer_data`` frame is used, which is what the
        function always read before this parameter existed.

    Returns
    -------
    pandas.DataFrame
        One row per age group: an 'Age Group' column followed by one count
        column per distinct value of ``x``.
    """
    source = customer_data if data is None else data
    # One indicator column per answer value, kept next to the age group label.
    flagged = pd.concat([source['Age Group'], pd.get_dummies(source[x])], axis=1)
    # Summing the indicators within each age group yields the answer counts.
    return flagged.groupby('Age Group').sum().reset_index()

In [50]:
def plot_age(df, text):
    """Show a grouped bar chart of the answer counts per age group.

    Parameters
    ----------
    df : DataFrame
        Must provide the columns 'Age Group', 'Same', 'Worse' and 'Better',
        all of which are read below.
    text : str
        Title placed on the figure.

    Returns nothing; the figure is rendered with ``fig.show()``.
    """
    # One trace per answer, in the legend order Same / Worse / Better.
    traces = [
        go.Bar(name=answer, x=df['Age Group'], y=df[answer])
        for answer in ('Same', 'Worse', 'Better')
    ]
    fig = go.Figure(data=traces)
    fig.update_layout(title=text, barmode='group')
    fig.show()

# Analysis from the two plots below: 
### Looking at the two plots, customer service tends to be rated worse when access to information in the stores is rated worse. This is something to keep in mind in order to help improve customer relations.
### More focus should be put on the 60+ age group, as they visit the stores more often than other age groups.
### Elderly customers also find it hard to locate information and are easily dissatisfied with the customer service. 

In [51]:
# Tabulate the 'Service' answers per age group, then chart them.
age_service = get_age_data('Service')
plot_age(age_service, "Customer Service segmented by Age Groups")

In [52]:
# Tabulate the 'Information' answers per age group, then chart them.
age_information = get_age_data('Information')
plot_age(age_information, "Access to Information segmented by Age Groups")

## Analysis of Customer Comments 
### Customers seem to be satisfied with the product quality when the product is available. (Availability of products was shown to be worse in the earlier availability analysis.)
### Customers seem to be dissatisfied in general with the service being provided by the stores. 
### The major area of dissatisfaction is the in-store shopping experience. Most of the comments make or suggest in-store improvements across all divisions of products. This could again be related to the lower availability of products across the divisions. 

In [47]:
# Count, per survey question, how many comments fall into each division.
division_flags = pd.get_dummies(customer_data['Division'])
question_division = (
    pd.concat([customer_data['Question'], division_flags], axis=1)
    .groupby('Question')
    .sum()
    .reset_index()
)

In [49]:
# Grouped bars per question, one trace per division in the legend order
# Apparel / Baby / House / Other.
question_traces = [
    go.Bar(name=division, x=question_division['Question'], y=question_division[division])
    for division in ('Apparel', 'Baby', 'House', 'Other')
]
fig = go.Figure(data=question_traces)
fig.update_layout(title='Customer Comments by Division', barmode='group')
fig.show()

## Analyzing whether bad availability of products is due to shelves not being restocked
### From the plot below, it is evident that availability is poor at all business times. If the poor (worse) availability spiked at a certain time of day, that could be because the shelves were not being restocked. Instead, it seems that the products are simply not in stock to be restocked, so customers find the availability to be poor at all times of the day. 

In [55]:
# Count, per time of day, how often each 'Availability' answer was given.
timeofday_availability = (
    pd.concat(
        [customer_data['Time Of Day'], pd.get_dummies(customer_data['Availability'])],
        axis=1,
    )
    .groupby('Time Of Day')
    .sum()
    .reset_index()
)

In [56]:
# Grouped bars per time of day, one trace per answer in the legend order Same / Worse / Better.
timeofday_traces = [
    go.Bar(name=answer, x=timeofday_availability['Time Of Day'], y=timeofday_availability[answer])
    for answer in ('Same', 'Worse', 'Better')
]
fig = go.Figure(data=timeofday_traces)
fig.update_layout(title='Availability of Products VS Time Of Day', barmode='group')
fig.show()