In [1]:
import pandas as pd
from collections import Counter
import plotly.express as px
import plotly.graph_objects as go

# Take a look at the data

We will fetch data from the BigQuery table `enriched.reviews_sentiment_analysis` and focus on the following columns:

- `sentiment_classification`: Overall review classification (`Positive`/`Neutral`/`Negative`)
- `sentiment_friendly`: Was the driver friendly (`-1` unfriendly, `0` neutral, `1` friendly)
- `sentiment_professional`: Was the driver professional (`-1` unprofessional, `0` neutral, `1` professional)
- `sentiment_communication`: Communication was ok (`-1` talked too much/little, `0` neutral, `1` ok)
- `sentiment_safe`: Customer felt safe (`-1` felt unsafe, `0` neutral, `1` felt safe)
- `sentiment_comfortable`: Customer felt comfortable (`-1` uncomfortable, `0` neutral, `1` comfortable)
- `sentiment_temperature`: Temperature was ok (`-1` temp too high/low, `0` neutral, `1` ok)
- `sentiment_speed`: Car speed was ok (`-1` drove too fast/slow, `0` neutral, `1` ok)
- `sentiment_clean`: Car was clean (`-1` dirty, `0` neutral, `1` ok)
- `sentiment_size`: Car size ok (`-1` too small/big, `0` neutral, `1` ok)
- `sentiment_recommend`: Customer would recommend (`-1` not recommend, `0` neutral, `1` recommend)

In [2]:
%%bigquery df
-- Fetch every column of the sentiment-analysis table; the cell magic stores the result in `df`.
SELECT * FROM enriched.reviews_sentiment_analysis

Query is running:   0%|          |

Downloading:   0%|          |

In [None]:
# Raw theme-score columns as they come from BigQuery (each is documented above as -1 / 0 / 1).
theme_columns = [
    'sentiment_recommend', 'sentiment_friendly', 'sentiment_professional',
    'sentiment_communication', 'sentiment_safe', 'sentiment_comfortable',
    'sentiment_temperature', 'sentiment_speed', 'sentiment_clean', 'sentiment_size',
]
# Human-readable labels, positionally aligned with `theme_columns`.
# NOTE: labels must not carry leading/trailing whitespace, otherwise the column
# cannot be addressed by its visible name (e.g. df['Driver was friendly']).
theme_cols = [
    'Customer would recommend', 'Driver was friendly', 'Driver was professional',
    'Communication was ok', 'Customer felt safe', 'Customer felt comfortable',
    'Temperature was ok', 'Car speed was ok', 'Car was clean', 'Car size was ok',
]
# Non-theme columns used later in the analysis, and their labels.
other_columns = ['sentiment_classification', 'sentiment_reasons', 'customer_review_text_transcript']
other_cols = ['Classification', 'Reasons', 'Review']

# zip() silently truncates to the shorter list, so make a mismatch loud.
assert len(theme_columns) == len(theme_cols), "theme_columns and theme_cols must align"
assert len(other_columns) == len(other_cols), "other_columns and other_cols must align"

# rename() ignores keys that are not present, so re-running this cell is harmless.
df = df.rename(columns=dict(zip(theme_columns + other_columns, theme_cols + other_cols)))

display(df[theme_cols].describe())

Unnamed: 0,Customer would recommend,Driver was friendly,Driver was professional,Communication was ok,Customer felt safe,Customer felt comfortable,Temperature was ok,Car speed was ok,Car was clean,Car size was ok
count,496.0,496.0,496.0,496.0,496.0,496.0,496.0,496.0,496.0,496.0
mean,0.274194,0.451613,0.457661,0.336694,0.112903,0.169355,-0.024194,0.024194,0.080645,0.016129
std,0.853643,0.706161,0.703686,0.72303,0.495795,0.654085,0.439719,0.499919,0.502065,0.297705
min,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
25%,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,1.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


# How are the reviews distributed?

In [None]:
# Number of reviews per overall classification, sorted by label in descending order
# (Positive, Neutral, Negative for the three documented classes).
classif_df = (
    df.groupby('Classification')['Classification']
      .count()
      .rename('Number of Reviews')
      .reset_index()
      .sort_values(by='Classification', ascending=False)
)

# One bar per classification, coloured with the notebook's green/yellow/red convention.
px.bar(
    classif_df,
    x='Classification',
    y='Number of Reviews',
    title="Distribution of Classifications",
    color="Classification",
    color_discrete_map={'Positive': 'green', 'Negative': 'red', 'Neutral': 'yellow'},
)

From this first chart we can conclude that the customers who submitted feedback had strong feelings about the service, with only a small minority being neutral. The majority of the customers liked the service.

# How do the themes in the reviews fare in each classification?

Sum the individual review scores for each theme, grouping them by classification (Positive/Neutral/Negative):

- `Negative`: the sum of the scores for each theme where the review was classified as Negative
- `Neutral`: the sum of the scores for each theme where the review was classified as Neutral
- `Positive`: the sum of the scores for each theme where the review was classified as Positive

In [None]:
# Theme x Classification table: melt the per-review theme scores to long form, sum them
# per (classification, theme), then pivot so that every classification becomes a column.
summary_df = (
    df[theme_cols + ['Classification']]
    .melt(id_vars=['Classification'], var_name='Classification_type', value_name='value')
    .groupby(['Classification', 'Classification_type'])['value']
    .sum()
    .reset_index()
    .pivot(index='Classification_type', columns='Classification', values='value')
    .reset_index()
    .fillna(0)  # a (theme, classification) pair with no rows counts as 0
    .rename(columns={'Classification_type': 'Theme'})
    .sort_values(by='Positive', ascending=False)
)

display(summary_df)

# Long form for plotting: one row per (theme, classification) bar segment.
# The per-theme Positive/Neutral/Negative totals are attached with a single left merge
# on Theme (Theme is unique in summary_df, so no rows are duplicated). This replaces
# three row-wise apply() lookups that re-filtered summary_df for every row.
plot_df = (
    summary_df
    .melt(id_vars=['Theme'], var_name='Classification', value_name='value')
    .merge(summary_df, on='Theme', how='left')
)
# Ordered categorical so segments stack/sort as Positive -> Neutral -> Negative.
plot_df['Classification'] = pd.Categorical(
    plot_df['Classification'], categories=['Positive', 'Neutral', 'Negative'], ordered=True
)
plot_df = plot_df.sort_values(['Positive', 'Classification'], ascending=[False, True])

# Stacked bars per theme; the hover box shows all three totals for the theme
# (taken from custom_data) rather than the value of the single hovered segment.
(
    px.bar(
        plot_df,
        x='Theme',
        y='value',
        color='Classification',
        title="Cumulative Scores for each Theme (grouped by Classification)",
        color_discrete_map={'Positive': 'green', 'Negative': 'red', 'Neutral': 'yellow'},
        hover_data={'value': False},
        custom_data=['Positive', 'Neutral', 'Negative'],
    )
    .update_traces(hovertemplate="<b>%{x}</b><br><br>Positive: %{customdata[0]}<br>Neutral: %{customdata[1]}<br>Negative: %{customdata[2]}")
    .update_layout(xaxis_title='Theme', yaxis_title='Sum of Values', legend_title_text='Classification')
    # Break multi-word theme labels onto separate lines so the tick labels stay readable.
    .update_xaxes(tickvals=summary_df['Theme'], ticktext=['<br>'.join(label.split()) for label in summary_df['Theme']])
)


Classification,Theme,Negative,Neutral,Positive
8,Driver was professional,-57,2,282
7,Driver was friendly,-50,2,272
6,Customer would recommend,-131,0,267
3,Communication was ok,-57,1,223
4,Customer felt comfortable,-70,1,153
5,Customer felt safe,-35,1,90
2,Car was clean,-20,-4,64
1,Car speed was ok,-45,-2,59
0,Car size was ok,-17,0,25
9,Temperature was ok,-32,0,20


From the provided chart, we can draw several interesting conclusions about the themes based on the cumulative scores for each review classification:

1. **Overall Positive Feedback**:
   - Themes such as "Driver was professional," "Driver was friendly," and "Customer would recommend" received the highest cumulative positive scores. This suggests that customers generally had very favorable experiences in these areas.
   
2. **Areas for Improvement**:
   - The theme "Communication was ok" has noticeable negative scores. This indicates that communication is an area that could be improved as a significant number of customers were dissatisfied.
   - "Customer felt comfortable" (-70) and "Customer felt safe" (-35) also have negative scores; the former is in fact larger in magnitude than communication (-57), the latter smaller. This suggests that while many customers felt positive about these aspects, a notable minority had concerns.

3. **Mixed Reviews**:
   - "Car speed was ok" and "Car size was ok" show relatively balanced scores between positive and negative reviews. This indicates that customer opinions are quite mixed, and these areas might require a closer look to understand the varying experiences.

4. **Neutral Responses**:
   - The neutral scores are relatively low across all themes, suggesting that customers tend to have strong opinions (either positive or negative) rather than feeling indifferent.

5. **Theme Importance**:
   - The height of the bars indicates the overall volume of feedback in each theme. "Driver was professional" and "Driver was friendly" not only received high positive scores but also appear to be the most discussed themes, highlighting their importance to customers.


# Analyse the reasons

Let's inspect the reasons behind the reviews.

In [None]:
# One row per (review, reason): explode the list-valued "Reasons" column while
# carrying the review's classification and theme scores along on every row.
reasons_df = df[['Reasons', 'Classification'] + theme_cols].explode("Reasons")

reasons_df

Unnamed: 0,Reasons,Classification,Customer would recommend,Driver was friendly,Driver was professional,Communication was ok,Customer felt safe,Customer felt comfortable,Temperature was ok,Car speed was ok,Car was clean,Car size was ok
248,Driver was efficient,Positive,1,1,1,1,0,0,0,1,0,0
248,Driver was chatty,Positive,1,1,1,1,0,0,0,1,0,0
248,Customer would recommend,Positive,1,1,1,1,0,0,0,1,0,0
341,Driver was punctual,Positive,1,1,1,0,0,1,0,0,1,0
341,Driver was friendly,Positive,1,1,1,0,0,1,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...
92,Customer would not recommend,Negative,-1,0,0,1,0,0,0,-1,0,0
0,Temperature was too hot,Negative,-1,-1,-1,-1,0,-1,-1,0,0,0
0,Driver was annoying,Negative,-1,-1,-1,-1,0,-1,-1,0,0,0
0,Driver talked excessively,Negative,-1,-1,-1,-1,0,-1,-1,0,0,0


In [None]:
# Named aggregations: the sum of every theme score plus the number of rows per group.
aggs = {
    **{col: (col, 'sum') for col in theme_cols},
    'Count': ('Classification', 'count'),
}

# Aggregate per (classification, reason); order by classification (descending label)
# and then by how often the reason occurs, most frequent first.
reasons_summary_df = (
    reasons_df
    .groupby(['Classification', 'Reasons'])
    .agg(**aggs)
    .reset_index()
    .sort_values(['Classification', 'Count'], ascending=[False, False])
    .rename(columns={'Reasons': 'Reason'})
)

# Discard the recommendation itself as a reason (this only removes these two entries
# from the "Reason" column; the "Customer would recommend" score column is kept).
recommendation_reasons = ['Customer would recommend', 'Customer would not recommend']
reasons_summary_df = reasons_summary_df.loc[
    ~reasons_summary_df['Reason'].isin(recommendation_reasons)
]

reasons_summary_df

Unnamed: 0,Classification,Reason,Customer would recommend,Driver was friendly,Driver was professional,Communication was ok,Customer felt safe,Customer felt comfortable,Temperature was ok,Car speed was ok,Car was clean,Car size was ok,Count
266,Positive,Driver was friendly,199,241,237,183,57,114,11,37,53,19,241
168,Positive,Car was clean,67,70,72,58,13,59,7,7,76,14,88
284,Positive,Driver was punctual,62,56,56,44,24,28,2,15,17,4,72
170,Positive,Car was comfortable,55,57,56,42,13,68,12,5,37,15,68
282,Positive,Driver was professional,49,53,54,47,26,27,6,17,12,2,54
...,...,...,...,...,...,...,...,...,...,...,...,...,...
119,Negative,Ride was awkward,-1,-1,-1,-1,0,0,0,0,0,0,1
122,Negative,Seats were uncomfortable,-1,0,0,1,-1,-1,0,-1,-1,0,1
126,Negative,Truck was too large,-1,0,0,0,0,-1,0,0,0,-1,1
129,Negative,Vehicle was difficult to maneuver,0,1,0,0,0,0,0,0,0,-1,1


In [None]:
# First 50 Positive rows of reasons_summary_df (which was sorted by Count, descending,
# within each classification when it was built), drawn as a bar chart.
px.bar(
    reasons_summary_df.loc[reasons_summary_df['Classification'].eq('Positive')].head(50),
    x='Reason',
    y='Count',
    color='Classification',
    color_discrete_map={'Positive': 'green'},
    title='Top 50 Reasons in Positive Reviews',
)

Here are some interesting conclusions that can be derived from the data:

1. **Driver Attributes Dominate Positive Reviews**: The most frequent reasons for positive reviews are heavily focused on driver attributes, such as "Driver was friendly," "Driver was punctual," and "Driver was professional." This suggests that the quality of the driver plays a crucial role in customer satisfaction.

2. **Cleanliness is Key**: "Car was clean" is the second most common reason for positive reviews. This highlights the importance of maintaining a clean vehicle for rideshare services, as it significantly impacts the customer's perception and satisfaction.

3. **Timeliness Matters**: The reason "Driver was punctual" appears prominently, indicating that timely pickups and drop-offs are essential for positive customer experiences.

4. **Comfort and Professionalism**: "Car was comfortable" and "Driver was professional" are also frequently mentioned. This underscores the importance of providing a comfortable ride and maintaining a professional demeanor to enhance customer satisfaction.

5. **Communication and Helpfulness**: Reviews frequently mention attributes like "Driver was helpful," "Driver was communicative," and "Driver was attentive." This indicates that effective communication and a willingness to assist passengers are highly valued by customers.

6. **Safety and Smooth Ride**: Comments such as "Ride was smooth," "Ride was safe," and "Ride was comfortable" suggest that the overall quality of the ride itself is crucial for positive reviews.

7. **Secondary Factors**: While less common, factors like "Driver was chitchatty," "Temperature was OK," and "Driver offered extras" show that secondary elements, such as the interaction with the driver and in-car amenities, also contribute to positive experiences but to a lesser extent.

8. **Attention to Minor Details**: Factors such as "Car could be cleaner," "Driver was courteous," and "No loud music" highlight that attention to minor details can significantly impact customer satisfaction, suggesting that addressing these aspects could improve overall reviews.

In [None]:
# First 50 Negative rows of reasons_summary_df (which was sorted by Count, descending,
# within each classification when it was built), drawn as a bar chart.
px.bar(
    reasons_summary_df.loc[reasons_summary_df['Classification'].eq('Negative')].head(50),
    x='Reason',
    y='Count',
    color='Classification',
    color_discrete_map={'Negative': 'red'},
    title='Top 50 Reasons in Negative Reviews',
)

Some more conclusions from this data:

1. **Temperature Issues**: "Temperature was too hot" is the most frequent reason for negative reviews, suggesting that climate control within the vehicle is a significant factor in customer dissatisfaction.

2. **Driver Communication and Behavior**: Negative reviews often cite issues such as "Driver did not offer to turn on the radio," "Driver was rude," "Driver was not communicative," and "Driver ignored the customer." This indicates that poor communication and perceived rudeness from the driver significantly contribute to negative customer experiences.

3. **Punctuality and Speed**: "Driver was late" and "Driver drove too fast" are common complaints. This suggests that both punctuality and safe driving speeds are critical factors for customer satisfaction. Conversely, "Driver drove too slow" also appears, indicating that driving too slowly can be just as problematic as driving too fast.

4. **Cleanliness and Comfort**: "Car was dirty" and "Customer felt uncomfortable" are notable reasons for negative reviews, emphasizing the importance of vehicle cleanliness and overall passenger comfort.

5. **Safety Concerns**: "Driver was speeding," "Customer felt unsafe," and "Driver was distracted" are significant issues mentioned. This highlights that perceived safety during the ride is crucial for positive reviews.

6. **Driver's Attentiveness and Professionalism**: Complaints such as "Driver was not attentive," "Driver talked excessively," and "Driver was unprofessional" suggest that the level of professionalism and attentiveness displayed by the driver is a key factor in customer satisfaction.

7. **Vehicle Conditions**: Comments like "Car was too old," "Car was too small," and "Car was filthy" show that the condition and size of the vehicle are important factors in customer reviews.

8. **Secondary Factors**: While less common, factors such as "Radio was too loud," "Vehicle was inadequate," and "Customer did not relax" highlight that secondary elements can also impact customer satisfaction negatively, though to a lesser extent.

9. **Customer Expectations**: Issues like "Customer was terrified," "Customer was not happy," and "Customer preferred other rides" suggest that customer expectations and previous experiences can influence their perception and satisfaction with the ride.


In [None]:
# Reasons whose summed "Customer would recommend" score is at least 5,
# highest score first, limited to the first 10 rows.
px.bar(
    reasons_summary_df
        .loc[lambda frame: frame['Customer would recommend'] >= 5]
        .sort_values('Customer would recommend', ascending=False)
        .head(10),
    x='Reason',
    y='Customer would recommend',
    color='Classification',
    color_discrete_map={'Positive': 'green', 'Negative': 'red', 'Neutral': 'yellow'},
    title='Top 10 Reasons Customers Would Recommend',
)

Conclusions from the customers that recommend the service:

1. **Driver Friendliness is Paramount**: The most common reason for recommendations is "Driver was friendly," which overwhelmingly stands out. This suggests that a friendly demeanor from the driver significantly influences customer recommendations.

2. **Cleanliness and Punctuality**: "Car was clean" and "Driver was punctual" are also prominent reasons. This indicates that maintaining a clean vehicle and ensuring punctual service are crucial factors in gaining customer recommendations.

3. **Comfort and Professionalism**: "Car was comfortable," "Driver was professional," and "Ride was comfortable" are important factors. This shows that both the comfort of the ride and the professional behavior of the driver contribute significantly to positive word-of-mouth.

4. **Communication and Helpfulness**: "Driver was communicative" and "Driver was helpful" are frequently mentioned. Effective communication and a willingness to assist passengers are key to earning customer recommendations.

5. **Driver Attentiveness and Safety**: "Driver was attentive" and "Driver was safe" highlight the importance of the driver's attentiveness and the perceived safety of the ride in influencing recommendations.

In [None]:
# Reasons whose summed "Customer would recommend" score is -5 or lower,
# most negative first, limited to the first 10 rows.
px.bar(
    reasons_summary_df
        .loc[lambda frame: frame['Customer would recommend'] <= -5]
        .sort_values('Customer would recommend', ascending=True)
        .head(10),
    x='Reason',
    y='Customer would recommend',
    color='Classification',
    color_discrete_map={'Positive': 'green', 'Negative': 'red', 'Neutral': 'yellow'},
    title='Top 10 Reasons Customers Would Not Recommend',
)

Conclusions from the customers that DO NOT recommend the service:

1. **Temperature Issues**: "Temperature was too hot" is a leading reason for not recommending the service. This emphasizes the importance of maintaining a comfortable temperature within the vehicle to avoid customer dissatisfaction.

2. **Driver Communication and Behavior**: Reasons such as "Driver did not offer to turn on the radio," "Driver was rude," "Driver was not communicative," and "Driver ignored the customer" indicate that poor communication and perceived rudeness from the driver are significant factors in negative recommendations.

3. **Cleanliness**: "Car was dirty" is a prominent complaint, highlighting that the cleanliness of the vehicle is crucial for customer satisfaction and recommendations.

4. **Safety Concerns**: "Driver was speeding" and "Customer felt unsafe" are key reasons for not recommending the service. This suggests that perceived safety during the ride is a critical factor for customers.

5. **Driver Attentiveness and Professionalism**: "Driver was not attentive" and "Driver drove too fast" suggest that the driver's attentiveness and adherence to safe driving practices are important for positive customer experiences.

6. **Lack of Comfort**: Several reasons indicate a lack of comfort during the ride, such as the temperature being too hot and the driver not offering to turn on the radio.


In [None]:
# Reasons whose summed "Customer felt safe" score is at least 5,
# highest score first, limited to the first 10 rows.
px.bar(
    reasons_summary_df
        .loc[lambda frame: frame['Customer felt safe'] >= 5]
        .sort_values('Customer felt safe', ascending=False)
        .head(10),
    x='Reason',
    y='Customer felt safe',
    color='Classification',
    color_discrete_map={'Positive': 'green', 'Negative': 'red', 'Neutral': 'yellow'},
    title='Top 10 Reasons for Customer Feeling Safe',
)

About the feeling of safety:

1. **Driver Friendliness**: "Driver was friendly" is the most common reason for customers feeling safe, suggesting that a friendly demeanor from the driver significantly contributes to a sense of security for passengers.

2. **Perception of Safety**: "Driver was safe" is the second most frequent reason, indicating that the driver's adherence to safe driving practices is crucial for customer safety perception.

3. **Professionalism and Attentiveness**: "Driver was professional" and "Driver was attentive" are also important factors. This highlights the importance of the driver's professional behavior and attentiveness in making passengers feel safe.

4. **Punctuality**: "Driver was punctual" suggests that timely service contributes to the perception of safety, possibly because it indicates reliability and organization.

5. **Helpfulness**: "Driver was helpful" appears as a reason, showing that a driver's willingness to assist passengers enhances their sense of safety.

6. **Driving Practices**: "Driver drove safely" specifically addresses the driving behavior, underscoring the importance of safe driving practices in customer safety perception.

7. **Comfort and Cleanliness**: "Car was comfortable" and "Car was clean" suggest that the condition and cleanliness of the vehicle also play roles in making passengers feel safe.

8. **Communication**: "Driver was communicative" indicates that effective communication from the driver helps in building a sense of safety among passengers.

In [None]:
# Reasons whose summed "Customer felt safe" score is -3 or lower,
# most negative first, limited to the first 10 rows.
px.bar(
    reasons_summary_df
        .loc[lambda frame: frame['Customer felt safe'] <= -3]
        .sort_values('Customer felt safe', ascending=True)
        .head(10),
    x='Reason',
    y='Customer felt safe',
    color='Classification',
    color_discrete_map={'Positive': 'green', 'Negative': 'red', 'Neutral': 'yellow'},
    title='Top 10 Reasons for Customer Not Feeling Safe',
)

The chart presents the top 10 reasons why customers do not feel safe in a rideshare service. Here are some interesting conclusions that can be derived from the data:

1. **Speed and Reckless Driving**: "Driver drove too fast" and "Driver was speeding" are the most common reasons for customers not feeling safe. This highlights that excessive speed and reckless driving are major factors contributing to customer fear and a lack of safety.

2. **General Feeling of Unsafety and Fear**: "Customer felt unsafe" and "Customer was scared" are prominent reasons, indicating that the overall perception of safety during the ride is critical. This could be influenced by various factors, including driving behavior and environmental factors.

3. **Driver Recklessness**: "Driver was reckless" is another significant reason, further emphasizing the impact of unsafe driving practices on customer safety perceptions.

4. **Driver Distraction and Inattentiveness**: "Driver was distracted" and "Driver was not attentive" suggest that drivers who are not fully focused on driving cause passengers to feel unsafe.

5. **Driver Communication and Behavior**: "Driver ignored the customer," "Driver was not communicative," and "Driver was rude" indicate that poor communication and perceived rudeness from the driver contribute to feelings of unsafety.

# Conclusion

The analysis of rideshare reviews reveals several key factors that influence customer satisfaction, safety perceptions, and recommendations. The primary drivers for positive experiences include friendly and professional driver behavior, cleanliness and comfort of the vehicle, punctuality, and effective communication. Conversely, negative experiences are often driven by issues related to temperature control, poor communication and rudeness from the driver, lack of cleanliness, unsafe driving practices, and overall discomfort.

Customers value a friendly, communicative, and professional driver who maintains a clean and comfortable vehicle and adheres to safe driving practices. Negative reviews highlight the critical impact of unsafe driving behaviors, poor communication, and a lack of attentiveness on customer satisfaction.

### Actionable Items to Improve the Service

1. **Driver Training and Behavior**:
   - **Friendliness and Professionalism**: Implement training programs focused on enhancing driver friendliness, professionalism, and communication skills.
   - **Attentiveness and Helpfulness**: Emphasize the importance of being attentive and helpful to passenger needs.

2. **Safety and Driving Practices**:
   - **Safe Driving Training**: Conduct regular training sessions on safe driving practices, including adhering to speed limits and avoiding reckless driving behaviors.
   - **Monitoring and Feedback**: Use monitoring systems to track driving behaviors and provide feedback to drivers, encouraging safe and responsible driving.

3. **Vehicle Maintenance and Cleanliness**:
   - **Regular Cleaning**: Establish a routine for regular and thorough cleaning of vehicles to ensure a clean and comfortable environment for passengers.
   - **Maintenance Checks**: Perform regular maintenance checks to ensure all vehicles are in good condition and meet safety standards.

4. **Punctuality and Reliability**:
   - **Time Management Training**: Provide training on effective time management and route planning to help drivers be more punctual.
   - **System Improvements**: Optimize the scheduling and dispatch system to minimize delays and improve reliability.

5. **Temperature Control and Comfort**:
   - **Climate Control Systems**: Ensure all vehicles have functioning climate control systems and encourage drivers to adjust temperatures according to passenger preferences.
   - **Passenger Comfort**: Provide guidelines for maintaining a comfortable and pleasant in-car environment.

6. **Effective Communication**:
   - **Driver-Passenger Interaction**: Encourage drivers to communicate effectively with passengers, including offering amenities such as adjusting the radio or providing information about the trip.
   - **Feedback Mechanism**: Implement a robust feedback mechanism to gather passenger input and address any communication-related issues promptly.

By focusing on these actionable items, the rideshare service can enhance overall customer satisfaction, improve safety perceptions, and increase the likelihood of positive recommendations. These improvements will lead to a better experience for passengers and a more successful and reliable service.