In [None]:
import numpy as np
import pandas as pd
from collections import defaultdict

import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import linear_model

sns.set_style("whitegrid")
sns.set()
%matplotlib inline

params = {'legend.fontsize': 'small',
          'figure.figsize': (10, 8),
         'axes.labelsize': 'small',
         'axes.titlesize':'small',
         'xtick.labelsize':'small',
         'ytick.labelsize':'small'}
plt.rcParams.update(params)


import importlib

In [None]:
# Read the demographic survey export; the path is resolved relative to the
# kernel's working directory.
dem_survey_recent = pd.read_csv("DemographicData.csv")

# Display the number of rows that were loaded.
dem_survey_recent.shape[0]

In [None]:
# Count the non-null user_id values recorded for each reported gender.
gender_counts = (
    dem_survey_recent
    .groupby('What_is_your_gender')[['user_id']]
    .count()
)
gender_counts

In [None]:
# Keep only the gender and travel-challenge answers, discarding any respondent
# who left either of the two questions blank.
challenge_cols = ['What_is_your_gender', 'Challenges_to_travel']
gender_challenge = dem_survey_recent[challenge_cols].dropna(subset=challenge_cols)

# Per-gender tally of how often each challenge code was selected.
# Only the genders listed here are tracked; rows with any other value are skipped.
challenge_counts = {'man':{}, 'woman':{}}
def count_instances(row):
    """Add the challenges named in one survey row to ``challenge_counts``.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide ``'What_is_your_gender'`` and ``'Challenges_to_travel'``.
        The challenge answer is a whitespace-separated string of challenge codes.

    Returns
    -------
    None
        The function works purely by updating the module-level
        ``challenge_counts`` dictionary.

    Notes
    -----
    Rows are ignored (instead of raising) when the gender is not one of the
    keys of ``challenge_counts`` or when the challenge answer is not a string
    (for example a missing value).
    """
    per_gender = challenge_counts.get(row['What_is_your_gender'])
    answer = row['Challenges_to_travel']
    if per_gender is None or not isinstance(answer, str):
        return

    # split() without an argument collapses repeated/leading/trailing
    # whitespace, so empty strings are never counted as a challenge.
    for item in answer.split():
        per_gender[item] = per_gender.get(item, 0) + 1

# Run the tally over every remaining response, one row at a time; the counts
# accumulate in challenge_counts as a side effect of count_instances.
gender_challenge.apply(count_instances, axis=1)

In [None]:
def _challenge_frame(counts, gender_label):
    """Turn one gender's ``{challenge: count}`` tally into a tidy DataFrame.

    Parameters
    ----------
    counts : dict
        Mapping of challenge code to the number of times it was selected.
    gender_label : str
        Value written to the ``Gender`` column of every row.

    Returns
    -------
    pandas.DataFrame
        Columns ``Challenge``, ``Count``, ``Gender`` and ``Proportion``, where
        ``Proportion`` is each count divided by the sum of all counts in
        ``counts``.
    """
    frame = (
        pd.DataFrame.from_dict(counts, orient='index', columns=['Count'])
        .rename_axis('Challenge')
        .reset_index()
    )
    frame['Gender'] = gender_label
    frame['Proportion'] = frame['Count'] / frame['Count'].sum()
    return frame


# Human-readable labels for the challenge codes found in the survey export.
challenge_labels = {
    'high_cost_of_tickets_or_fuel': 'High Cost', 
    'lack_of_reliability___timing_or_scheduli': 'Lack of reliability',
    'convenience___lack_of_connectivity_acros': 'Convenience',
    'risk_of_road_accidents___the_roads_are_d': 'Risk of road accidents',
    'personal_safety___often_feel_unsafe_whil': 'Personal safety',
    'none_of_the_above': 'None of the above',
    'access___difficulty_connecting_between_t': 'Access'
}

man_challenge_df = _challenge_frame(challenge_counts['man'], 'Man')
woman_challenge_df = _challenge_frame(challenge_counts['woman'], 'Woman')

# Women first, then men. ignore_index=True gives a fresh 0..n-1 index without
# leaving the old per-gender index behind as a stray "index" column.
challenge_df = pd.concat([woman_challenge_df, man_challenge_df], ignore_index=True)
challenge_df = challenge_df.replace({'Challenge': challenge_labels})
challenge_df

In [None]:
# Horizontal 100 % stacked bar chart: one bar per gender, one segment per
# challenge, segment length = that challenge's share of the gender's mentions.
width = 0.8          # thickness of each gender's bar
min_label_pct = 7    # segments at or below this percentage get no in-bar label

fig, ax = plt.subplots(figsize=(10, 6))
fig_data = challenge_df.copy()

# Fix the bar order once (first appearance in the data) and track the stacking
# offset per gender, so a challenge missing for one gender cannot shift or
# break the other gender's bar.
genders = list(pd.unique(fig_data['Gender']))
running_total = np.zeros(len(genders))

for challenge in pd.unique(fig_data['Challenge']):
    # Align this challenge's rows to the fixed gender order; a gender that
    # never reported the challenge contributes a zero-length segment.
    band_data = (
        fig_data.loc[fig_data['Challenge'] == challenge]
        .groupby('Gender')[['Proportion', 'Count']]
        .sum()
        .reindex(genders, fill_value=0)
    )

    vals = band_data['Proportion'].to_numpy() * 100
    bar_labels = band_data['Count'].to_numpy()

    # Label only segments wide enough to hold the text: percentage on the
    # first line, raw count in parentheses on the second.
    vals_str = [
        f'{y:.1f} %\n({int(x):,})' if y > min_label_pct else ''
        for x, y in zip(bar_labels, vals)
    ]
    bar = ax.barh(genders, vals, width, left=running_total, label=challenge)
    ax.bar_label(bar, label_type='center', labels=vals_str, rotation=90, fontsize=12)
    running_total = running_total + vals

ax.set_title('Challenges for Travel', fontsize=25)
ax.set_xlabel('Share of reported challenges (%)', fontsize=12)
ax.legend(bbox_to_anchor=(1,1), fancybox=True, shadow=True, fontsize=12)
plt.subplots_adjust(bottom=0.20)
fig.tight_layout()
plt.show()

In [None]:
# Sanity check: per gender, Count should add up to the total number of
# challenge mentions and Proportion should add up to 1. Only the numeric
# columns are summed so that text columns are not aggregated.
challenge_df.groupby('Gender')[['Count', 'Proportion']].sum()