In [1]:
import plotly

In [23]:
import pandas as pd     #(version 1.0.0)
import plotly           #(version 4.5.4) #pip install plotly==4.5.4
import plotly.express as px
import plotly.io as pio
import os

# Data source: CSV from the Kaggle "Prison in India" dataset
# (https://www.kaggle.com/rajanand/prison-in-india/data), shared by the
# National Crime Records Bureau (NCRB), Govt of India.
# Read the caste-wise table and keep only the rows for Maharashtra.
df = (
    pd.read_csv(os.path.join('data', "Caste.csv"))
    .loc[lambda frame: frame['state_name'] == 'Maharashtra']
)
df.head(10)

Unnamed: 0,state_name,is_state,year,gender,caste,convicts,under_trial,detenues,others
636,Maharashtra,1,2001,Male,SC,1383,4025,113,1
637,Maharashtra,1,2001,Male,ST,1027,2573,58,0
638,Maharashtra,1,2001,Male,OBC,1680,2867,87,3
639,Maharashtra,1,2001,Male,Others,2343,5398,106,0
640,Maharashtra,1,2002,Male,SC,1346,2942,61,0
641,Maharashtra,1,2002,Male,ST,1169,2308,37,0
642,Maharashtra,1,2002,Male,OBC,1855,3120,53,0
643,Maharashtra,1,2002,Male,Others,2517,5499,148,0
644,Maharashtra,1,2003,Male,SC,1451,2517,35,3
645,Maharashtra,1,2003,Male,ST,1199,2331,23,5


In [59]:
# Sum the prisoner counts per (year, gender), collapsing the caste rows.
# The value columns are selected with a *list* (note the double brackets):
# indexing a GroupBy with a bare tuple of keys emits the deprecation warning
# "Indexing with multiple keys ... use a list instead" and is no longer
# accepted by newer pandas releases.
df_gender = (
    df.groupby(['year', 'gender'], as_index=False)
    [['detenues', 'under_trial', 'convicts', 'others']]
    .sum()
)
df_gender.head(10)


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



Unnamed: 0,year,gender,detenues,under_trial,convicts,others
0,2001,Female,1,583,279,0
1,2001,Male,364,14863,6433,4
2,2002,Female,8,648,311,0
3,2002,Male,299,13869,6887,0
4,2003,Female,1,728,390,0
5,2003,Male,188,14776,7825,21
6,2004,Female,0,772,484,0
7,2004,Male,170,15012,8447,37
8,2005,Female,1,862,463,0
9,2005,Male,234,15084,8766,21


In [29]:
# Baseline bar chart: convicts per year, coloured by gender.
# barmode='relative' stacks the bars on top of each other.
barchart = px.bar(
    df,
    x='year', y='convicts',
    color='gender',
    opacity=0.9,
    orientation='v',
    barmode='relative',
)

pio.show(barchart)

In [48]:
# Improve the bar chart with a template, readable labels, a title and an
# explicit width/height.

# Template names that can be passed as `template=` (kept here for reference;
# the list itself is not used by the chart below).
templates = [
    'ggplot2', 'seaborn', 'simple_white', 'plotly',
    'plotly_white', 'plotly_dark', 'presentation', 'xgridoff',
    'ygridoff', 'gridon', 'none',
]

barchart = px.bar(
    df,
    x='year', y='convicts',
    color='gender',
    opacity=0.9,
    orientation='v',
    barmode='overlay',  # one of 'group', 'overlay' or 'relative'
    labels=dict(convicts='Convicts in Maharashtra', gender='Gender'),
    title='Indian Prison Statistics',
    width=950, height=720,
    template='presentation',
)

pio.show(barchart)

In [49]:
# Improve the plot with facet_row: one subplot row per caste.
barchart = px.bar(
    df,
    x='year', y='convicts',
    color='gender',
    opacity=0.9,
    orientation='v',
    barmode='overlay',  # one of 'group', 'overlay' or 'relative'
    labels=dict(convicts='Convicts in Maharashtra', gender='Gender'),
    title='Indian Prison Statistics',
    width=950, height=720,
    template='presentation',
    facet_row='caste',
)

pio.show(barchart)


In [55]:
# Improve the plot with facet_col: one subplot per caste, laid out in
# columns and wrapped after two columns (facet_col_wrap=2).
barchart = px.bar(
    df,
    x='year', y='convicts',
    color='gender',
    opacity=0.9,
    orientation='v',
    barmode='overlay',  # one of 'group', 'overlay' or 'relative'
    labels=dict(convicts='Convicts in Maharashtra', gender='Gender'),
    title='Indian Prison Statistics',
    width=950, height=720,
    template='presentation',
    facet_col='caste',
    facet_col_wrap=2,
)

pio.show(barchart)

In [60]:
# Improve the colours: color_discrete_map pins a specific colour to each
# gender value instead of using the template's default sequence.
barchart = px.bar(
    df,
    x='year', y='convicts',
    color='gender',
    color_discrete_map=dict(Male='gray', Female='red'),
    opacity=0.9,
    orientation='v',
    barmode='overlay',  # one of 'group', 'overlay' or 'relative'
    labels=dict(convicts='Convicts in Maharashtra', gender='Gender'),
    title='Indian Prison Statistics by Caste',
    width=950, height=720,
    template='presentation',
    facet_col='caste',
    facet_col_wrap=2,
)

pio.show(barchart)

In [66]:
# Plot the per-gender totals (df_gender) with gender as the facet column and
# the same explicit colour mapping via color_discrete_map.
barchart = px.bar(
    df_gender,
    x='year', y='convicts',
    color='gender',
    color_discrete_map=dict(Male='gray', Female='red'),
    opacity=0.9,
    orientation='v',
    barmode='overlay',  # one of 'group', 'overlay' or 'relative'
    labels=dict(convicts='Convicts in Maharashtra', gender='Gender'),
    title='Indian Prison Statistics by Gender',
    width=950, height=720,
    template='presentation',
    facet_col='gender',
    facet_col_wrap=2,
)

pio.show(barchart)

In [92]:
# Plot the per-gender totals with a continuous colour scale: colour is mapped
# to the numeric 'convicts' column rather than to a categorical column.
barchart = px.bar(
    df_gender,
    x='year', y='convicts',
    color='convicts',  # numeric column, so the continuous colour scale applies
    color_continuous_scale=px.colors.diverging.Earth,
    range_color=[6400, 8500],  # colour range set around the limits of the data
    opacity=0.9,
    orientation='v',
    barmode='overlay',  # one of 'group', 'overlay' or 'relative'
    labels=dict(convicts='Convicts in Maharashtra', gender='Gender'),
    title='Indian Prison Statistics, showing number of Convicts by year',
    width=950, height=720,
    template='gridon',
)

pio.show(barchart)

In [52]:
# Annotated reference cell: the active arguments build a stacked bar chart;
# the commented-out arguments document other px.bar options to experiment with.

# Fake margin of error, standard deviation, or 95% confidence interval
# (needed only when the error_y / error_y_minus options below are enabled):
# df['err_plus'] = df['convicts']/100
# df['err_minus'] = df['convicts']/40

barchart = px.bar(
    df,
    x='year',
    y='convicts',
    color='gender',               # differentiate color of marks
    opacity=0.9,                  # set opacity of markers (from 0 to 1)
    orientation='v',              # 'v','h': orientation of the marks
    barmode='relative',           # in 'overlay' mode, bars are drawn on top of one another.
                                  # in 'group' mode, bars are placed beside each other.
                                  # in 'relative' mode, bars are stacked above (+) or below (-) zero.
    #----------------------------------------------------------------------------------------------
    # facet_row='caste',          # assign marks to subplots in the vertical direction
    # facet_col='caste',          # assign marks to subplots in the horizontal direction
    # facet_col_wrap=2,           # maximum number of subplot columns. Do not set facet_row!

    # color_discrete_sequence=["pink","yellow"],               # set specific marker colors. Color column data cannot be numeric
    # color_discrete_map={"Male": "gray" ,"Female":"red"},     # map your chosen colors
    # color_continuous_scale=px.colors.diverging.Picnic,       # set marker colors. When color column is numeric data
    # color_continuous_midpoint=100,                           # set desired midpoint. When colors=diverging
    # range_color=[1,10000],                                   # set your own continuous color scale
    #----------------------------------------------------------------------------------------------
    # text='convicts',            # values appear in figure as text labels
    # hover_name='under_trial',   # values appear in bold in the hover tooltip
    # hover_data=['detenues'],    # values appear as extra data in the hover tooltip
    # custom_data=['others'],     # invisible values that are extra data to be used in Dash callbacks or widgets

    # log_x=True,                 # x-axis is log-scaled
    # log_y=True,                 # y-axis is log-scaled
    # error_y="err_plus",         # y-axis error bars are symmetrical or for positive direction
    # error_y_minus="err_minus",  # y-axis error bars in the negative direction

    labels=dict(convicts='Convicts in Maharashtra',
                gender='Gender'),     # map the labels of the figure
    title='Indian Prison Statistics', # figure title
    width=1400,                   # figure width in pixels
    height=720,                   # figure height in pixels
    template='gridon',            # 'ggplot2', 'seaborn', 'simple_white', 'plotly',
                                  # 'plotly_white', 'plotly_dark', 'presentation',
                                  # 'xgridoff', 'ygridoff', 'gridon', 'none'

    # animation_frame='year',     # assign marks to animation frames
    # # animation_group=,         # use only when df has multiple rows with same object
    # # range_x=[5,50],           # set range of x-axis
    # range_y=[0,9000],           # set range of y-axis
    # category_orders={'year':    # force a specific ordering of values per column
    # [2013,2012,2011,2010,2009,2008,2007,2006,2005,2004,2003,2002,2001]},
)

# barchart.layout.updatemenus[0].buttons[0].args[1]['frame']['duration'] = 1000
# barchart.layout.updatemenus[0].buttons[0].args[1]['transition']['duration'] = 500

# barchart.update_layout(uniformtext_minsize=14, uniformtext_mode='hide',
#                        legend={'x':0,'y':1.0}),
# barchart.update_traces(texttemplate='%{text:.2s}', textposition='outside',
#                        width=[.3,.3,.3,.3,.3,.3,.6,.3,.3,.3,.3,.3,.3])

