# Learning Plotly

**Why [Plotly](https://plotly.com/python/)?**
* It is interactive
* It can be used from other languages, e.g. R.
* Easy to use


## Importing libraries 

In [None]:
!pip install -U kaleido

In [5]:
!pip install --upgrade "kaleido==0.1.*" #to save chart as image file

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import kaleido
import plotly.express as px
import statsmodels.api as sm

## Creating Plotly Plots

Plotly graph_object: https://plotly.com/python/reference/index/

Important methods:
* fig.update_layout()
* fig.update_traces()

### Pie Chart

Importing the dataset

In [None]:
# Load the bird window-collision deaths CSV (path as uploaded to /content).
dfTest = pd.read_csv("/content/bird-window-collision-death.csv")

In [None]:
# Preview the first rows to see the available columns and value formats.
dfTest.head()

Unnamed: 0,Date,YEAR,MONTH,Bldg #,Side,SpeciesCode,Common Name,Deaths
0,10/19/2013,2013,OCT,twelve,E,MODO,Mourning Dove,1
1,10/22/2013,2013,OCT,seventeen,C,EATO,Eastern Towhee,1
2,10/23/2013,2013,OCT,ten,D,HETH,Hermit Thrush,1
3,10/27/2013,2013,OCT,seven,B,GCKI,Golden-crowned Kinglet,1
4,10/28/2013,2013,OCT,nine,D,GRCA,Gray Catbird,1


In [None]:
# List the column names that can be passed to plotly express arguments.
dfTest.columns

Index(['Date', 'YEAR', 'MONTH', 'Bldg #', 'Side', 'SpeciesCode', 'Common Name',
       'Deaths'],
      dtype='object')

In [None]:
# Warm-up on Plotly's bundled tips data: one slice per day, sized by tips.
df = px.data.tips()
fig = px.pie(data_frame=df, names='day', values='tip')
fig.show()

In [None]:
# One slice per building, sized by the recorded deaths.
fig = px.pie(data_frame=dfTest, names='Bldg #', values='Deaths')
fig.show()

In [None]:
fig = px.pie(
    data_frame=dfTest,
    names='Bldg #',
    values='Deaths',
    color='Side',
    hole=0.3,  # a non-zero hole turns the pie into a donut
)
fig.show()

In [None]:
fig = px.pie(data_frame=dfTest, names='Bldg #', values='Deaths', color='Side', hole=0.3)
# Show "label + percent" on each slice, with white text for labels drawn inside.
fig.update_traces(
    insidetextfont={'color': 'white'},
    textinfo='label+percent',
)
fig.show()

In [None]:
fig = px.pie(data_frame=dfTest, names='Bldg #', values='Deaths', color='Side', hole=0.3)
fig.update_traces(
    insidetextfont={'color': 'white'},
    textinfo='label+percent',
)
# itemclick=False turns off the click interaction on legend entries.
fig.update_layout(legend=dict(itemclick=False))
fig.show()

In [None]:
# Save the current `fig` as a static PNG (static export relies on kaleido).
fig.write_image('donutChart.png')

### Bar Graph

plotly.express.bar: https://plotly.com/python-api-reference/generated/plotly.express.bar.html

code: https://github.com/Coding-with-Adam/Dash-by-Plotly/blob/master/Plotly_Graphs/Bar/bar.py

In [None]:
# Load the prison statistics by caste CSV (path as uploaded to /content).
dfCaste = pd.read_csv("/content/Caste.csv")

In [None]:
# Preview the first rows of the raw caste data.
dfCaste.head()

Unnamed: 0,state_name,is_state,year,gender,caste,convicts,under_trial,detenues,others
0,Andhra Pradesh,1,2001,Male,SC,1236,1836,0,5
1,Andhra Pradesh,1,2001,Male,ST,418,1205,0,1
2,Andhra Pradesh,1,2001,Male,OBC,1526,3359,4,8
3,Andhra Pradesh,1,2001,Male,Others,939,1960,0,8
4,Andhra Pradesh,1,2002,Male,SC,1177,1573,2,0


In [None]:
# List the available columns before aggregating.
dfCaste.columns

Index(['state_name', 'is_state', 'year', 'gender', 'caste', 'convicts',
       'under_trial', 'detenues', 'others'],
      dtype='object')

In [None]:
# Keep only Maharashtra, then total every inmate category per (year, gender).
# Note: 'caste' is not a grouping key, so it is absent from the result.
dfCaste_Mhr = (
    dfCaste.loc[dfCaste['state_name'] == 'Maharashtra']
    .groupby(['year', 'gender'], as_index=False)[['detenues', 'under_trial', 'convicts', 'others']]
    .sum()
)
dfCaste_Mhr.head()

Unnamed: 0,year,gender,detenues,under_trial,convicts,others
0,2001,Female,1,583,279,0
1,2001,Male,364,14863,6433,4
2,2002,Female,8,648,311,0
3,2002,Male,299,13869,6887,0
4,2003,Female,1,728,390,0


In [None]:
# Baseline chart: convicts per year, coloured by gender, vertical bars.
barchart = px.bar(
    dfCaste_Mhr,
    x='year', y='convicts',
    color='gender',
    barmode='relative',
    orientation='v',
    opacity=0.9,
)
barchart.show()

In [None]:
# Same data drawn horizontally: x and y are swapped and orientation is 'h'.
barchart = px.bar(
    dfCaste_Mhr,
    x='convicts', y='year',
    color='gender',
    barmode='relative',
    orientation='h',
    opacity=0.9,
)
barchart.show()

In [None]:
# barmode='overlay': bars of both genders are drawn on top of one another.
barchart = px.bar(
    dfCaste_Mhr,
    x='year', y='convicts',
    color='gender',
    barmode='overlay',
    orientation='v',
    opacity=0.9,
)
barchart.show()

In [None]:
# barmode='group': bars of both genders are placed side by side.
barchart = px.bar(
    dfCaste_Mhr,
    x='year', y='convicts',
    color='gender',
    barmode='group',
    orientation='v',
    opacity=0.9,
)
barchart.show()

In [None]:
# barchart = px.bar(
#     data_frame=dfCaste_Mhr,
#     x = 'year',
#     y='convicts',
#     color = 'gender',
#     opacity=0.9,
#     orientation='v',
#     barmode='group',
#     facet_row='caste' # assign marks to subplots in the vertical direction
# )
# barchart.show()

In [None]:
# dfCaste_Mhr was aggregated by (year, gender) only, so it has no 'caste'
# column and faceting on it raises a ValueError. Re-aggregate the Maharashtra
# rows here with 'caste' kept as a grouping key.
dfCaste_Mhr_caste = (
    dfCaste[dfCaste['state_name'] == 'Maharashtra']
    .groupby(['year', 'gender', 'caste'], as_index=False)[['detenues', 'under_trial', 'convicts', 'others']]
    .sum()
)

barchart = px.bar(
    data_frame=dfCaste_Mhr_caste,
    x='year',
    y='convicts',
    color='gender',
    opacity=0.9,
    orientation='v',
    barmode='group',
    facet_col='caste'  # one subplot column per caste value
)
barchart.show()

In [None]:
# dfCaste_Mhr was aggregated by (year, gender) only, so it has no 'caste'
# column and faceting on it raises a ValueError. Re-aggregate the Maharashtra
# rows here with 'caste' kept as a grouping key.
dfCaste_Mhr_caste = (
    dfCaste[dfCaste['state_name'] == 'Maharashtra']
    .groupby(['year', 'gender', 'caste'], as_index=False)[['detenues', 'under_trial', 'convicts', 'others']]
    .sum()
)

barchart = px.bar(
    data_frame=dfCaste_Mhr_caste,
    x='year',
    y='convicts',
    color='gender',
    opacity=0.9,
    orientation='v',
    barmode='group',
    facet_col='caste',  # assigns marks to subplots in the horizontal direction
    facet_col_wrap=2    # maximum number of subplot columns. Do not set facet_row!
)
barchart.show()

In [None]:
barchart = px.bar(
    dfCaste_Mhr,
    x='year', y='convicts',
    color='gender',
    # Explicit marker colours, applied in order to the categories of the
    # (non-numeric) colour column.
    color_discrete_sequence=['yellow', 'pink'],
    barmode='relative',
    orientation='v',
    opacity=0.9,
)
barchart.show()

In [None]:
barchart = px.bar(
    dfCaste_Mhr,
    x='year', y='convicts',
    color='gender',
    # Dictionary that pins each category to a chosen colour.
    color_discrete_map={'Male': 'gray', 'Female': 'red'},
    barmode='relative',
    orientation='v',
    opacity=0.9,
)
barchart.show()

Plotly Continuous Color Scale: https://plotly.com/python/builtin-colorscales/

In [None]:
barchart = px.bar(
    dfCaste_Mhr,
    x='year', y='convicts',
    color='convicts',  # numeric column, so a continuous colour scale applies
    color_continuous_scale=px.colors.diverging.Picnic,
    range_color=[1, 10000],  # explicit bounds for the continuous colour scale
    barmode='relative',
    orientation='v',
    opacity=0.9,
)
barchart.show()

In [None]:
barchart = px.bar(
    dfCaste_Mhr,
    x='year', y='convicts',
    color='gender',
    text='convicts',  # print the convict count on each bar
    barmode='relative',
    orientation='v',
    opacity=0.9,
)
barchart.show()

In [None]:
barchart = px.bar(
    dfCaste_Mhr,
    x='year', y='convicts',
    color='gender',
    text='convicts',
    hover_name='under_trial',  # this column's value becomes the tooltip title
    barmode='relative',
    orientation='v',
    opacity=0.9,
)
barchart.show()

In [None]:
barchart = px.bar(
    dfCaste_Mhr,
    x='year', y='convicts',
    color='gender',
    text='convicts',
    hover_name='under_trial',
    hover_data=['detenues'],  # extra column listed in the hover tooltip
    barmode='relative',
    orientation='v',
    opacity=0.9,
)
barchart.show()

In [None]:
barchart = px.bar(
    dfCaste_Mhr,
    x='year', y='convicts',
    color='gender',
    text='convicts',
    hover_name='under_trial',
    hover_data=['detenues'],
    # Not displayed: extra values carried on the figure for use in Dash
    # callbacks or widgets.
    custom_data=['others'],
    barmode='relative',
    orientation='v',
    opacity=0.9,
)
barchart.show()

In [None]:
# Made-up error magnitudes derived from the convict counts, used only to
# demonstrate asymmetric error bars in the next chart.
dfCaste_Mhr['err_plus'] = dfCaste_Mhr['convicts'].div(100)
dfCaste_Mhr['err_minus'] = dfCaste_Mhr['convicts'].div(40)

In [None]:
barchart = px.bar(
    dfCaste_Mhr,
    x='year', y='convicts',
    color='gender',
    error_y='err_plus',         # upward error (also the symmetric size if no minus is given)
    error_y_minus='err_minus',  # downward error
    barmode='relative',
    orientation='v',
    opacity=0.9,
)
barchart.show()

In [None]:
barchart = px.bar(
    dfCaste_Mhr,
    x='year', y='convicts',
    color='gender',
    barmode='relative',
    orientation='v',
    opacity=0.9,
    # Display names that replace the raw column names in the figure.
    labels={'convicts': 'Convicts in Maharashtra', 'gender': 'Gender'},
    title='Indian Prison Statistics',
    width=1400,   # pixels
    height=720,   # pixels
    # Other built-in templates: 'ggplot2', 'seaborn', 'simple_white', 'plotly',
    # 'plotly_white', 'plotly_dark', 'presentation', 'xgridoff', 'ygridoff', 'none'
    template='gridon',
)
barchart.show()

Plotly Animation: https://plotly.com/python/animations/

In [None]:
barchart = px.bar(
    dfCaste_Mhr,
    x='gender', y='convicts',
    color='gender',
    barmode='relative',
    orientation='v',
    opacity=0.9,
    labels={'convicts': 'Convicts in Maharashtra', 'gender': 'Gender'},
    title='Indian Prison Statistics',
    width=1400,
    height=720,
    template='gridon',
    animation_frame='year',  # one animation frame per year
    range_y=[0, 9000],       # fixed y-axis range across frames
    # Force the frame order: 2013 down to 2001.
    category_orders={'year': list(range(2013, 2000, -1))},
)
barchart.show()

In [None]:
barchart = px.bar(
    dfCaste_Mhr,
    x='year', y='convicts',
    color='gender',
    text='convicts',
    barmode='relative',
    orientation='v',
    opacity=0.9,
    labels={'convicts': 'Convicts in Maharashtra', 'gender': 'Gender'},
    title='Indian Prison Statistics',
    width=1400,
    height=720,
    template='gridon',
)

# The animation timing tweaks below are left disabled: no animation_frame is
# passed above.
# barchart.layout.updatemenus[0].buttons[0].args[1]['frame']['duration'] = 1000
# barchart.layout.updatemenus[0].buttons[0].args[1]['transition']['duration'] = 500

barchart.show()

plotly.graph_objects.Layout: https://plotly.com/python-api-reference/generated/plotly.graph_objects.Layout.html#plotly.graph_objects.Layout

plotly.graph_objects.Bar: https://plotly.com/python-api-reference/generated/plotly.graph_objects.Bar.html#plotly.graph_objects.Bar

In [None]:
barchart = px.bar(
    dfCaste_Mhr,
    x='year', y='convicts',
    color='gender',
    text='convicts',
    barmode='relative',
    orientation='v',
    opacity=0.9,
    labels={'convicts': 'Convicts in Maharashtra', 'gender': 'Gender'},
    title='Indian Prison Statistics',
    width=1400,
    height=720,
    template='gridon',
)

# Layout-level tweaks: a uniform minimum text size of 14 with mode 'hide',
# and the legend positioned at x=0, y=1.0.
barchart.update_layout(
    legend=dict(x=0, y=1.0),
    uniformtext_minsize=14,
    uniformtext_mode='hide',
)

barchart.show()

In [None]:
barchart = px.bar(
    dfCaste_Mhr,
    x='year', y='convicts',
    color='gender',
    text='convicts',
    barmode='relative',
    orientation='v',
    opacity=0.9,
    labels={'convicts': 'Convicts in Maharashtra', 'gender': 'Gender'},
    title='Indian Prison Statistics',
    width=1400,
    height=720,
    template='gridon',
)

# Layout variant from the previous cell, kept disabled here:
# barchart.update_layout(uniformtext_minsize=14,
#                        uniformtext_mode='hide',
#                        legend={'x':0,'y':1.0})

# Trace-level tweaks: format the text labels, draw them outside the bars, and
# give 13 per-bar widths where the 7th bar is twice as wide as the others.
barchart.update_traces(
    textposition='outside',
    texttemplate='%{text:.2s}',
    width=[.3] * 6 + [.6] + [.3] * 6,
)

barchart.show()

### Scatter Plot

Code Reference: https://github.com/Coding-with-Adam/Dash-by-Plotly/tree/master/Plotly_Graphs/Scatter_Plot

plotly.express.scatter: https://plotly.com/python-api-reference/generated/plotly.express.scatter.html#plotly.express.scatter

Symbols for Markers: https://plotly.com/javascript/reference/#scatter-marker-symbol

Plotly Color Scales: https://plotly.com/python/builtin-colorscales/

In [None]:
# Load Plotly's bundled restaurant-tips sample data and preview it.
dfTips = px.data.tips()
dfTips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [None]:
# Duplicate 'size' under the name 'tbl-size'; the scatter examples below use
# this column for marker size, facets and animation frames.
dfTips['tbl-size'] = dfTips['size']
dfTips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,tbl-size
0,16.99,1.01,Female,No,Sun,Dinner,2,2
1,10.34,1.66,Male,No,Sun,Dinner,3,3
2,21.01,3.5,Male,No,Sun,Dinner,3,3
3,23.68,3.31,Male,No,Sun,Dinner,2,2
4,24.59,3.61,Female,No,Sun,Dinner,4,4


In [None]:
# Baseline scatter: tip against total bill.
scatterplot = px.scatter(dfTips, x='total_bill', y='tip')

scatterplot.show()

In [None]:
scatterplot = px.scatter(
    dfTips,
    x='total_bill', y='tip',
    symbol='smoker',   # marker symbol varies with the smoker column
    size='tbl-size',   # marker size varies with the table size
    size_max=13,       # upper bound on the marker size
)

scatterplot.show()

In [None]:
scatterplot = px.scatter(
    dfTips,
    x='total_bill', y='tip',
    symbol='smoker',
    # Symbols to cycle through; entries may be symbol numbers or names.
    symbol_sequence=[3, 'square-open'],
    size='tbl-size',
    size_max=13,
)

scatterplot.show()

In [None]:
scatterplot = px.scatter(
    dfTips,
    x='total_bill', y='tip',
    symbol='smoker',
    # Pin each smoker value to a chosen symbol.
    symbol_map={'No': 'square-open', 'Yes': 3},
    size='tbl-size',
    size_max=13,
)

scatterplot.show()

In [None]:
# Colour by day; half-transparent markers make overlapping points visible.
scatterplot = px.scatter(dfTips, x='total_bill', y='tip', color='day', opacity=0.5)

scatterplot.show()

In [None]:
scatterplot = px.scatter(
    dfTips,
    x='total_bill', y='tip',
    color='day',
    # Explicit colours assigned in order to the day categories.
    color_discrete_sequence=['red', 'green', 'blue', 'black'],
)

scatterplot.show()

In [None]:
scatterplot = px.scatter(
    dfTips,
    x='total_bill', y='tip',
    color='size',  # numeric column, so a continuous colour scale applies
    color_continuous_scale=px.colors.diverging.Armyrose,
)

scatterplot.show()

In [None]:
scatterplot = px.scatter(
    dfTips,
    x='total_bill', y='tip',
    color='size',
    color_continuous_scale=px.colors.diverging.Armyrose,
    color_continuous_midpoint=2,  # value placed at the centre of the diverging scale
)

scatterplot.show()

In [None]:
scatterplot = px.scatter(
    dfTips,
    x='total_bill', y='tip',
    hover_name='time',  # this column's value becomes the tooltip title
    text='tbl-size',    # table size printed as a text label on each point
)

scatterplot.show()

In [None]:
scatterplot = px.scatter(
    dfTips,
    x='total_bill', y='tip',
    hover_data=['time'],  # extra column listed in the hover tooltip
    # custom_data=['tbl_size']
)

scatterplot.show()

In [None]:
scatterplot = px.scatter(
    dfTips,
    x='total_bill', y='tip',
    color='day',
    facet_row='tbl-size',  # one subplot row per table size
    # size='tbl-size',
)

scatterplot.show()

In [None]:
scatterplot = px.scatter(
    dfTips,
    x='total_bill', y='tip',
    color='day',
    facet_col='tbl-size',  # one subplot column per table size
)

scatterplot.show()

In [None]:
scatterplot = px.scatter(
    dfTips,
    x='total_bill', y='tip',
    color='day',
    facet_col='tbl-size',
    facet_col_wrap=3,  # maximum number of subplot columns; do not combine with facet_row
)

scatterplot.show()

In [None]:
scatterplot = px.scatter(
    dfTips,
    x='total_bill', y='tip',
    color='day',
    # Marginal plot options: 'rug', 'box', 'violin', 'histogram'.
    marginal_x='rug',
    marginal_y='box',
)

scatterplot.show()

In [None]:
scatterplot = px.scatter(
    dfTips,
    x='total_bill', y='tip',
    color='day',
    # Regression line; options include 'ols' and 'lowess' (needs statsmodels).
    trendline='ols',
)

scatterplot.show()

In [None]:
# Same scatter with a logarithmic x-axis.
scatterplot = px.scatter(dfTips, x='total_bill', y='tip', color='day', log_x=True)

scatterplot.show()

In [None]:
# Made-up error magnitudes derived from the bill total, used only to
# demonstrate asymmetric error bars in the next charts.
dfTips['err_plus'] = dfTips['total_bill'].div(100)
dfTips['err_minus'] = dfTips['total_bill'].div(40)

In [None]:
scatterplot = px.scatter(
    dfTips,
    x='total_bill', y='tip',
    color='day',
    error_y='err_plus',         # upward error
    error_y_minus='err_minus',  # downward error
)

scatterplot.show()

In [None]:
# Scatter with asymmetric y error bars taken from the err_* columns.
scatterplot = px.scatter(
    dfTips,
    x='total_bill', y='tip',
    color='day',
    error_y_minus='err_minus',
    error_y='err_plus',
)

scatterplot.show()

In [None]:
scatterplot = px.scatter(
    dfTips,
    x='total_bill', y='tip',
    color='smoker',
    # Display names that replace the raw column names in the figure.
    labels={'tip': 'the TIP', 'smoker': 'SMOKER or NOT'},
    title='All Tips',
    width=900,    # figure width in pixels
    height=900,   # figure height in pixels
    # Other built-in templates: 'ggplot2', 'seaborn', 'simple_white', 'plotly',
    # 'plotly_dark', 'presentation', 'xgridoff', 'ygridoff', 'gridon', 'none'
    template='plotly_white',
)

scatterplot.show()

In [None]:
scatterplot = px.scatter(
    dfTips,
    x='total_bill', y='tip',
    color='day',
    animation_frame='tbl-size',  # one animation frame per table size
    range_x=[5, 50],             # fixed x-axis range
    range_y=[0, 10],             # fixed y-axis range
    # Play the frames in ascending table size, 1 through 6.
    category_orders={'tbl-size': list(range(1, 7))},
)

scatterplot.show()

In [None]:
scatterplot = px.scatter(
    dfTips,
    x='total_bill', y='tip',
    color='day',
    animation_frame='tbl-size',
    range_x=[5, 50],   # fixed x-axis range
    range_y=[0, 10],   # fixed y-axis range
    category_orders={'tbl-size': list(range(1, 7))},
)

# Adjust the first animation button's arguments: set the frame duration to
# 1000 and the transition duration to 500.
scatterplot.layout.updatemenus[0].buttons[0].args[1]['frame']['duration'] = 1000
scatterplot.layout.updatemenus[0].buttons[0].args[1]['transition']['duration'] = 500

scatterplot.show()

### Animated Scatter Plot

The Code: https://github.com/Coding-with-Adam/Dash-by-Plotly/blob/master/Plotly_Graphs/Animated_Scatter/gender_ineq.py

Plotly Scatter Plot Documentation: https://plotly.com/python-api-reference/generated/plotly.express.scatter.html#plotly.express.scatter

In [None]:
# Load the gender statistics CSV (one column per year) and preview it.
dfGen = pd.read_csv('/content/Gender_StatsData.csv')
dfGen.head()

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,Unnamed: 64
0,Arab World,ARB,"Access to anti-retroviral drugs, female (%)",SH.HIV.ARTC.FE.ZS,,,,,,,...,,,,,,,,,,
1,Arab World,ARB,"Access to anti-retroviral drugs, male (%)",SH.HIV.ARTC.MA.ZS,,,,,,,...,,,,,,,,,,
2,Arab World,ARB,Account ownership at a financial institution o...,FX.OWN.TOTL.FE.ZS,,,,,,,...,13.775815,,,22.07935,,,25.635403,,,
3,Arab World,ARB,Account ownership at a financial institution o...,FX.OWN.TOTL.MA.ZS,,,,,,,...,30.377668,,,37.790764,,,48.328518,,,
4,Arab World,ARB,"Adjusted net enrollment rate, primary, female ...",SE.PRM.TENR.FE,,,,,,,...,83.44254,84.11878,83.21839,83.34494,83.18996,83.82028,83.99478,84.25278,,


In [None]:
# Year column labels to keep, as strings: "1990" through "2018" inclusive.
the_years = [str(year) for year in range(1990, 2019)]

In [None]:
# Keep only the female and male "expected years of schooling" indicators.
dfGen = dfGen[
    dfGen["Indicator Name"].isin([
        "Expected years of schooling, female",
        "Expected years of schooling, male",
    ])
]

In [None]:
# Collapse to one row per country and indicator, averaging each selected year.
dfGen = (
    dfGen
    .groupby(["Country Name", "Country Code", "Indicator Name"], as_index=False)[the_years]
    .mean()
)

In [None]:
# Preview the aggregated frame: one row per country and indicator.
dfGen.head()

Unnamed: 0,Country Name,Country Code,Indicator Name,1990,1991,1992,1993,1994,1995,1996,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,Afghanistan,AFG,"Expected years of schooling, female",,,,,,,,...,6.41198,,7.22629,,,7.68995,,,,
1,Afghanistan,AFG,"Expected years of schooling, male",,,,,,,,...,10.92382,,11.22855,,,12.32561,,,,
2,Albania,ALB,"Expected years of schooling, female",11.30202,11.47907,10.7585,10.35231,10.1538,10.09895,10.19657,...,12.62082,13.38677,14.11511,15.02785,15.42999,15.78335,15.60178,15.40506,15.51123,15.50103
3,Albania,ALB,"Expected years of schooling, male",11.88436,12.03053,10.57412,9.91268,10.02907,10.22864,10.256,...,11.92743,12.64353,13.41751,14.18841,14.48115,14.78336,14.60936,14.25933,14.17994,13.95683
4,Algeria,DZA,"Expected years of schooling, female",,,,,,,,...,13.69353,14.21949,14.57749,,,,,,,


In [None]:
# Regional aggregates to plot, and the x-axis range used for them.
world = [
    "Arab World",
    "South Asia",
    "Latin America & Caribbean",
    "East Asia & Pacific",
    "European Union",
]
world_xrange = [4, 19]

In [None]:
# Keep only the selected regions and give 'Country Name' an explicit category
# order, then sort by it. Building the result in a single chain avoids
# assigning a column into a filtered slice of dfGen (which can trigger pandas'
# SettingWithCopyWarning) and avoids an in-place sort.
dfGen = (
    dfGen[dfGen['Country Name'].isin(world)]
    .assign(**{
        'Country Name': lambda frame: pd.Categorical(
            frame['Country Name'],
            ['South Asia', 'Arab World', 'East Asia & Pacific',
             'Latin America & Caribbean', "European Union"],
        )
    })
    .sort_values("Country Name")
)

In [None]:
# Reshape wide -> long: the year columns become 'Year'/'Rate' pairs.
dfGen = dfGen.melt(
    id_vars=['Country Name', 'Country Code', 'Indicator Name'],
    var_name='Year',
    value_name='Rate',
)
dfGen.head()

Unnamed: 0,Country Name,Country Code,Indicator Name,Year,Rate
0,South Asia,SAS,"Expected years of schooling, female",1990,5.62623
1,South Asia,SAS,"Expected years of schooling, male",1990,8.45663
2,Arab World,ARB,"Expected years of schooling, female",1990,7.39699
3,Arab World,ARB,"Expected years of schooling, male",1990,9.51732
4,East Asia & Pacific,EAS,"Expected years of schooling, female",1990,8.56501


In [None]:
# Animated dot plot: schooling rate per region, coloured by indicator,
# with one frame per year.
figSactter = px.scatter(
    dfGen,
    x="Rate",
    y="Country Name",
    color="Indicator Name",
    animation_frame="Year",
    range_x=world_xrange,
    range_y=[-0.5, 5.0],
    title="Gender Gaps in our Education",
    # Display names that replace the raw column names.
    labels={
        "Rate": "Years a child is expected to spend at school/university",
        "Indicator Name": "Gender",
    },
)

figSactter.show()

In [None]:
figSactter = px.scatter(
    dfGen,
    x="Rate",
    y="Country Name",
    color="Indicator Name",
    animation_frame="Year",
    range_x=world_xrange,
    range_y=[-0.5, 5.0],
    title="Gender Gaps in our Education",
    labels={
        "Rate": "Years a child is expected to spend at school/university",
        "Indicator Name": "Gender",
    },
)

# Centre and enlarge the title, enlarge the x-axis title and legend fonts,
# and remove the y-axis title.
figSactter.update_layout(
    title=dict(x=0.5, xanchor='center', font=dict(size=20)),
    xaxis={'title': {'font': {'size': 20}}},
    yaxis=dict(title=dict(text=None)),
    legend=dict(font=dict(size=18), title=dict(font=dict(size=18))),
)

figSactter.show()

In [None]:
figSactter = px.scatter(
    dfGen,
    x="Rate",
    y="Country Name",
    color="Indicator Name",
    animation_frame="Year",
    range_x=world_xrange,
    range_y=[-0.5, 5.0],
    title="Gender Gaps in our Education",
    labels={
        "Rate": "Years a child is expected to spend at school/university",
        "Indicator Name": "Gender",
    },
)

figSactter.update_layout(
    title=dict(x=0.5, xanchor='center', font=dict(size=20)),
    xaxis={'title': {'font': {'size': 20}}},
    yaxis=dict(title=dict(text=None)),
    legend=dict(font=dict(size=18), title=dict(font=dict(size=18))),
)

# Set both the frame and the transition duration of the first animation
# button to 800.
figSactter.layout.updatemenus[0].buttons[0].args[1]['frame']['duration'] = 800
figSactter.layout.updatemenus[0].buttons[0].args[1]['transition']['duration'] = 800

# Rename the two traces and restyle their markers (same size, distinct colours).
figSactter.data[0].name = 'Girl'
figSactter.data[0]['marker'].update(size=14, color='#22bc22')
figSactter.data[1].name = 'Boy'
figSactter.data[1]['marker'].update(size=14, color="#fda026")

# Apply the same marker colours to the traces stored in every animation frame.
for frame in figSactter.frames:
    frame.data[0]['marker']['color'] = '#22bc22'
    frame.data[1]['marker']['color'] = '#fda026'

figSactter.show()

In [None]:
# asia_latin_years = ["2000","2001","2002","2003","2004","2005","2006","2007","2008","2009","2010","2011","2012","2013","2014","2015","2016","2017"]

# europe=["Bulgaria","Romania","Denmark","France","Hungary"]
# africa=["Malawi","Egypt, Arab Rep.","Mauritania","Morocco","Lesotho"]
# arab=["Jordan","Oman","Qatar","Tunisia","Syrian Arab Republic"]
# asia_central=["India","Iran, Islamic Rep.","Mongolia","Tajikistan","Uzbekistan"]
# latin_caribb=["El Salvador","Mexico","Argentina","Cuba","Chile"]

# europe_xrange=[10,20]
# africa_xrange=[2,15]
# arab_xrange=[6,17]
# asia_central_xrange=[6,16]
# latin_caribb_xrange=[10,19]

### Box & Violin Plots

Plotly Violin Plot: https://plotly.com/python-api-reference/generated/plotly.express.violin.html#plotly.express.violin

Data: https://quickstats.nass.usda.gov/results/04796016-2A13-39A1-ACCF-A86A4CD21BBD

In [9]:
# Load the honey bee colony survey CSV (path as uploaded to /content) and preview it.
dfBees = pd.read_csv("/content/bees.csv")
dfBees.head()

Unnamed: 0,Program,Year,Period,State,State ANSI,Data Item,Value
0,SURVEY,2019,JAN THRU MAR,ALABAMA,1,"HONEY, BEE COLONIES, AFFECTED BY DISEASE - INV...",1.8
1,SURVEY,2019,JAN THRU MAR,ALABAMA,1,"HONEY, BEE COLONIES, AFFECTED BY OTHER CAUSES ...",3.1
2,SURVEY,2019,JAN THRU MAR,ALABAMA,1,"HONEY, BEE COLONIES, AFFECTED BY PESTICIDES - ...",0.3
3,SURVEY,2019,JAN THRU MAR,ALABAMA,1,"HONEY, BEE COLONIES, AFFECTED BY PESTS ((EXCL ...",22.7
4,SURVEY,2019,JAN THRU MAR,ALABAMA,1,"HONEY, BEE COLONIES, AFFECTED BY UNKNOWN CAUSE...",9.0


In [10]:
# Column dtypes and non-null counts.
dfBees.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4590 entries, 0 to 4589
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Program     4590 non-null   object 
 1   Year        4590 non-null   int64  
 2   Period      4590 non-null   object 
 3   State       4590 non-null   object 
 4   State ANSI  4590 non-null   int64  
 5   Data Item   4590 non-null   object 
 6   Value       4590 non-null   float64
dtypes: float64(1), int64(2), object(4)
memory usage: 251.1+ KB


In [12]:
# (rows, columns)
dfBees.shape

(4590, 7)

In [13]:
# Count missing values per column.
dfBees.isnull().sum()

Program       0
Year          0
Period        0
State         0
State ANSI    0
Data Item     0
Value         0
dtype: int64

In [14]:
# Coerce 'Value' to a numeric dtype before plotting.
dfBees['Value'] = pd.to_numeric(dfBees['Value'])

In [15]:
# List the column names.
dfBees.columns

Index(['Program', 'Year', 'Period', 'State', 'State ANSI', 'Data Item',
       'Value'],
      dtype='object')

In [19]:
# Number of rows recorded for each 'Data Item' category.
dfBees.groupby('Data Item')['Data Item'].count()

Data Item
HONEY, BEE COLONIES, AFFECTED BY DISEASE - INVENTORY, MEASURED IN PCT OF COLONIES                        765
HONEY, BEE COLONIES, AFFECTED BY OTHER CAUSES - INVENTORY, MEASURED IN PCT OF COLONIES                   765
HONEY, BEE COLONIES, AFFECTED BY PESTICIDES - INVENTORY, MEASURED IN PCT OF COLONIES                     765
HONEY, BEE COLONIES, AFFECTED BY PESTS ((EXCL VARROA MITES)) - INVENTORY, MEASURED IN PCT OF COLONIES    765
HONEY, BEE COLONIES, AFFECTED BY UNKNOWN CAUSES - INVENTORY, MEASURED IN PCT OF COLONIES                 765
HONEY, BEE COLONIES, AFFECTED BY VARROA MITES - INVENTORY, MEASURED IN PCT OF COLONIES                   765
Name: Data Item, dtype: int64

In [17]:
# Distinct 'Data Item' labels; these are the keys of the mapping defined next.
dfBees['Data Item'].unique()

array(['HONEY, BEE COLONIES, AFFECTED BY DISEASE - INVENTORY, MEASURED IN PCT OF COLONIES',
       'HONEY, BEE COLONIES, AFFECTED BY OTHER CAUSES - INVENTORY, MEASURED IN PCT OF COLONIES',
       'HONEY, BEE COLONIES, AFFECTED BY PESTICIDES - INVENTORY, MEASURED IN PCT OF COLONIES',
       'HONEY, BEE COLONIES, AFFECTED BY PESTS ((EXCL VARROA MITES)) - INVENTORY, MEASURED IN PCT OF COLONIES',
       'HONEY, BEE COLONIES, AFFECTED BY UNKNOWN CAUSES - INVENTORY, MEASURED IN PCT OF COLONIES',
       'HONEY, BEE COLONIES, AFFECTED BY VARROA MITES - INVENTORY, MEASURED IN PCT OF COLONIES'],
      dtype=object)

In [20]:
# Short display names for the six long 'Data Item' labels.
mapping = dict([
    ('HONEY, BEE COLONIES, AFFECTED BY DISEASE - INVENTORY, MEASURED IN PCT OF COLONIES',
     'Disease'),
    ('HONEY, BEE COLONIES, AFFECTED BY OTHER CAUSES - INVENTORY, MEASURED IN PCT OF COLONIES',
     'Other'),
    ('HONEY, BEE COLONIES, AFFECTED BY PESTICIDES - INVENTORY, MEASURED IN PCT OF COLONIES',
     'Pesticides'),
    ('HONEY, BEE COLONIES, AFFECTED BY PESTS ((EXCL VARROA MITES)) - INVENTORY, MEASURED IN PCT OF COLONIES',
     'Pests_excl_Varroa'),
    ('HONEY, BEE COLONIES, AFFECTED BY UNKNOWN CAUSES - INVENTORY, MEASURED IN PCT OF COLONIES',
     'Unknown'),
    ('HONEY, BEE COLONIES, AFFECTED BY VARROA MITES - INVENTORY, MEASURED IN PCT OF COLONIES',
     'Varroa_mites'),
])

In [21]:
# Replace the long labels with their short names. Values that are not keys of
# `mapping` (e.g. labels already shortened by an earlier run of this cell) are
# kept as they are instead of being turned into NaN.
dfBees['Data Item'] = dfBees['Data Item'].map(lambda item: mapping.get(item, item))

In [22]:
# Rename the two plotted columns to reader-friendly names.
dfBees.rename(
    inplace=True,
    columns={
        'Value': 'Percent of Colonies Impacted',
        'Data Item': 'Affected by',
    },
)

In [23]:
# Confirm the shortened labels and the renamed columns.
dfBees.head()

Unnamed: 0,Program,Year,Period,State,State ANSI,Affected by,Percent of Colonies Impacted
0,SURVEY,2019,JAN THRU MAR,ALABAMA,1,Disease,1.8
1,SURVEY,2019,JAN THRU MAR,ALABAMA,1,Other,3.1
2,SURVEY,2019,JAN THRU MAR,ALABAMA,1,Pesticides,0.3
3,SURVEY,2019,JAN THRU MAR,ALABAMA,1,Pests_excl_Varroa,22.7
4,SURVEY,2019,JAN THRU MAR,ALABAMA,1,Unknown,9.0


In [28]:
# One vertical violin per cause, in a fixed left-to-right order.
violinfig = px.violin(
    dfBees,
    x="Affected by",
    y="Percent of Colonies Impacted",
    orientation="v",
    category_orders={
        'Affected by': ['Disease', 'Unknown', 'Pesticides', 'Other', 'Pests_excl_Varroa', 'Varroa_mites'],
    },
    title="What is killing our Bees",
    template='presentation',
)
violinfig.show()

In [30]:
# Horizontal variant: x and y are swapped and orientation is 'h'.
violinfig = px.violin(
    dfBees,
    x="Percent of Colonies Impacted",
    y="Affected by",
    orientation="h",
    category_orders={
        'Affected by': ['Disease', 'Unknown', 'Pesticides', 'Other', 'Pests_excl_Varroa', 'Varroa_mites'],
    },
    title="What is killing our Bees",
    template='plotly_white',
)
violinfig.show()

In [43]:
violinfig = px.violin(
    dfBees,
    x="Affected by",
    y="Percent of Colonies Impacted",
    orientation='v',
    category_orders={
        'Affected by': ['Disease', 'Unknown', 'Pesticides', 'Other', 'Pests_excl_Varroa', 'Varroa_mites'],
    },
    box=True,      # draw a box plot inside each violin
    points='all',  # options: 'outliers', 'suspectedoutliers', 'all', or False
    title='What is killing our Bees',
    template='presentation',
)
violinfig.show()

In [35]:
# Number of rows recorded for each state.
dfBees.groupby('State')['State'].count()

State
ALABAMA           102
ARIZONA           102
ARKANSAS          102
CALIFORNIA        102
COLORADO          102
CONNECTICUT       102
FLORIDA           102
GEORGIA           102
HAWAII            102
IDAHO             102
ILLINOIS          102
INDIANA           102
IOWA              102
KANSAS            102
KENTUCKY          102
LOUISIANA         102
MAINE             102
MARYLAND          102
MASSACHUSETTS     102
MICHIGAN          102
MINNESOTA         102
MISSISSIPPI       102
MISSOURI          102
MONTANA           102
NEBRASKA          102
NEW JERSEY        102
NEW MEXICO        102
NEW YORK          102
NORTH CAROLINA    102
NORTH DAKOTA      102
OHIO              102
OKLAHOMA          102
OREGON            102
PENNSYLVANIA      102
SOUTH CAROLINA    102
SOUTH DAKOTA      102
TENNESSEE         102
TEXAS             102
UTAH              102
VERMONT           102
VIRGINIA          102
WASHINGTON        102
WEST VIRGINIA     102
WISCONSIN         102
WYOMING           102
Name

In [38]:
# Compare three states: one coloured violin per state within each cause.
violinfig = px.violin(
    dfBees.query("State == ['ALABAMA','NEW YORK','CALIFORNIA']"),
    x="Affected by",
    y="Percent of Colonies Impacted",
    orientation='v',
    color='State',
    violinmode='group',
    category_orders={
        'Affected by': ['Disease', 'Unknown', 'Pesticides', 'Other', 'Pests_excl_Varroa', 'Varroa_mites'],
    },
    title='What is killing our Bees',
    template='presentation',
)
violinfig.show()

In [46]:
violinfig = px.violin(
    dfBees,
    x="Affected by",
    y="Percent of Colonies Impacted",
    orientation="v",
    category_orders={
        'Affected by': ['Disease', 'Unknown', 'Pesticides', 'Other', 'Pests_excl_Varroa', 'Varroa_mites'],
    },
    points="all",
    hover_name='State',     # this column's value becomes the tooltip title
    hover_data=['Period'],  # extra column listed in the hover tooltip
    title="What is killing our Bees",
    template='presentation',
)
violinfig.show()

In [49]:
# Two states, each in its own subplot row.
violinfig = px.violin(
    dfBees.query("State == ['ALABAMA','NEW YORK']"),
    x="Affected by",
    y="Percent of Colonies Impacted",
    orientation='v',
    color='State',
    violinmode='group',
    facet_row='State',
    category_orders={
        'Affected by': ['Disease', 'Unknown', 'Pesticides', 'Other', 'Pests_excl_Varroa', 'Varroa_mites'],
    },
    title='What is killing our Bees',
    template='presentation',
)
violinfig.show()

In [55]:
# Two states, one subplot per survey period, wrapped after two columns.
violinfig = px.violin(
    dfBees.query("State == ['ALABAMA','NEW YORK']"),
    x="Affected by",
    y="Percent of Colonies Impacted",
    orientation='v',
    color='State',
    violinmode='group',
    facet_col='Period',
    facet_col_wrap=2,
    category_orders={
        'Affected by': ['Disease', 'Unknown', 'Pesticides', 'Other', 'Pests_excl_Varroa', 'Varroa_mites'],
    },
    title='What is killing our Bees',
    template='presentation',
)
violinfig.show()

In [56]:
violinfig = px.violin(
    dfBees,
    x="Affected by",
    y="Percent of Colonies Impacted",
    orientation="v",
    animation_frame='Year',  # one animation frame per year
    range_y=[-5, 100],       # fixed y-axis range across frames
    # Play the frames in ascending order, 2015 through 2019.
    category_orders={'Year': list(range(2015, 2020))},
    title="What is killing our Bees",
    template='presentation',
)

# Set the frame duration of the first animation button to 1000.
violinfig.layout.updatemenus[0].buttons[0].args[1]['frame']['duration'] = 1000

violinfig.show()

In [69]:
# Idaho vs Texas with fixed colours, all points, inner boxes and a dark theme.
violinfig = px.violin(
    dfBees.query("State == ['IDAHO','TEXAS']"),
    x="Affected by",
    y="Percent of Colonies Impacted",
    orientation='v',
    color='State',
    color_discrete_map={'IDAHO': 'red', 'TEXAS': 'limegreen'},
    violinmode='group',
    category_orders={
        'Affected by': ['Disease', 'Unknown', 'Pesticides', 'Other', 'Pests_excl_Varroa', 'Varroa_mites'],
    },
    box=True,
    points="all",
    hover_data=['Period'],
    title='What is killing our Bees',
    template='plotly_dark',
)

# update_traces applies these attributes to the traces of the figure:
# show each violin's mean line and colour it blue.
violinfig.update_traces(meanline_color='blue', meanline_visible=True)

violinfig.show()