In [None]:
from pathlib import Path

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import plotly.express as px

In [None]:
!pip install -U kaleido

In [None]:
# Load the Gapminder sample dataset that ships with Plotly Express and keep
# only the rows recorded for the year 2007.
gapminder_all_years = px.data.gapminder()
df = gapminder_all_years[gapminder_all_years["year"] == 2007]
df

In [None]:
# Centre the colour scale on the population-weighted mean life expectancy.
weighted_mean_life_exp = np.average(df['lifeExp'], weights=df['pop'])

# Sunburst of population by continent, then country, coloured by life
# expectancy.  Because a color argument is passed, the color of a node is
# computed as the average of the color values of its children, weighted by
# their values.
sunburst_options = dict(
    path=['continent', 'country'],
    values='pop',
    color='lifeExp',
    hover_data=['iso_alpha'],
    color_continuous_scale='RdBu',
    color_continuous_midpoint=weighted_mean_life_exp,
)
fig = px.sunburst(df, **sunburst_options)
fig.show()

In [None]:
# Build the country -> continent lookup table as an independent copy, so
# later edits to it do not touch the Gapminder frame it was taken from.
lookup_columns = ['country', 'continent']
df2 = df.loc[:, lookup_columns].copy()

In [None]:
# Relabel every row whose continent is "Americas" as "South America".
is_americas = df2["continent"].eq("Americas")
df2.loc[is_americas, "continent"] = "South America"

In [None]:
# Move every country that belongs to the North American continent to
# "North America".  Besides Canada, Mexico and the United States this covers
# Central America and the Caribbean; otherwise countries such as Guatemala
# and El Salvador would be charted under the wrong continent.
# Names that do not occur in df2 are simply ignored by isin().
NORTH_AMERICAN_COUNTRIES = [
    'Canada', 'Mexico', 'United States',
    # Central America
    'Costa Rica', 'El Salvador', 'Guatemala', 'Honduras', 'Nicaragua', 'Panama',
    # Caribbean
    'Cuba', 'Dominican Republic', 'Haiti', 'Jamaica', 'Puerto Rico',
    'Trinidad and Tobago',
]
df2.loc[df2['country'].isin(NORTH_AMERICAN_COUNTRIES), "continent"] = "North America"

In [None]:
# Display the row for Canada (e.g. to check its continent value).
is_canada = df2['country'].eq('Canada')
df_Canada = df2[is_canada]
df_Canada

In [None]:
# Display the row for Mexico (e.g. to check its continent value).
df_Mexico = df2.query("country == 'Mexico'")
df_Mexico

In [None]:
# Display the row for the United States (e.g. to check its continent value).
us_mask = df2['country'].isin(['United States'])
df_US = df2[us_mask]
df_US

In [None]:
# Hard-coded volcano counts per country, listed from the largest count to
# the smallest.  Insertion order is kept, so tables built from this mapping
# list the countries in the same order.
_volcano_counts = [
    ("United States", 161),
    ("Japan", 122),
    ("Indonesia", 121),
    ("Russia", 117),
    ("Chile", 92),
    ("Ethiopia", 53),
    ("Papua New Guinea", 47),
    ("Philippines", 38),
    ("Mexico", 37),
    ("Argentina", 36),
    ("Ecuador", 35),
    ("Iceland", 34),
    ("New Zealand", 25),
    ("Canada", 24),
    ("Guatemala", 23),
    ("Tonga", 21),
    ("Kenya", 21),
    ("El Salvador", 20),
    ("France", 20),
    ("Antarctica", 19),
]
dict_num_volcano = dict(_volcano_counts)

In [None]:
# Turn the mapping into a two-column table with one row per country,
# in the mapping's insertion order.
df3 = pd.DataFrame({
    'country': list(dict_num_volcano.keys()),
    'num_volcano': list(dict_num_volcano.values()),
})
df3

In [None]:
# Inner join: only countries present in BOTH df2 and df3 are kept.
# 'country' is the only column name shared by the two frames, so a single
# `on=` replaces the separate `left_on` / `right_on` arguments.
df4 = pd.merge(left=df2, right=df3, on='country', how='inner')

# What's the size of the output data?  Only the last expression of a cell is
# displayed automatically, so the shape has to be printed explicitly.
print(df4.shape)
df4

In [None]:
# df4 is missing 4 countries!  List the names that are in the volcano table
# but did not make it into the merged table.
volcano_countries = set(df3['country'])
merged_countries = set(df4['country'])
volcano_countries - merged_countries

In [None]:
# Largest index label currently in df2.  NOTE: this is not len(df2.index),
# because some rows are missing and the labels are therefore not contiguous.
last_id = int(df2.index.max())

In [None]:
# Rows for the volcano-table countries that df2 does not contain yet.
# They are labelled with the four index values following last_id, so they
# end up at the end of the table.
missing_rows = pd.DataFrame(
    [
        ['Antarctica', 'Antarctica'],
        ['Papua New Guinea', 'Oceania'],
        ['Russia', 'Europe'],   # Russia actually spans Asia too.
        ['Tonga', 'Oceania'],
    ],
    columns=['country', 'continent'],
    index=range(last_id + 1, last_id + 5),
)

# Drop copies left over from an earlier run of this cell before appending, so
# re-running the notebook cells never duplicates a country (duplicates would
# be double-counted after the merge with the volcano table).
already_added = df2['country'].isin(missing_rows['country'])
df2 = pd.concat([df2[~already_added], missing_rows])

In [None]:
# Show the last ten rows of the lookup table.
df2.iloc[-10:]

In [None]:
# Inner join on the shared 'country' column: only countries present in BOTH
# df2 and df3 are kept.
df4 = pd.merge(left=df2, right=df3, on='country', how='inner')

# What's the size of the output data?  Only the last expression of a cell is
# displayed automatically, so the shape has to be printed explicitly.
print(df4.shape)
df4


In [None]:
# Sunburst of the volcano counts, grouped by continent and then country.
# The volcano emoji will show up in the chart while in Jupyter notebooks.
# However, it will not appear in the exported .png file.
fig = px.sunburst(df4, path=['continent', 'country'], values='num_volcano',
                  title="Countries with the most volcanos 🌋(erupting in the last 12,000 years)")

# Centre the title and render it in red Times New Roman.
fig.update_layout(
    title_font_family="Times New Roman",
    title_font_color="red",
    title_x=0.5
)

fig.show()

In [None]:
# Export the current figure as a static PNG named fig1.png.
# On Kaggle the file is written to /kaggle/working, exactly as before; when
# that directory does not exist (e.g. a local run) the current working
# directory is used instead of failing on the hard-coded absolute path.
output_dir = Path("/kaggle/working")
if not output_dir.is_dir():
    output_dir = Path.cwd()
fig.write_image(str(output_dir / "fig1.png"))