# A Remake of the Factfullness Bubble Chart

In [1]:
# imports
import pandas as pd
import plotly_express as px

**Load Data From Gapminder's Open Data Source:**

In [2]:
life_expectancy = pd.read_csv(
    "https://raw.githubusercontent.com/open-numbers/ddf--gapminder--systema_globalis/master/ddf--datapoints--life_expectancy_years--by--geo--time.csv")
income = pd.read_csv(
    "https://raw.githubusercontent.com/open-numbers/ddf--gapminder--systema_globalis/master/ddf--datapoints--income_per_person_gdppercapita_ppp_inflation_adjusted--by--geo--time.csv")
population = pd.read_csv(
    "https://raw.githubusercontent.com/open-numbers/ddf--gapminder--systema_globalis/master/ddf--datapoints--population_total--by--geo--time.csv")
countries = pd.read_csv(
    "https://raw.githubusercontent.com/open-numbers/ddf--gapminder--systema_globalis/master/ddf--entities--geo--country.csv")

**Merge Dataframes**

In [3]:
gapminder_df = (life_expectancy
                .merge(income, on=["geo", "time"])
                .merge(population, on=["geo", "time"]))

gapminder_df = (gapminder_df
                .merge(
                    countries[["country", "name", "world_4region"]], 
                    left_on="geo", right_on="country"))

gapminder_df.head()

Unnamed: 0,geo,time,life_expectancy_years,income_per_person_gdppercapita_ppp_inflation_adjusted,population_total,country,name,world_4region
0,afg,1800,28.21,603,3280000,afg,Afghanistan,asia
1,afg,1801,28.2,603,3280000,afg,Afghanistan,asia
2,afg,1802,28.19,603,3280000,afg,Afghanistan,asia
3,afg,1803,28.18,603,3280000,afg,Afghanistan,asia
4,afg,1804,28.17,603,3280000,afg,Afghanistan,asia


**Drop Unnecessary Columns and Rename the Rest**

In [4]:
gapminder_df.drop(columns=["country"], inplace=True)

gapminder_df.rename(columns={"name": "Country",
                             "world_4region": "Region",
                             "time": "Year",
                             "life_expectancy_years": "Life Expectancy",
                             "population_total": "Population",
                             "income_per_person_gdppercapita_ppp_inflation_adjusted":
                             "Income",
                             "geo": "Country Code"},
                    inplace=True)

In [5]:
gapminder_df = gapminder_df[["Country", "Region", "Year", "Population",
                             "Life Expectancy", "Income", "Country Code"]]

**Add Country and Region columns for Visualization**

In [6]:
gapminder_df["Country Code"] = gapminder_df["Country Code"].apply(
    lambda x: x.upper())

gapminder_df["Region Code"] = (gapminder_df["Region"]
                               .map({"africa": 0,
                                     "americas": 1,
                                     "asia": 2,
                                     "europe": 3}))
gapminder_df.head()

Unnamed: 0,Country,Region,Year,Population,Life Expectancy,Income,Country Code,Region Code
0,Afghanistan,asia,1800,3280000,28.21,603,AFG,2
1,Afghanistan,asia,1801,3280000,28.2,603,AFG,2
2,Afghanistan,asia,1802,3280000,28.19,603,AFG,2
3,Afghanistan,asia,1803,3280000,28.18,603,AFG,2
4,Afghanistan,asia,1804,3280000,28.17,603,AFG,2


**World Regions Demonstration Aligned with the Factfullness Bubble Chart**

In [10]:
px.choropleth(gapminder_df, locations=gapminder_df["Country Code"],
              color="Region Code",
              projection="natural earth",
              color_continuous_scale=["#87CEFA", "#32CD32",
                                      "#FF69B4", "#FFFF00"])

**Animated Bubble Chart of How World Has Changed over 200 Years**

In [11]:
px.scatter(gapminder_df,
           x="Income",
           y="Life Expectancy",
           size="Population",
           size_max=60,
           color="Region",
           hover_name="Country",
           animation_frame="Year",
           animation_group="Country",
           color_discrete_sequence=["#FF69B4", "#87CEFA",
                                    "#FFFF00", "#32CD32"],
           log_x=True,
           range_x=[100, 100000],
           range_y=[25, 90])

**Snapshot from 2018 with 4 Different Income Levels**

In [12]:
gapminder_2018 = gapminder_df[gapminder_df["Year"] == 2018]

In [13]:
def income_level_function(series):
    if series <= 2500:
        return "Income Level 1"
    elif series <= 8000:
        return "Income Level 2"
    elif series <= 25000:
        return "Income Level 3"
    else:
        return "Income Level 4"


gapminder_2018["Income Level"] = (gapminder_2018["Income"]
                                  .apply(income_level_function))



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [14]:
fig = px.scatter(gapminder_2018,
                 x="Income",
                 y="Life Expectancy",
                 size="Population",
                 size_max=60,
                 color="Region",
                 hover_name="Country",
                 facet_col="Income Level",
                 color_discrete_sequence=["#FF69B4", "#87CEFA",
                                          "#FFFF00", "#32CD32"],
                 range_y=[50, 90],
                 width=1000,
                 labels={"Income": "Income in $s",
                         "Life Expectancy": "Life Expectancy in years"},
                 title="World Map for Health and Wealth (data from 2018)",
                 opacity=1
                 )

fig.layout.xaxis1.update(matches=None)
fig.layout.xaxis2.update(matches=None)
fig.layout.xaxis3.update(matches=None)
fig.layout.xaxis4.update(matches=None)

fig.show()

**Data Sources:**
* Factfullness Animated Bubble Chart: [https://www.gapminder.org/tools/#$state$time$value=2018;&marker$select@$country=tza&trailStartTime=2018;&$country=pse&trailStartTime=2018;;;;&chart-type=bubbles]
* Gapminder Foundation's Open Source Data Community (Open Numbers) using open and reliable data sources for world statistics among various subject matters: https://github.com/open-numbers/ddf--gapminder--systema_globalis
* Plotly Express Documentation: https://medium.com/plotly/introducing-plotly-express-808df010143d