In [2]:
from plotly.data import gapminder
import plotly.express as px
import plotly.graph_objects as go
from IPython.display import Image, display
import pandas as pd
import seaborn as sns


In [3]:
# Load the bundled Gapminder table with pretty column names, datetime years
# and centroid columns, then reduce the datetime "Year" column to the plain
# calendar year in a single chained expression.
gapminder_df = gapminder(
    datetimes=True,
    centroids=True,
    pretty_names=True,
).assign(Year=lambda frame: frame["Year"].dt.year)

# Preview the first five rows.
gapminder_df.head()

Unnamed: 0,Country,Continent,Year,Life Expectancy,Population,GDP per Capita,ISO Alpha Country Code,ISO Numeric Country Code,Centroid Longitude,Centroid Latitude
0,Afghanistan,Asia,1952,28.801,8425333,779.445314,AFG,4,65.0,33.0
1,Afghanistan,Asia,1957,30.332,9240934,820.85303,AFG,4,65.0,33.0
2,Afghanistan,Asia,1962,31.997,10267083,853.10071,AFG,4,65.0,33.0
3,Afghanistan,Asia,1967,34.02,11537966,836.197138,AFG,4,65.0,33.0
4,Afghanistan,Asia,1972,36.088,13079460,739.981106,AFG,4,65.0,33.0


In [4]:
# Print the column names, non-null counts, dtypes and memory usage of the frame.
gapminder_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1704 entries, 0 to 1703
Data columns (total 10 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Country                   1704 non-null   object 
 1   Continent                 1704 non-null   object 
 2   Year                      1704 non-null   int32  
 3   Life Expectancy           1704 non-null   float64
 4   Population                1704 non-null   int64  
 5   GDP per Capita            1704 non-null   float64
 6   ISO Alpha Country Code    1704 non-null   object 
 7   ISO Numeric Country Code  1704 non-null   int64  
 8   Centroid Longitude        1704 non-null   float64
 9   Centroid Latitude         1704 non-null   float64
dtypes: float64(4), int32(1), int64(2), object(3)
memory usage: 126.6+ KB


In [6]:
# Count the rows available per country (how many observations each one has).
gapminder_df['Country'].value_counts()

Country
Afghanistan          12
Pakistan             12
New Zealand          12
Nicaragua            12
Niger                12
                     ..
Eritrea              12
Equatorial Guinea    12
El Salvador          12
Egypt                12
Zimbabwe             12
Name: count, Length: 142, dtype: int64

# Visualization

# Bar chart (Population)

In [31]:
# Population of Vietnam for every year present in the dataset.
# The frame is named after the country it actually holds.
df_vietnam = px.data.gapminder().query("country == 'Vietnam'")
px.bar(
    df_vietnam,
    x="year",
    y="pop",
    title="Population of Vietnam over time",
    # `labels` maps column names to the text shown on axes and hover.
    labels={"year": "Year", "pop": "Population"},
)

Use a population threshold to see which countries had more than 80 million people in 2007.

In [32]:
# Asian countries whose 2007 population exceeds the threshold below.
POPULATION_THRESHOLD = 80e6  # people

df_asia_populous = px.data.gapminder().query(
    "continent == 'Asia' and year == 2007 and pop > @POPULATION_THRESHOLD"
)

fig = px.bar(
    df_asia_populous,
    x="country",
    y="pop",
    text="pop",
    color="country",
    title="Asian countries with more than 80 million people (2007)",
    # Plotly Express keys `labels` by column name, not by axis letter;
    # {"x": ..., "y": ...} would be ignored and the raw column names shown.
    labels={"country": "Country", "pop": "Population"},
)
# Show the value above each bar using two significant digits with an SI suffix.
fig.update_traces(texttemplate="%{text:.2s}", textposition="outside")
fig.update_layout(uniformtext_minsize=8, xaxis_tickangle=-45)
fig


# Pie Chart (Top population in Asia)

In [52]:
# Share of the 2007 population held by each Asian country.
df_asia = px.data.gapminder().query("year == 2007 and continent == 'Asia'")

fig = px.pie(
    df_asia,
    names="country",
    values="pop",
    title="Population of Asian continent",
    color_discrete_sequence=px.colors.sequential.RdBu,
)
# Write the percentage and the country name inside each slice.
fig.update_traces(textinfo='percent+label', textposition='inside')
fig.show()

# Line Chart (GDP, POP of selected countries from 1952 to 2007)

In [23]:
# GDP per capita over time, one line per selected country.
countries = [
    'Vietnam',
    'Thailand',
    'Malaysia',
    'Singapore',
    'Indonesia',
    'Philippines',
    'Cambodia',
]

df = px.data.gapminder()
# Keep only the rows belonging to the selected countries.
df_selected = df.loc[df['country'].isin(countries)]

fig = px.line(
    df_selected,
    x='year',
    y='gdpPercap',
    color='country',
    title='GDP per capita over time',
)
fig.update_layout(height=800, width=1200)
fig.show()

In [51]:
df = px.data.gapminder()

# 3D line chart: one line per country (line_group), colored by continent,
# with year / continent / population on the three axes.
fig = px.line_3d(
    df,
    x="year",
    y="continent",
    z="pop",
    color="continent",
    line_group="country",
    hover_name="country",
    width=1000,
    height=800,
)
fig.show()

# Scatter Plot

Now, let's use a 3D scatter plot to visualize the population of each continent over time.

In [62]:
df = px.data.gapminder()

# 3D scatter of every row: continent / year / population on the axes,
# with marker color also driven by population.
fig = px.scatter_3d(
    df,
    x="continent",
    y="year",
    z="pop",
    color="pop",
    opacity=0.7,
    hover_name="country",
    width=1000,
    height=800,
)
fig.show()

Are life expectancy and GDP per capita correlated?

In [5]:
df_gapminder = px.data.gapminder()

# Bubble chart of life expectancy against GDP per capita:
# bubble size follows population, color follows continent.
fig = px.scatter(
    df_gapminder,
    x="gdpPercap",
    y="lifeExp",
    color="continent",
    size="pop",
    size_max=60,
    hover_data=["year", "country", "pop"],
)
# Fix the visible x-range; points with gdpPercap above 50000 fall outside the view.
fig.update_layout(xaxis_range=[0, 50000])
fig.show()

# Box Plot - Violin Plot 

In [14]:
df_gapminder = px.data.gapminder()

# One box per continent; points='all' also draws every individual observation.
fig = px.box(
    df_gapminder,
    x="continent",
    y="gdpPercap",
    color="continent",
    points='all',
    title="Boxplot of GDP per Capita by Continent",
)
fig.show()


In [27]:
df_gapminder = px.data.gapminder()

# Compare the GDP-per-capita distribution of each continent in two years.
years = [2007, 1997]
df_gapminder_years = df_gapminder[df_gapminder['year'].isin(years)]

colors = px.colors.qualitative.Safe

fig = go.Figure()

# One violin trace per continent, in order of first appearance in the data.
for i, (continent, df_continent) in enumerate(
    df_gapminder_years.groupby('continent', sort=False)
):
    fig.add_trace(
        go.Violin(
            x=df_continent["year"],
            y=df_continent["gdpPercap"],
            name=continent,
            legendgroup=continent,
            scalegroup=continent,
            # Cycle through the palette if there are more traces than colors.
            line_color=colors[i % len(colors)],
        )
    )

fig.update_layout(
    title="Violin Plot of GDP per Capita by Continent and Year",
    xaxis_title="Year",
    yaxis_title="GDP per Capita",
    # The default violinmode is "overlay", which draws every continent on top
    # of the others at the same year; "group" places them side by side.
    violinmode="group",
    # Treat the two years as discrete categories rather than a numeric range.
    xaxis_type="category",
)

fig.show()


# Map scatter plot


In [25]:
df = px.data.gapminder().query("year == 2007")

# World map with one bubble per country, located by its ISO alpha code and
# sized by population.
# Other projections to try: orthographic, natural earth, robinson, sinusoidal
fig = px.scatter_geo(
    df,
    locations="iso_alpha",
    color="continent",
    size="pop",
    hover_name="country",
    projection="natural earth",
    width=800,
    height=600,
)
fig

# Animated plot

In [59]:
# NOTE: `df_cnt` is reused by the animated bar chart cell further down.
df_cnt = px.data.gapminder()

# Animated bubble chart: one frame per year, each country tracked across
# frames through animation_group.
px.scatter(
    df_cnt,
    x="gdpPercap",
    y="lifeExp",
    size="pop",
    size_max=55,
    color="continent",
    hover_name="country",
    animation_frame="year",
    animation_group="country",
    log_x=True,
    range_x=[100, 100000],
    range_y=[25, 90],
)



In [12]:
df = px.data.gapminder()

# Same animated bubble chart as before, but with the points of each trace
# joined by lines.
fig = px.scatter(
    df,
    x="gdpPercap",
    y="lifeExp",
    size="pop",
    size_max=55,
    color="continent",
    hover_name="country",
    animation_frame="year",
    animation_group="country",
    log_x=True,
    range_x=[100, 100000],
    range_y=[25, 90],
    height=600,
    width=1000,
)

# Lines and markers on the initial display.
fig.update_traces(mode='lines+markers')

# Lines and markers on every animation frame as well.
for frame in fig.frames:
    for trace in frame.data:
        trace.update(mode='markers+lines')

fig.show()


In [26]:
# Animated bar chart of population per continent, one frame per year.
# Relies on `df_cnt` from the earlier animated scatter cell.
px.bar(
    df_cnt,
    x="continent",
    y="pop",
    color="continent",
    animation_frame="year",
    animation_group="country",
    # Fixed y-range so the axis does not rescale between frames.
    range_y=(0, 4_500_000_000),
)

# Density Heatmap


In [43]:
df = px.data.gapminder()
fig = px.density_heatmap(df, x="year", y="continent", z="pop", marginal_x="histogram", marginal_y="histogram", title= "Observations population by continent and year")

fig.show()

# Histogram

In [8]:
df_gapminder = px.data.gapminder()

# Distribution of GDP per capita across all rows.
fig_gdp = px.histogram(
    df_gapminder,
    x="gdpPercap",
    nbins=30,
    title="Histogram of GDP per Capita",
).update_layout(xaxis_title="GDP per Capita", yaxis_title="Count")

# Distribution of life expectancy across all rows, drawn in a different color.
fig_life = (
    px.histogram(
        df_gapminder,
        x="lifeExp",
        nbins=30,
        title="Histogram of Life Expectancy",
    )
    .update_layout(xaxis_title="Life Expectancy", yaxis_title="Count")
    .update_traces(marker_color="indianred")
)

fig_gdp.show()
fig_life.show()


# Thanks For Paying Attention

In [14]:
# Render the closing GIF inline from its remote URL.
gif = "https://i.giphy.com/media/v1.Y2lkPTc5MGI3NjExdzh3ZHJidDV0Z2hyZnM5Nm5nNDFzN2R1dWRucjFyd2VrajFsMzM3aCZlcD12MV9pbnRlcm5hbF9naWZfYnlfaWQmY3Q9Zw/2yLNN4wTy7Zr8JSXHB/giphy-downsized-large.gif"
closing_image = Image(url=gif)
display(closing_image)