#EDA on gapminder

In [1]:
import plotly.express as px

gapminder = px.data.gapminder()

gapminder.head()

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
0,Afghanistan,Asia,1952,28.801,8425333,779.445314,AFG,4
1,Afghanistan,Asia,1957,30.332,9240934,820.85303,AFG,4
2,Afghanistan,Asia,1962,31.997,10267083,853.10071,AFG,4
3,Afghanistan,Asia,1967,34.02,11537966,836.197138,AFG,4
4,Afghanistan,Asia,1972,36.088,13079460,739.981106,AFG,4


In [2]:
gapminder["year"].unique

<bound method Series.unique of 0       1952
1       1957
2       1962
3       1967
4       1972
        ... 
1699    1987
1700    1992
1701    1997
1702    2002
1703    2007
Name: year, Length: 1704, dtype: int64>

In [3]:
nordic = gapminder[
    gapminder["country"].isin(["Sweden", "Norway", "Denmark", "Finland", "Iceland"])
    ] #Blir boolean mask



In [4]:
nordic.dtypes

country       object
continent     object
year           int64
lifeExp      float64
pop            int64
gdpPercap    float64
iso_alpha     object
iso_num        int64
dtype: object

In [5]:
fig = px.line(
    nordic,
    y="lifeExp",
    x="year",
    color = "country",
    title=("Life expectancy in the nordic countries")
)

fig.update_layout(
     hovermode = "x",
     plot_bgcolor="white",
         xaxis=dict(
         title="Year",    
        title_font=dict(
            size=18,    # Font size
            family="Arial Black, sans-serif",  # Font type (optional)
            color="black"  # Font color
        )
    ),
    yaxis=dict(
        title="Life Expectancy",
        title_font=dict(
            size=18,
            family="Arial Black, sans-serif",  # "Arial Black" makes it look bolder
            color="black"
        )
    )
    
)

fig.update_xaxes(
    showspikes = True,
    spikemode = "across",
    spikesnap = "cursor",

)


fig.show()

In [6]:
gapminder.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1704 entries, 0 to 1703
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   country    1704 non-null   object 
 1   continent  1704 non-null   object 
 2   year       1704 non-null   int64  
 3   lifeExp    1704 non-null   float64
 4   pop        1704 non-null   int64  
 5   gdpPercap  1704 non-null   float64
 6   iso_alpha  1704 non-null   object 
 7   iso_num    1704 non-null   int64  
dtypes: float64(2), int64(3), object(3)
memory usage: 106.6+ KB


In [7]:
gapminder["year"].unique

<bound method Series.unique of 0       1952
1       1957
2       1962
3       1967
4       1972
        ... 
1699    1987
1700    1992
1701    1997
1702    2002
1703    2007
Name: year, Length: 1704, dtype: int64>

#Get lowest life expectancy

In [8]:
import duckdb

poor = duckdb.query(
    """--sql

    SELECT
    "country",
    "year",
    "lifeExp",

    FROM gapminder
    WHERE YEAR == 2007
    ORDER BY "lifeExp" ASC
    LIMIT 5
   
    """

)

poor

┌──────────────┬───────┬───────────────────┐
│   country    │ year  │      lifeExp      │
│   varchar    │ int64 │      double       │
├──────────────┼───────┼───────────────────┤
│ Swaziland    │  2007 │            39.613 │
│ Mozambique   │  2007 │            42.082 │
│ Zambia       │  2007 │ 42.38399999999999 │
│ Sierra Leone │  2007 │ 42.56800000000001 │
│ Lesotho      │  2007 │            42.592 │
└──────────────┴───────┴───────────────────┘

In [9]:
import duckdb

poor = duckdb.query(
    """--sql

    SELECT
    "country",
    "year",
    "lifeExp",

    FROM gapminder
    WHERE "country" IN ('Swaziland', 'Mozambique', 'Zambia', 'Sierra Leone', 'Lesotho')
    ORDER BY "country", "year"

    """

)

poor

┌─────────┬───────┬───────────────────┐
│ country │ year  │      lifeExp      │
│ varchar │ int64 │      double       │
├─────────┼───────┼───────────────────┤
│ Lesotho │  1952 │ 42.13800000000001 │
│ Lesotho │  1957 │            45.047 │
│ Lesotho │  1962 │            47.747 │
│ Lesotho │  1967 │            48.492 │
│ Lesotho │  1972 │            49.767 │
│ Lesotho │  1977 │            52.208 │
│ Lesotho │  1982 │            55.078 │
│ Lesotho │  1987 │             57.18 │
│ Lesotho │  1992 │            59.685 │
│ Lesotho │  1997 │            55.558 │
│   ·     │    ·  │               ·   │
│   ·     │    ·  │               ·   │
│   ·     │    ·  │               ·   │
│ Zambia  │  1962 │            46.023 │
│ Zambia  │  1967 │            47.768 │
│ Zambia  │  1972 │            50.107 │
│ Zambia  │  1977 │            51.386 │
│ Zambia  │  1982 │ 51.82100000000001 │
│ Zambia  │  1987 │ 50.82100000000001 │
│ Zambia  │  1992 │              46.1 │
│ Zambia  │  1997 │            40.238 │


In [88]:
def create_life_expectancy_plot(df, title):

    fig = px.line(
        df,
        y="lifeExp",
        x="year",
        color = "country",
        title=title
    )

    fig.update_layout(
        title_font=dict(
        family="Arial Black, sans-serif",  
        size=24,                          
        color="black"                     
    ),
        hovermode = "x",
        plot_bgcolor="white",
            xaxis=dict(
                title=dict(
                    text="",
                ),
                tickfont=dict(
                    family="Arial, sans-serif",
                    size=12,
                    color="gray"  # <-- Numbers (ticks) will be gray
                ),
                automargin=True
            ), 
        yaxis=dict(
            title="Life Expectancy [years]",
            title_font=dict(
                size=18,
                family="Arial Black, sans-serif",  # "Arial Black" makes it look bolder
                color="gray"
                ),
                tickfont=dict(
                    family="Arial, sans-serif",
                    size=12,
                    color="gray"  # <-- Numbers (ticks) on Y-axis will be gray too
                 )
            
        ),
        #Legends 
        legend=dict(
        title = "<b>Countries</b>",
        bordercolor="gray",                 
        borderwidth=1,                       # border thickness
        font=dict(
            family="Arial, sans-serif",  
            size=10,
            color="gray"
        )
    ),

        annotations=[
            dict(
                text="Year [1952-2007]",           # <-- Your fake title
                x=0,                   # <-- Far left (0 = left, 1 = right)
                xref="paper",
                y=-0.08,                   # <-- At the bottom
                yref="paper",
                showarrow=False,
                font=dict(
                    size=18,
                    family="Arial Black, sans-serif",
                    color="gray"
                ),
                xanchor="left",         # <-- Anchor it to the left side
                yanchor="top"
            )
        ]
    )

    fig.update_xaxes(
        showspikes = True,
        spikemode = "across",
        spikesnap = "cursor",

    )


    return fig

In [91]:
fig_poor = create_life_expectancy_plot(poor, "Bottom five countries for Life Expectancy")

fig_poor.show()

In [12]:
print(nordic['country'].unique())


['Denmark' 'Finland' 'Iceland' 'Norway' 'Sweden']


In [90]:
fig_rich = create_life_expectancy_plot(nordic, "Life Expectancy in the nordic countries")

fig_rich.show()