# Exercise 2.0


In [118]:
import plotly.express as px
import pandas as pd

# Load the internet-usage CSV into a DataFrame and preview its first rows.
csv_path = "data/share-of-individuals-using-the-internet.csv"
df = pd.read_csv(csv_path)

df.head()

Unnamed: 0,Entity,Code,Year,Individuals using the Internet (% of population)
0,Afghanistan,AFG,1990,0.0
1,Afghanistan,AFG,1991,0.0
2,Afghanistan,AFG,1992,0.0
3,Afghanistan,AFG,1993,0.0
4,Afghanistan,AFG,1994,0.0


In [119]:
# Print the frame's summary: index range, column dtypes and non-null counts.
# Useful here for spotting columns with missing values before filtering.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7184 entries, 0 to 7183
Data columns (total 4 columns):
 #   Column                                            Non-Null Count  Dtype  
---  ------                                            --------------  -----  
 0   Entity                                            7184 non-null   object 
 1   Code                                              6717 non-null   object 
 2   Year                                              7184 non-null   int64  
 3   Individuals using the Internet (% of population)  7184 non-null   float64
dtypes: float64(1), int64(1), object(2)
memory usage: 224.6+ KB


In [120]:
# Show the exact column labels; the value column's long name is needed
# verbatim when sorting and plotting.
df.columns

Index(['Entity', 'Code', 'Year',
       'Individuals using the Internet (% of population)'],
      dtype='object')

In [121]:
# Keep only the rows whose Year is 2023.
is_2023 = df["Year"].eq(2023)
df2023 = df.loc[is_2023]
df2023

Unnamed: 0,Entity,Code,Year,Individuals using the Internet (% of population)
59,Albania,ALB,2023,83.1356
229,Argentina,ARG,2023,89.2290
352,Austria,AUT,2023,95.3347
450,Bahrain,BHR,2023,100.0000
483,Bangladesh,BGD,2023,44.5027
...,...,...,...,...
6762,United Arab Emirates,ARE,2023,100.0000
6890,Upper-middle-income countries,,2023,80.0000
6957,Uzbekistan,UZB,2023,89.0136
7052,Vietnam,VNM,2023,78.0800


In [122]:
# Narrow the 2023 rows down to the "(WB)" regional aggregates and the
# "World" total. Entities without a 2023 row simply do not appear.
region_names = [
    "North America (WB)",
    "Europe and Central Asia (WB)",
    "Latin America and Caribbean (WB)",
    "East Asia and Pacific (WB)",
    "Middle East and North Africa (WB)",
    "South Asia (WB)",
    "Sub-Saharan Africa (WB)",
    "World",
]
is_region = df2023["Entity"].isin(region_names)
df2023 = df2023.loc[is_region]
df2023

Unnamed: 0,Entity,Code,Year,Individuals using the Internet (% of population)
1794,East Asia and Pacific (WB),,2023,79.0
2115,Europe and Central Asia (WB),,2023,90.1
3521,Latin America and Caribbean (WB),,2023,81.0
4286,Middle East and North Africa (WB),,2023,77.7
4828,North America (WB),,2023,97.3
6136,Sub-Saharan Africa (WB),,2023,36.7
7086,World,OWID_WRL,2023,67.4


In [123]:
# Entities to keep in the 2023 table: the "(WB)" regional aggregates
# (South Asia is not listed here) plus the "World" total.
continents = [
    "North America (WB)",
    "Europe and Central Asia (WB)",
    "Latin America and Caribbean (WB)",
    "East Asia and Pacific (WB)",
    "Middle East and North Africa (WB)",
    "Sub-Saharan Africa (WB)",
    "World",
]
keep_mask = df2023["Entity"].isin(continents)
df2023 = df2023.loc[keep_mask]
df2023

Unnamed: 0,Entity,Code,Year,Individuals using the Internet (% of population)
1794,East Asia and Pacific (WB),,2023,79.0
2115,Europe and Central Asia (WB),,2023,90.1
3521,Latin America and Caribbean (WB),,2023,81.0
4286,Middle East and North Africa (WB),,2023,77.7
4828,North America (WB),,2023,97.3
6136,Sub-Saharan Africa (WB),,2023,36.7
7086,World,OWID_WRL,2023,67.4


In [124]:
# Pull the 2021 record for "South Asia (WB)" from the full table.
is_south_asia = df["Entity"].eq("South Asia (WB)")
is_2021 = df["Year"].eq(2021)
south_asia = df.loc[is_south_asia & is_2021]
south_asia

Unnamed: 0,Entity,Code,Year,Individuals using the Internet (% of population)
5997,South Asia (WB),,2021,42.850647


In [125]:
# Stack the 2023 regional rows and the South Asia 2021 row into one frame
# (original index labels are kept).
frames = [df2023, south_asia]
df_all = pd.concat(frames)
df_all

Unnamed: 0,Entity,Code,Year,Individuals using the Internet (% of population)
1794,East Asia and Pacific (WB),,2023,79.0
2115,Europe and Central Asia (WB),,2023,90.1
3521,Latin America and Caribbean (WB),,2023,81.0
4286,Middle East and North Africa (WB),,2023,77.7
4828,North America (WB),,2023,97.3
6136,Sub-Saharan Africa (WB),,2023,36.7
7086,World,OWID_WRL,2023,67.4
5997,South Asia (WB),,2021,42.850647


In [126]:
# Order the rows from the highest to the lowest internet-usage share.
share_col = "Individuals using the Internet (% of population)"
dfall = df_all.sort_values(by=share_col, ascending=False)
dfall

Unnamed: 0,Entity,Code,Year,Individuals using the Internet (% of population)
4828,North America (WB),,2023,97.3
2115,Europe and Central Asia (WB),,2023,90.1
3521,Latin America and Caribbean (WB),,2023,81.0
1794,East Asia and Pacific (WB),,2023,79.0
4286,Middle East and North Africa (WB),,2023,77.7
7086,World,OWID_WRL,2023,67.4
5997,South Asia (WB),,2021,42.850647
6136,Sub-Saharan Africa (WB),,2023,36.7


In [215]:
# Horizontal bar chart of internet-usage share per region.
#
# `headline_year` is the year named in the title; any row in `dfall` taken
# from a different year gets an "in <year>" note placed next to its bar.
headline_year = 2023
value_col = "Individuals using the Internet (% of population)"
chart_title = f"Share of the population using the Internet, {headline_year}"

fig = px.bar(
    dfall,
    x=value_col,
    y="Entity",
    title=chart_title,
)

# Subtitle under the title. The position is given in paper coordinates and
# looks hand-tuned for the 1300x800 layout set below; re-check it if the
# figure size or the tick-label width changes.
fig.add_annotation(
    text="Share of the population who used the Internet in the last three months.",
    xref="paper",
    yref="paper",
    x=-0.407,
    y=1.07,
    showarrow=False,
    font=dict(size=16),
)

# Flag bars whose value is not from the headline year. The note is anchored
# to the bar itself (data coordinates) so it follows the bar if the data or
# the ordering changes; `xshift` moves it right, past the outside value label.
off_year_rows = dfall[dfall["Year"] != headline_year]
for entity, year, value in zip(
    off_year_rows["Entity"], off_year_rows["Year"], off_year_rows[value_col]
):
    fig.add_annotation(
        text=f"in {year}",
        x=float(value),
        y=entity,
        xanchor="left",
        xshift=55,
        showarrow=False,
        font=dict(size=14, color="lightgrey"),
    )

# One colour for every bar, with the value printed just outside the bar end.
fig.update_traces(
    marker_color="blueviolet",
    textposition="outside",
    texttemplate="%{x:.1f}%",
    width=0.6,
)

fig.update_layout(
    title={
        "text": chart_title,
        "x": 0.012,
        "xanchor": "left",
        "font": dict(size=28),
    },
    xaxis_title=None,
    yaxis_title=None,
    yaxis=dict(
        tickfont=dict(size=20),
        # The y axis runs bottom-to-top, so reverse the (descending) row
        # order to put the largest share at the top of the chart.
        categoryorder="array",
        categoryarray=list(dfall["Entity"])[::-1],
    ),
    plot_bgcolor="white",
    showlegend=False,
    width=1300,
    height=800,
)
fig.show()