In [18]:
import pandas as pd
import cufflinks as cf
import plotly.express as px
from IPython.display import display, HTML


# Cufflinks defaults: public sharing, the 'ggplot' theme, and offline mode.
# (The cells visible in this notebook plot with Plotly Express, not cufflinks.)
cf.set_config_file(
    sharing='public',
    theme='ggplot',
    offline=True,
)

In [21]:
# Load the raw population table and discard every row that has a missing value.
df_population_raw = pd.read_csv('population_total.csv').dropna()

# Reshape to wide form (one row per year, one column per country, cells hold
# the population) and keep only the five countries charted in this notebook.
df_pivot = (
    df_population_raw
    .pivot(index='year', columns='country', values='population')
    .loc[:, ['United States', 'India', 'China', 'Indonesia', 'Brazil']]
)

# Display the resulting pivot table.
df_pivot

country,United States,India,China,Indonesia,Brazil
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1955.0,171685336.0,409880600.0,612241600.0,77273425.0,62533919.0
1960.0,186720571.0,450547700.0,660408100.0,87751068.0,72179226.0
1965.0,199733676.0,499123300.0,724219000.0,100267062.0,83373530.0
1970.0,209513341.0,555189800.0,827601400.0,114793178.0,95113265.0
1975.0,219081251.0,623102900.0,926240900.0,130680727.0,107216205.0
1980.0,229476354.0,698952800.0,1000089000.0,147447836.0,120694009.0
1985.0,240499825.0,784360000.0,1075589000.0,164982451.0,135274080.0
1990.0,252120309.0,873277800.0,1176884000.0,181413402.0,149003223.0
1995.0,265163745.0,963922600.0,1240921000.0,196934260.0,162019896.0
2000.0,281710909.0,1056576000.0,1290551000.0,211513823.0,174790340.0


# 1 Lineplot

In [22]:
# Wide-form line chart: the year index goes on the x-axis and every country
# column of the pivot table becomes its own line.
fig = px.line(
    df_pivot,
    x=df_pivot.index,
    y=df_pivot.columns,
    title="Population Growth Over Time",
    labels={"year": "Year", "value": "Population"},
)
fig.show()

# 2 Barplot

In [27]:
# Take the 2020 row as a Series indexed by country, then promote it to a
# one-column frame so the countries are the index and the values sit under
# a 'Population' column.
df_pivot_2020 = df_pivot.loc[2020].to_frame(name='Population')

In [28]:
# One bar per country for 2020. The data is passed as arrays rather than as a
# data frame, so the axis titles are keyed on the generic "x"/"y" names.
fig = px.bar(
    title="Population by Country (2020)",
    x=df_pivot_2020.index,
    y=df_pivot_2020['Population'],
    labels={"x": "Country", "y": "Population"},
)
fig.show()

## 2.1 Barplot grouped by 'n' variables

In [29]:
# Pick the years to compare and turn the 'year' index into a regular column.
# Both steps are derived from df_pivot in a single assignment so the cell is
# idempotent: re-running a standalone `df_sample = df_sample.reset_index()`
# would add a spurious 'index' column that melt() then reports as a country.
df_sample = df_pivot.loc[[1960, 1970, 1980, 1990, 2020]].reset_index()

# Melt to long format for plotly: one row per (year, country) pair.
df_long = df_sample.melt(id_vars="year", var_name="country", value_name="population")

In [32]:
# Grouped bar chart: one cluster per country, one bar per selected year.
#
# Plotly Express treats a numeric `color` column as continuous (a single trace
# with a colour bar), in which case barmode="group" has nothing to group.
# Casting the year to a string makes it a discrete category, so every year
# gets its own trace, legend entry and side-by-side bar. The int cast first
# drops the trailing ".0" from the float-valued year index.
fig = px.bar(
    df_long.assign(year=df_long["year"].astype(int).astype(str)),
    x="country",
    y="population",
    color="year",
    barmode="group",
    title="Population by Country (Selected Years)"
)
fig.show()


# 3 Pie chart

In [33]:
# 2020 populations as a Series indexed by country.
df_2020 = df_pivot.loc[2020, :]

In [34]:
# Share of the five countries' combined 2020 population held by each country.
fig = px.pie(
    title="Population by Country (2020)",
    names=df_2020.index,    # slice labels: country names
    values=df_2020.values,  # slice sizes: population values
)
fig.show()

# 4 Boxplot

In [35]:
# Long-form copy of the whole pivot table: 'year' becomes a column and each
# (year, country) pair gets its own row with the value under 'population'.
# NOTE: this rebinds df_long, which earlier held only the sampled years.
df_long = pd.melt(
    df_pivot.reset_index(),
    id_vars="year",
    var_name="country",
    value_name="population",
)

In [36]:
# One box per country summarising its population across all available years.
fig = px.box(
    df_long,
    x="country",
    y="population",
    points="all",  # optional: draw every individual observation beside its box
    title="Population Distribution by Country (1955-2020)",
)
fig.show()


# 5 Histogram

In [37]:
# Histogram of every population value in df_long, pooled across countries
# and years.
fig = px.histogram(
    df_long,
    x="population",
    title="Distribution of Population Across Countries and Years",
    nbins=20,  # number of bins (adjust as needed)
)
fig.show()


# 6 Scatterplot

In [38]:
# Population against year, one marker per observation, coloured by country.
fig = px.scatter(
    df_long,
    x="year",
    y="population",
    title="Population Growth Trend",
    color="country",
)
fig.show()



