<img src="images/population.png" alt="Example Image" width="1300" style="display: block; margin: 0 auto;">

<h1 style="color:#250902; font-size:24px; font-weight:bold;">Content</h1>

<span style="color:#250902; font-size:18px;">In this Dataset, we have Historical Population data for every Country/Territory in the world by different parameters like Area Size of the Country/Territory, Name of the Continent, Name of the Capital, Density, Population Growth Rate, Ranking based on Population, World Population Percentage, etc.</span>

<span style="color:#250902; font-size:18px; font-weight:bold;">Dataset Glossary (Column-Wise)</span>
<ul style="color:#250902;">
    <li>Rank: Rank by Population</li>
    <li>CCA3: 3 Digit Country/Territories Code</li>
    <li>Country/Territories: Name of the Country/Territories</li>
    <li>Capital: Name of the Capital</li>
    <li>Continent: Name of the Continent</li>
    <li>2022 Population: Population of the Country/Territories in the year 2022</li>
    <li>2020 Population: Population of the Country/Territories in the year 2020</li>
    <li>2015 Population: Population of the Country/Territories in the year 2015</li>
    <li>2010 Population: Population of the Country/Territories in the year 2010</li>
    <li>2000 Population: Population of the Country/Territories in the year 2000</li>
    <li>1990 Population: Population of the Country/Territories in the year 1990</li>
    <li>1980 Population: Population of the Country/Territories in the year 1980</li>
    <li>1970 Population: Population of the Country/Territories in the year 1970</li>
    <li>Area (km²): Area size of the Country/Territories in square kilometer</li>
    <li>Density (per km²): Population Density per square kilometer</li>
    <li>Growth Rate: Population Growth Rate by Country/Territories</li>
    <li>World Population Percentage: The population percentage by each Country/Territories</li>
</ul>

<h1 style="padding:10px;background-color:#e09f3e;margin:0;color:#250902;font-family:sans-serif;font-size:24px;text-align:center;border-radius: 50px;overflow:hidden;font-weight:700">Import Libraries</h1>

In [41]:
import ipywidgets as widgets
from IPython.display import display
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

<h1 style="padding:10px;background-color:#e09f3e;margin:0;color:#250902;font-family:sans-serif;font-size:24px;text-align:center;border-radius: 50px;overflow:hidden;font-weight:700">Load Dataset</h1>

In [42]:
df = pd.read_csv('dataset/world_population.csv')
df.head()

Unnamed: 0,Rank,CCA3,Country/Territory,Capital,Continent,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
0,36,AFG,Afghanistan,Kabul,Asia,41128771,38972230,33753499,28189672,19542982,10694796,12486631,10752971,652230,63.0587,1.0257,0.52
1,138,ALB,Albania,Tirana,Europe,2842321,2866849,2882481,2913399,3182021,3295066,2941651,2324731,28748,98.8702,0.9957,0.04
2,34,DZA,Algeria,Algiers,Africa,44903225,43451666,39543154,35856344,30774621,25518074,18739378,13795915,2381741,18.8531,1.0164,0.56
3,213,ASM,American Samoa,Pago Pago,Oceania,44273,46189,51368,54849,58230,47818,32886,27075,199,222.4774,0.9831,0.0
4,203,AND,Andorra,Andorra la Vella,Europe,79824,77700,71746,71519,66097,53569,35611,19860,468,170.5641,1.01,0.0


In [43]:
for col in df.columns:
    if 'Population' and '0' in col:
        df = df.rename(columns={col: col.split(' ')[0]})
        
df.sample(20)

Unnamed: 0,Rank,CCA3,Country/Territory,Capital,Continent,2022,2020,2015,2010,2000,1990,1980,1970,Area (km²),Density (per km²),Growth Rate,World Population Percentage
9,140,ARM,Armenia,Yerevan,Asia,2780469,2805608,2878595,2946293,3168523,3556539,3135123,2534377,29743,93.4831,0.9962,0.03
227,51,VEN,Venezuela,Caracas,South America,28301696,28490453,30529716,28715022,24427729,19750579,15210443,11355475,916445,30.882,1.0036,0.35
175,190,LCA,Saint Lucia,Castries,North America,179857,179237,175623,170935,159500,142301,121633,103090,616,291.9756,1.0011,0.0
189,116,SVK,Slovakia,Bratislava,Europe,5643453,5456681,5424444,5396424,5376690,5261305,4973883,4522867,49037,115.0856,1.0359,0.07
18,96,BLR,Belarus,Minsk,Europe,9534954,9633740,9700609,9731427,10256483,10428525,9817257,9170786,207600,45.9295,0.9955,0.12
73,131,GEO,Georgia,Tbilisi,Asia,3744385,3765912,3771132,3836831,4265172,5391636,5145843,4800426,69700,53.7214,0.9964,0.05
131,10,MEX,Mexico,Mexico City,North America,127504125,125998302,120149897,112532401,97873442,81720428,67705186,50289306,1964375,64.9082,1.0063,1.6
173,228,BLM,Saint Barthelemy,Gustavia,North America,10967,10681,9643,8988,7082,5168,2983,2417,21,522.2381,1.0098,0.0
122,45,MYS,Malaysia,Kuala Lumpur,Asia,33938221,33199993,31068833,28717731,22945150,17517054,13215707,10306508,330803,102.5934,1.0109,0.43
113,147,LSO,Lesotho,Maseru,Africa,2305825,2254100,2118521,2022747,1998630,1798997,1407672,1023481,30355,75.962,1.0107,0.03


<h1 style="padding:10px;background-color:#e09f3e;margin:0;color:#250902;font-family:sans-serif;font-size:24px;text-align:center;border-radius: 50px;overflow:hidden;font-weight:700">Dataset Shape and Checking NULL Values</h1>

In [44]:
df.shape

(234, 17)

In [45]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 234 entries, 0 to 233
Data columns (total 17 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   Rank                         234 non-null    int64  
 1   CCA3                         234 non-null    object 
 2   Country/Territory            234 non-null    object 
 3   Capital                      234 non-null    object 
 4   Continent                    234 non-null    object 
 5   2022                         234 non-null    int64  
 6   2020                         234 non-null    int64  
 7   2015                         234 non-null    int64  
 8   2010                         234 non-null    int64  
 9   2000                         234 non-null    int64  
 10  1990                         234 non-null    int64  
 11  1980                         234 non-null    int64  
 12  1970                         234 non-null    int64  
 13  Area (km²)          

In [46]:
df.isnull().sum()

Rank                           0
CCA3                           0
Country/Territory              0
Capital                        0
Continent                      0
2022                           0
2020                           0
2015                           0
2010                           0
2000                           0
1990                           0
1980                           0
1970                           0
Area (km²)                     0
Density (per km²)              0
Growth Rate                    0
World Population Percentage    0
dtype: int64

<h1 style="padding:10px;background-color:#e09f3e;margin:0;color:#250902;font-family:sans-serif;font-size:30px;text-align:center;border-radius: 50px;overflow:hidden;font-weight:700">EDA : Exploratory Data Analysis</h1>

<h1 style="padding:10px;background-color:#250902;margin:0;color:#e09f3e;font-family:sans-serif;font-size:24px;text-align:center;border-radius: 50px;overflow:hidden;font-weight:700">Population by Countries in World Map View</h1>

In [47]:
population = df.melt(id_vars=['Country/Territory'], value_vars=['2022', '2020', '2010', '2000', '1990', '1980', '1970'], var_name='Year', value_name='Population')
population = population.sort_values('Year')
population.sample(10)

Unnamed: 0,Country/Territory,Year,Population
65,Faroe Islands,2022,53090
117,Lithuania,2022,2750055
54,Dominican Republic,2022,11228821
743,China,2000,1264099069
579,Latvia,2010,2101530
727,Bosnia and Herzegovina,2000,4179350
836,Monaco,2000,32465
975,Chad,1990,5827069
1415,Australia,1970,12595034
1553,Nigeria,1970,55569264


In [48]:
fig = px.choropleth(population, 
              locations = 'Country/Territory',
              color="Population", 
              animation_frame="Year",
              color_continuous_scale='Viridis',
              locationmode='country names',
              title='Total Population over The World (1970-2022)',
              height=600
             )
fig.show()

<h1 style="padding:10px;background-color:#250902;margin:0;color:#e09f3e;font-family:sans-serif;font-size:24px;text-align:center;border-radius: 50px;overflow:hidden;font-weight:700">Top 5 Countries by Populations in Selected Year</h1>

In [49]:
data = {
    'Country': population['Country/Territory'],
    'Year': population['Year'],
    'Population': population['Population']
}

new_df = pd.DataFrame(data)

year_dropdown = widgets.Dropdown(
    options=new_df['Year'].unique(),
    description='Year:',
    disabled=False,
)

def update_graph(selected_year):
    filtered_data = new_df[new_df['Year'] == selected_year].sort_values(by="Population", ascending=False).head(5)
    
    plt.figure(figsize=(12, 6))
    bars = plt.bar(filtered_data['Country'], filtered_data['Population'] / 1e6, color='#415A77')
    plt.title(f'Top 5 Countries by Population in {selected_year}')
    plt.xlabel('')
    plt.ylabel('')
    plt.xticks(rotation=45)

    for bar in bars:
        yval = bar.get_height()
        plt.text(bar.get_x() + bar.get_width()/2, yval, f'{yval:.2f}M', ha='center', va='bottom')

    plt.grid(False)
    plt.gca().spines['top'].set_visible(False)
    plt.gca().spines['right'].set_visible(False)
    plt.gca().spines['left'].set_visible(True)
    plt.gca().spines['bottom'].set_visible(True)

    plt.show()

widgets.interactive(update_graph, selected_year=year_dropdown)

interactive(children=(Dropdown(description='Year:', options=('1970', '1980', '1990', '2000', '2010', '2020', '…

<h1 style="padding:10px;background-color:#250902;margin:0;color:#e09f3e;font-family:sans-serif;font-size:24px;text-align:center;border-radius: 50px;overflow:hidden;font-weight:700">Top 5 Countries by Populations Percentage in Selected Year</h1>

In [50]:
data = {
    'Country': population['Country/Territory'],
    'Year': population['Year'],
    'Population': population['Population']
}
new_df = pd.DataFrame(data)

year_dropdown = widgets.Dropdown(
    options=new_df['Year'].unique(),
    description='Year:',
    disabled=False,
)

def update_graph(selected_year):
    filtered_data = new_df[new_df['Year'] == selected_year].sort_values(by="Population", ascending=False).head(5)
    
    plt.figure(figsize=(7, 7))
    plt.pie(
        filtered_data['Population'], 
        labels=filtered_data['Country'], 
        autopct=lambda p: f'{p:.2f}%', 
        startangle=140, 
        colors=['#1D3557', '#457B9D', '#E63946', '#F1FAEE', '#A8DADC' ]
    )
    plt.title(f'Population Percentage of Top 5 Countries in {selected_year}')

    plt.show()

widgets.interactive(update_graph, selected_year=year_dropdown)

interactive(children=(Dropdown(description='Year:', options=('1970', '1980', '1990', '2000', '2010', '2020', '…

<h1 style="padding:10px;background-color:#250902;margin:0;color:#e09f3e;font-family:sans-serif;font-size:24px;text-align:center;border-radius: 50px;overflow:hidden;font-weight:700">Year-wise Population by Selected Country</h1>

In [51]:
new_data = {
    'Country': population['Country/Territory'],
    'Year': population['Year'],
    'Population': population['Population']
}

new_df_v2 = pd.DataFrame(new_data)

country_dropdown = widgets.Dropdown(
    options=new_df_v2['Country'].unique(),
    description='Country:',
    disabled=False,
)

def update_graph_v2(selected_country):
    filtered_data_v2 = new_df_v2[new_df_v2['Country'] == selected_country]
    
    plt.figure(figsize=(12, 6))
    bars_v2 = plt.bar(filtered_data_v2['Year'], filtered_data_v2['Population'] / 1e6, color='#415A77')
    plt.title(f'Population of Country : {selected_country}')
    plt.xlabel('')
    plt.ylabel('')
    plt.xticks(rotation=45)

    for bar in bars_v2:
        yval = bar.get_height()
        plt.text(bar.get_x() + bar.get_width()/2, yval, f'{yval:.2f}M', ha='center', va='bottom')

    

    plt.show()

widgets.interactive(update_graph_v2, selected_country=country_dropdown)

interactive(children=(Dropdown(description='Country:', options=('Zimbabwe', 'Guernsey', 'Guatemala', 'Guam', '…

<h1 style="padding:10px;background-color:#e09f3e;margin:0;color:#250902;font-family:sans-serif;font-size:30px;text-align:center;border-radius: 50px;overflow:hidden;font-weight:700">Machine Learning Models to Predicts the Population</h1>

In [52]:
cleaned_df = df[['Country/Territory', '2022', '2020', '2015', '2010', '2000', '1990', '1980', '1970']]
cleaned_df

Unnamed: 0,Country/Territory,2022,2020,2015,2010,2000,1990,1980,1970
0,Afghanistan,41128771,38972230,33753499,28189672,19542982,10694796,12486631,10752971
1,Albania,2842321,2866849,2882481,2913399,3182021,3295066,2941651,2324731
2,Algeria,44903225,43451666,39543154,35856344,30774621,25518074,18739378,13795915
3,American Samoa,44273,46189,51368,54849,58230,47818,32886,27075
4,Andorra,79824,77700,71746,71519,66097,53569,35611,19860
...,...,...,...,...,...,...,...,...,...
229,Wallis and Futuna,11572,11655,12182,13142,14723,13454,11315,9377
230,Western Sahara,575986,556048,491824,413296,270375,178529,116775,76371
231,Yemen,33696614,32284046,28516545,24743946,18628700,13375121,9204938,6843607
232,Zambia,20017675,18927715,16248230,13792086,9891136,7686401,5720438,4281671


In [53]:
df_encoded = pd.get_dummies(cleaned_df, columns=['Country/Territory'], drop_first=True, dtype='int')

df_encoded.columns = [col.replace('Country/Territory_', '') for col in df_encoded.columns]

X = df_encoded.drop(columns=['2022'])
y = df_encoded['2022']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

<h1 style="padding:10px;background-color:#250902;margin:0;color:#e09f3e;font-family:sans-serif;font-size:24px;text-align:center;border-radius: 50px;overflow:hidden;font-weight:700">Model : Linear Regression</h1>

In [54]:
model = LinearRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

r2 = r2_score(y_test, y_pred)
r2

0.9998877971256137

In [55]:
def predict_population_lr(country_name):
    if country_name not in df['Country/Territory'].values:
        return f"Country '{country_name}' not found in the dataset."

    cleaned_df = df[['Country/Territory', '2022', '2020', '2015', '2010', '2000', '1990', '1980', '1970']]

    melted_df = cleaned_df.melt(id_vars=['Country/Territory'], var_name='Year', value_name='Population')

    melted_df['Year'] = melted_df['Year'].astype(int)

    X = melted_df[['Country/Territory', 'Year']]
    y = melted_df['Population']

    preprocessor = ColumnTransformer(
        transformers=[
            ('country_ohe', OneHotEncoder(drop='first'), ['Country/Territory']),
            ('year_scaler', StandardScaler(), ['Year'])
        ]
    )

    pipeline = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('model', LinearRegression())
    ])

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    pipeline.fit(X_train, y_train)

    input_data = pd.DataFrame({'Country/Territory': [country_name], 'Year': [2030]})
    predicted_population = pipeline.predict(input_data)[0]
    
    y_pred = pipeline.predict(X_test)
    r2 = r2_score(y_test, y_pred)

    predicted_population_millions = predicted_population / 1_000_000
    return f"Predicted population of {country_name} in 2030: {predicted_population_millions:.2f} million\nR² Score: {r2:.4f}"

country_to_predict = input("Enter the country name: ")
result = predict_population_lr(country_to_predict)
print(result)

Predicted population of India in 2030: 1108.84 million
R² Score: 0.9630


<h1 style="padding:10px;background-color:#250902;margin:0;color:#e09f3e;font-family:sans-serif;font-size:24px;text-align:center;border-radius: 50px;overflow:hidden;font-weight:700">Model : Random Forest</h1>

In [56]:
model_rf = RandomForestRegressor()
model_rf.fit(X_train, y_train)

y_pred_rf = model_rf.predict(X_test)

r2_rf = r2_score(y_test, y_pred_rf)
r2_rf

0.9603397950219483

In [57]:
def predict_population_rf(country_name):
    if country_name not in df['Country/Territory'].values:
        return f"Country '{country_name}' not found in the dataset."

    cleaned_df = df[['Country/Territory', '2022', '2020', '2015', '2010', '2000', '1990', '1980', '1970']]

    melted_df = cleaned_df.melt(id_vars=['Country/Territory'], var_name='Year', value_name='Population')

    melted_df['Year'] = melted_df['Year'].astype(int)

    X = melted_df[['Country/Territory', 'Year']]
    y = melted_df['Population']

    preprocessor = ColumnTransformer(
        transformers=[
            ('country_ohe', OneHotEncoder(drop='first'), ['Country/Territory']),
            ('year_scaler', StandardScaler(), ['Year'])
        ]
    )

    pipeline = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('model', RandomForestRegressor(random_state=42))
    ])

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    pipeline.fit(X_train, y_train)

    input_data = pd.DataFrame({'Country/Territory': [country_name], 'Year': [2030]})
    predicted_population = pipeline.predict(input_data)[0]
    
    y_pred = pipeline.predict(X_test)
    r2 = r2_score(y_test, y_pred)

    predicted_population_millions = predicted_population / 1_000_000
    return f"Predicted population of {country_name} in 2030: {predicted_population_millions:.2f} million\nR² Score: {r2:.4f}"

country_to_predict = input("Enter the country name: ")
result = predict_population_rf(country_to_predict)
print(result)

Predicted population of India in 2030: 1380.60 million
R² Score: 0.9869


<h1 style="padding:10px;background-color:#e09f3e;margin:0;color:#250902;font-family:sans-serif;font-size:30px;text-align:center;border-radius: 50px;overflow:hidden;font-weight:700">Prediction by Numpy using Formula : (mx + b)</h1>

<h1 style="padding:10px;background-color:#250902;margin:0;color:#e09f3e;font-family:sans-serif;font-size:24px;text-align:center;border-radius: 50px;overflow:hidden;font-weight:700">Predict the Population of Given Country in 2030</h1>

In [58]:
def predict_population_numpy(country_name):
    if country_name not in df['Country/Territory'].values:
        return f"Country '{country_name}' not found in the dataset."

    cleaned_df = df[['Country/Territory', '2022', '2020', '2015', '2010', '2000', '1990', '1980', '1970']]
    
    country_data = cleaned_df[cleaned_df['Country/Territory'] == country_name].iloc[0, 1:].values

    country_data = pd.to_numeric(country_data, errors='coerce')

    valid_mask = ~np.isnan(country_data)
    country_data = country_data[valid_mask]
    years = np.array([2022, 2020, 2015, 2010, 2000, 1990, 1980, 1970])[valid_mask]

    coeffs = np.polyfit(years, country_data, 1)

    m, b = coeffs
    predicted_population_2030 = m * 2030 + b

    population_data = {
        'Year': [2020, 2022, 2030],
        'Population': [country_data[1], country_data[0], predicted_population_2030]
    }

    fig = go.Figure()
    
    fig.add_trace(go.Scatter(
        x=population_data['Year'][:2],
        y=population_data['Population'][:2],
        mode='lines+markers',
        name='Actual Population',
        marker=dict(size=10),
        line=dict(color='#415A77', width=2)
    ))

    fig.add_trace(go.Scatter(
        x=[2022, 2030],
        y=[population_data['Population'][1], predicted_population_2030],
        mode='lines',
        name='Predicted Population 2022-2030',
        line=dict(color='red', dash='dot')
    ))

    fig.add_trace(go.Scatter(
        x=[2030],
        y=[predicted_population_2030],
        mode='markers+text',
        name='Predicted Population 2030',
        marker=dict(size=10, color='red'),
        text=[f"{predicted_population_2030 / 1_000_000:.2f}M"],
        textposition='top center'
    ))

    fig.update_layout(
        title=f'Population of {country_name} in 2030 (using trends)',
        xaxis_title='',
        yaxis_title='',
        yaxis_tickformat=',',
        showlegend=True,
        plot_bgcolor='white',
        xaxis=dict(
            showgrid=False,
            gridcolor='white'
        ),
        yaxis=dict(
            showgrid=False,
            gridcolor='white'
        )
    )

    fig.show()

    predicted_population_millions = predicted_population_2030 / 1_000_000
    return f"Predicted population of {country_name} in 2030 (using trend): {predicted_population_millions:.2f} million"

country_to_predict = input("Enter the country name: ")
result = predict_population_numpy(country_to_predict)
print(result)

Predicted population of India in 2030 (using trend): 1568.09 million


<h1 style="padding:10px;background-color:#250902;margin:0;color:#e09f3e;font-family:sans-serif;font-size:24px;text-align:center;border-radius: 50px;overflow:hidden;font-weight:700">Predict the Population of Selected Country in 2030</h1>

In [59]:
def predict_population_numpy(country_name):
    if country_name not in df['Country/Territory'].values:
        return f"Country '{country_name}' not found in the dataset."

    cleaned_df = df[['Country/Territory', '2022', '2020', '2015', '2010', '2000', '1990', '1980', '1970']]
    country_data = cleaned_df[cleaned_df['Country/Territory'] == country_name].iloc[0, 1:].values
    country_data = pd.to_numeric(country_data, errors='coerce')
    valid_mask = ~np.isnan(country_data)
    country_data = country_data[valid_mask]
    years = np.array([2022, 2020, 2015, 2010, 2000, 1990, 1980, 1970])[valid_mask]

    coeffs = np.polyfit(years, country_data, 1)
    m, b = coeffs
    predicted_population_2030 = m * 2030 + b

    population_data = {
        'Year': [2020, 2022, 2030],
        'Population': [country_data[1], country_data[0], predicted_population_2030]
    }

    plt.figure(figsize=(10, 5))
    sns.lineplot(x=population_data['Year'][:2], y=population_data['Population'][:2], marker='o', color='#415A77', label='Actual Population')
    sns.lineplot(x=[2022, 2030], y=[population_data['Population'][1], predicted_population_2030],
                 marker='o', color='red', linestyle='dotted', label='Predicted Population 2022-2030')

    plt.text(2020, population_data['Population'][0], 
             f"{population_data['Population'][0] / 1_000_000:.2f}M", color='black', ha='center')
    plt.text(2022, population_data['Population'][1], 
             f"{population_data['Population'][1] / 1_000_000:.2f}M", color='black', ha='center')
    plt.text(2030, predicted_population_2030, f"{predicted_population_2030 / 1_000_000:.2f}M", 
             color='red', ha='left')

    plt.title(f'Population of {country_name} in 2030 (using trends)')
    plt.xlabel('')
    plt.ylabel('')

    plt.xticks([2020, 2022, 2030])
    
    plt.grid(False)
    plt.gca().spines['top'].set_visible(False)
    plt.gca().spines['right'].set_visible(False)
    plt.gca().spines['left'].set_visible(True)
    plt.gca().spines['bottom'].set_visible(True)

    plt.legend()
    plt.show()

    predicted_population_millions = predicted_population_2030 / 1_000_000
    return f"Predicted population of {country_name} in 2030 (using trend): {predicted_population_millions:.2f} million"

country_dropdown = widgets.Dropdown(
    options=df['Country/Territory'].unique(),
    description='Country:',
    disabled=False,
)

widgets.interactive(predict_population_numpy, country_name=country_dropdown)

interactive(children=(Dropdown(description='Country:', options=('Afghanistan', 'Albania', 'Algeria', 'American…

<h1 style="padding:10px;background-color:#250902;margin:0;color:#e09f3e;font-family:sans-serif;font-size:24px;text-align:center;border-radius: 50px;overflow:hidden;font-weight:700">Population of Selected Continent in All Years</h1>

In [60]:
population_continent = df.melt(id_vars=['Continent'], value_vars=['2022', '2020', '2010', '2000', '1990', '1980', '1970'], var_name='Year', value_name='Population')
population_continent = population_continent.groupby(['Continent', 'Year'])['Population'].sum().reset_index()

In [61]:
data = {
    'Continent': population_continent['Continent'],
    'Year': population_continent['Year'],
    'Population': population_continent['Population']
}

new_df_v3 = pd.DataFrame(data)

continent_dropdown = widgets.Dropdown(
    options=new_df_v3['Continent'].unique(),
    description='Continent:',
    disabled=False,
)

def update_graph(selected_continent):
    filtered_data = new_df_v3[new_df_v3['Continent'] == selected_continent].sort_values(by="Population", ascending=True)
    
    plt.figure(figsize=(12, 6))
    bars = plt.bar(filtered_data['Year'], filtered_data['Population'] / 1e6, color='#415A77')
    plt.title(f'Population of {selected_continent} from 1970 - 2022')
    plt.xlabel('')
    plt.ylabel('')
    plt.xticks(rotation=45)

    for bar in bars:
        yval = bar.get_height()
        plt.text(bar.get_x() + bar.get_width()/2, yval, f'{yval:.2f}M', ha='center', va='bottom')

    plt.grid(False)
    plt.gca().spines['top'].set_visible(False)
    plt.gca().spines['right'].set_visible(False)
    plt.gca().spines['left'].set_visible(True)
    plt.gca().spines['bottom'].set_visible(True)

    plt.show()

widgets.interactive(update_graph, selected_continent=continent_dropdown)

interactive(children=(Dropdown(description='Continent:', options=('Africa', 'Asia', 'Europe', 'North America',…

<h1 style="padding:10px;background-color:#250902;margin:0;color:#e09f3e;font-family:sans-serif;font-size:24px;text-align:center;border-radius: 50px;overflow:hidden;font-weight:700">Population of Selected Year in All Continents</h1>

In [62]:
data = {
    'Continent': population_continent['Continent'],
    'Year': population_continent['Year'],
    'Population': population_continent['Population']
}

new_df_v3 = pd.DataFrame(data)

year_dropdown = widgets.Dropdown(
    options=new_df_v3['Year'].unique(),
    description='Year:',
    disabled=False,
)

def update_graph(selected_year):
    filtered_data = new_df_v3[new_df_v3['Year'] == selected_year].sort_values(by="Population", ascending=True)
    
    plt.figure(figsize=(12, 6))
    bars = plt.bar(filtered_data['Continent'], filtered_data['Population'] / 1e6, color='#415A77')
    plt.title(f'Population of Continents in {selected_year}')
    plt.xlabel('')
    plt.ylabel('')
    plt.xticks(rotation=45)

    for bar in bars:
        yval = bar.get_height()
        plt.text(bar.get_x() + bar.get_width()/2, yval, f'{yval:.2f}M', ha='center', va='bottom')

    plt.grid(False)
    plt.gca().spines['top'].set_visible(False)
    plt.gca().spines['right'].set_visible(False)
    plt.gca().spines['left'].set_visible(True)
    plt.gca().spines['bottom'].set_visible(True)

    plt.show()

widgets.interactive(update_graph, selected_year=year_dropdown)

interactive(children=(Dropdown(description='Year:', options=('1970', '1980', '1990', '2000', '2010', '2020', '…

<h1 style="padding:10px;background-color:#250902;margin:0;color:#e09f3e;font-family:sans-serif;font-size:24px;text-align:center;border-radius: 50px;overflow:hidden;font-weight:700">Continents by Populations Percentage in Selected Year</h1>

In [63]:
data = {
    'Continent': population_continent['Continent'],
    'Year': population_continent['Year'],
    'Population': population_continent['Population']
}
new_df_v4 = pd.DataFrame(data)

year_dropdown = widgets.Dropdown(
    options=new_df_v4['Year'].unique(),
    description='Year:',
    disabled=False,
)

def update_graph(selected_year):
    filtered_data = new_df_v4[new_df_v4['Year'] == selected_year].sort_values(by="Population", ascending=False)
    
    plt.figure(figsize=(7, 7))
    plt.pie(
        filtered_data['Population'], 
        labels=filtered_data['Continent'], 
        autopct=lambda p: f'{p:.2f}%', 
        startangle=140, 
        colors=['#1D3557', '#457B9D', '#E63946', '#F1FAEE', '#A8DADC', '#8d99ae' ]
    )
    plt.title(f'Population Percentage of Continents in {selected_year}')

    plt.show()

widgets.interactive(update_graph, selected_year=year_dropdown)

interactive(children=(Dropdown(description='Year:', options=('1970', '1980', '1990', '2000', '2010', '2020', '…

<h1 style="padding:10px;background-color:#e09f3e;margin:0;color:#250902;font-family:sans-serif;font-size:30px;text-align:center;border-radius: 50px;overflow:hidden;font-weight:700">Prediction by Numpy using Formula : (mx + b)</h1>

<h1 style="padding:10px;background-color:#250902;margin:0;color:#e09f3e;font-family:sans-serif;font-size:24px;text-align:center;border-radius: 50px;overflow:hidden;font-weight:700">Predict the Population of Given Continents in 2030</h1>

In [64]:
def predict_population_and_plot(continent_name):
    continent_data = population_continent[population_continent['Continent'] == continent_name].sort_values(by='Year')
    
    years = continent_data['Year'].values.astype(int)
    populations = continent_data['Population'].values.astype(float)

    coeffs = np.polyfit(years, populations, 1)
    m, b = coeffs
    predicted_population_2030 = m * 2030 + b

    plot_data = {
        'Year': [2020, 2022, 2030],
        'Population': [populations[-2], populations[-1], predicted_population_2030]
    }

    plt.figure(figsize=(10, 5))
    sns.lineplot(x=plot_data['Year'][:2], y=plot_data['Population'][:2], marker='o', color='#415A77', label=f'Population Trend ({continent_name})')
    sns.lineplot(x=[2022, 2030], y=[plot_data['Population'][1], predicted_population_2030],
                 marker='o', color='red', linestyle='dotted', label='Predicted Population 2022-2030')
    
    plt.text(2020, plot_data['Population'][0], f"{plot_data['Population'][0] / 1_000_000:.2f}M", color='black', ha='center')
    plt.text(2022, plot_data['Population'][1], f"{plot_data['Population'][1] / 1_000_000:.2f}M", color='black', ha='center')
    plt.text(2030, predicted_population_2030, f"{predicted_population_2030 / 1_000_000:.2f}M", color='red', ha='left')
    
    plt.title(f'Population Trend in {continent_name} for 2020, 2022, and Predicted 2030')
    plt.xlabel('Year')
    plt.ylabel('Population')
    plt.xticks([2020, 2022, 2030])
    plt.grid(False)
    plt.gca().spines['top'].set_visible(False)
    plt.gca().spines['right'].set_visible(False)
    plt.gca().spines['left'].set_visible(True)
    plt.gca().spines['bottom'].set_visible(True)
    plt.legend()
    plt.show()
    
    return f"Predicted population of {continent_name} in 2030: {predicted_population_2030 / 1_000_000:.2f} million."

continent_dropdown = widgets.Dropdown(
    options=df['Continent'].unique(),
    description='Select Continent:',
    disabled=False,
)

widgets.interactive(predict_population_and_plot, continent_name=continent_dropdown)

interactive(children=(Dropdown(description='Select Continent:', options=('Asia', 'Europe', 'Africa', 'Oceania'…

<h1 style="padding:10px;background-color:#250902;margin:0;color:#e09f3e;font-family:sans-serif;font-size:24px;text-align:center;border-radius: 50px;overflow:hidden;font-weight:700">World Population Trend</h1>

In [65]:
trend = df.iloc[:, 5:13].sum().sort_values(ascending=True)

fig = px.line(x=trend.index, y=trend.values, markers=True)
fig.update_layout(title="World Population Trend (1970-2022)", xaxis_title='', yaxis_title='')
fig.update_layout(
        showlegend=True,
        plot_bgcolor='white',
        xaxis=dict(
            showgrid=False,
            gridcolor='white'
        ),
        yaxis=dict(
            showgrid=False,
            gridcolor='white'
        )
    )
fig.update_xaxes(tickangle=20)
fig.show()

<h1 style="padding:10px;background-color:#e09f3e;margin:0;color:#250902;font-family:sans-serif;font-size:30px;text-align:center;border-radius: 50px;overflow:hidden;font-weight:700">Prediction by Numpy using Formula : (mx + b)</h1>

<h1 style="padding:10px;background-color:#250902;margin:0;color:#e09f3e;font-family:sans-serif;font-size:24px;text-align:center;border-radius: 50px;overflow:hidden;font-weight:700">Predict the Population of World in 2030</h1>

In [66]:
trend_df = trend.reset_index()
trend_df.columns = ['Year', 'Population']
trend_df['Year'] = trend_df['Year'].astype(int)

years = trend_df['Year'].values
populations = trend_df['Population'].values
coeffs = np.polyfit(years, populations, 1)
m, b = coeffs
predicted_population_2030 = m * 2030 + b

population_data = {
    'Year': [2020, 2022, 2030],
    'Population': [
        trend_df.loc[trend_df['Year'] == 2020, 'Population'].values[0],
        trend_df.loc[trend_df['Year'] == 2022, 'Population'].values[0],
        predicted_population_2030
    ]
}

fig = go.Figure()

# Actual Population Line
fig.add_trace(go.Scatter(
    x=population_data['Year'][:2],
    y=population_data['Population'][:2],
    mode='lines+markers',
    name='Actual Population',
    marker=dict(size=10),
    line=dict(color='#415A77', width=2)
))

# Predicted Population Line
fig.add_trace(go.Scatter(
    x=[2022, 2030],
    y=[population_data['Population'][1], predicted_population_2030],
    mode='lines',
    name='Predicted Population 2022-2030',
    line=dict(color='red', dash='dot')
))

# Predicted Population Marker for 2030
fig.add_trace(go.Scatter(
    x=[2030],
    y=[predicted_population_2030],
    mode='markers+text',
    name='Predicted Population 2030',
    marker=dict(size=10, color='red'),
    text=[f"{predicted_population_2030 / 1_000_000:.2f}M"],
    textposition='top center'
))

# Customize layout
fig.update_layout(
    title='Population Trend Prediction',
    xaxis_title='Year',
    yaxis_title='Population',
    yaxis_tickformat=',',
    showlegend=True,
    plot_bgcolor='white',
    xaxis=dict(
        showgrid=False,
        gridcolor='white'
    ),
    yaxis=dict(
        showgrid=False,
        gridcolor='white'
    )
)

fig.show()

print(f"Predicted population for 2030: {predicted_population_2030 / 1_000_000:.2f} million.")

Predicted population for 2030: 8655.56 million.


<h1 style="padding:10px;background-color:#e09f3e;margin:0;color:#250902;font-family:sans-serif;font-size:30px;text-align:center;border-radius: 50px;overflow:hidden;font-weight:700">THANK YOU</h1>