In [None]:
import pandas as pd

In [None]:
# Load the Bangladesh population CSV from the Kaggle input directory into a DataFrame.
csv_path = "/kaggle/input/bangladesh-population-growth-ratio/data-resource_2016_10_24_bangladesh-population-growth-ratio.csv"
df = pd.read_csv(csv_path)

In [None]:
# Preview the first five rows (head() defaults to n=5).
df.head()

In [None]:
# Show the last two rows of the table.
df.iloc[-2:]

In [None]:
# Summary statistics for the DataFrame (by default pandas reports on the numeric columns).
df.describe()

In [None]:
# Count the missing values in each column.
df.isna().sum()

# What is the total population trend from 1971 to 2016?

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Line chart of total population against year, with a marker on every data point.
fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(data=df, x='Year', y='Population', marker='o', color='b', ax=ax)
ax.set(xlabel="Year", ylabel="Total Population", title="Population Growth Over Time")
ax.grid()
plt.show()

# What is the male-to-female ratio over the years?

In [None]:
# Add a per-row male/female ratio column to df, then chart it against year.
df['Male_Female_Ratio'] = df['Male'].div(df['Female'])

fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(data=df, x='Year', y='Male_Female_Ratio', marker='o', color='g', ax=ax)
ax.set(
    xlabel="Year",
    ylabel="Male-to-Female Ratio",
    title="Male-Female Ratio Over the Years",
)
ax.grid()
plt.show()

# What are the minimum, maximum, and average population values?

In [None]:
# Smallest value in the Population column.
df["Population"].min()


In [None]:
 df["Population"].max()


In [None]:
# Arithmetic mean of the Population column across all rows.
df["Population"].mean()

# Is the population growth rate increasing or decreasing?

In [None]:
# Row-over-row percentage change in population, stored as a new df column.
# The first row has no predecessor, so its growth rate is NaN.
# NOTE(review): this is a year-over-year rate only if rows are sorted by Year with no gaps — confirm.
df['Growth Rate (%)'] = df['Population'].pct_change().mul(100)

fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(data=df, x='Year', y='Growth Rate (%)', marker='o', color='b', ax=ax)
ax.set(
    xlabel="Year",
    ylabel="Growth Rate (%)",
    title="Annual Population Growth Rate",
)
ax.grid()
plt.show()

# What is the average annual growth rate?

In [None]:
# Arithmetic mean of the per-row percentage changes. Series.mean() skips NaN by default,
# so the first row's undefined growth rate does not affect the result.
avg_growth_rate = df['Growth Rate (%)'].mean()

In [None]:
# Display the average growth rate (in percent) as the cell output.
avg_growth_rate

# How does the growth rate of males compare to females?

In [None]:
# For each sex: (source column / legend label, derived growth-rate column, line colour).
growth_series = (
    ("Male", 'Male Growth Rate (%)', 'blue'),
    ("Female", 'Female Growth Rate (%)', 'pink'),
)

# Add one row-over-row percentage-change column per sex and draw both on shared axes.
fig, ax = plt.subplots(figsize=(12, 6))
for sex, rate_col, colour in growth_series:
    df[rate_col] = df[sex].pct_change() * 100
    sns.lineplot(data=df, x='Year', y=rate_col, label=sex, color=colour, ax=ax)

ax.set(
    xlabel="Year",
    ylabel="Growth Rate (%)",
    title="Male vs Female Population Growth Rate",
)
ax.legend()
ax.grid()
plt.show()

# What were the highest and lowest population growth rates between two consecutive years?

In [None]:
# Find the rows with the largest and smallest row-over-row growth rate.
# idxmax()/idxmin() skip NaN by default, so the first row (no previous row to compare
# against, hence a NaN growth rate) is never selected.
growth_rate = df['Growth Rate (%)']
max_idx = growth_rate.idxmax()
min_idx = growth_rate.idxmin()

# Full rows are kept under their original names in case they are used elsewhere.
max_growth = df.loc[max_idx]
min_growth = df.loc[min_idx]

# Read Year with a scalar (row, column) lookup rather than from the row Series.
# Selecting a whole row of numeric columns upcasts it to one common dtype, so once
# float columns exist, max_growth['Year'] prints as e.g. "1975.0". The scalar lookup
# keeps the Year column's own dtype.
max_year = df.loc[max_idx, 'Year']
min_year = df.loc[min_idx, 'Year']

print(f"Year with Highest Growth: {max_year}, Growth: {growth_rate.loc[max_idx]:.2f}%")
print(f"Year with Lowest Growth: {min_year}, Growth: {growth_rate.loc[min_idx]:.2f}%")


# Prediction: projecting the male and female populations for 2017–2030 with linear regression

In [None]:
from sklearn.linear_model import LinearRegression

# Feature matrix: Year as a single-column 2-D array (the double brackets keep it 2-D,
# which is the layout scikit-learn estimators expect for X).
X = df.loc[:, ['Year']].values

# Targets: one 1-D array per sex.
y_male, y_female = (df[column].values for column in ('Male', 'Female'))

In [None]:
# Create one unfitted LinearRegression estimator per sex; they are fitted in a later cell.
model_male, model_female = LinearRegression(), LinearRegression()

In [None]:
# Fit each model on the same Year feature matrix against its own target.
# The last expression's return value (the fitted female model) is what the cell displays.
model_male.fit(X, y_male)
model_female.fit(X, y_female)

In [None]:
# Project male and female populations for future years with the fitted models.
import numpy as np

# Years 2017..2030 inclusive (arange's stop is exclusive), shaped as a one-feature
# column so it matches the layout of X used for fitting.
future_years = np.arange(2017, 2031)[:, np.newaxis]
male_predictions, female_predictions = (
    model.predict(future_years) for model in (model_male, model_female)
)

In [None]:
# Print the projected male and female population for each future year, one line each.
# int() truncates the regression output toward zero to report whole numbers.
for year, male_pop, female_pop in zip(future_years.ravel(), male_predictions, female_predictions):
    print(
        f"Predicted Male Population in {year}: {int(male_pop)}",
        f"Predicted Female Population in {year}: {int(female_pop)}",
        sep="\n",
    )