In [20]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import Markdown
import numpy as np
import seaborn as sns

### Importing data

In [21]:
# Location of the Eurostat annual-population CSV, relative to the notebook's working directory.
url = "../Data/Annual_population_eurostat(mandatory).csv"

# Read the whole file into the module-level frame used by every plotting cell below.
table = pd.read_csv(filepath_or_buffer=url)

# Preview the first rows to check the layout.
display(table.head(n=5))

# NOTE(review): presumably the positional index of the first yearly column
# (the plotting functions index rows with `i+4`); not referenced in the visible cells.
year_offset = 4

Unnamed: 0,Country,ISO3,citizen,sex,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024
0,Austria,AUT,EU28_FOR,F,288028.0,310759.0,329981.0,348012.0,365125.0,383295.0,,,,
1,Belgium,BEL,EU28_FOR,F,416232.0,428008.0,434889.0,440355.0,448655.0,457258.0,,,,
2,Switzerland,CHE,EU28_FOR,F,594052.0,610566.0,622893.0,630371.0,637330.0,645916.0,,,,
3,Cyprus,CYP,EU28_FOR,F,55730.0,57716.0,57673.0,59787.0,63044.0,62935.0,,,,
4,Czechia,CZE,EU28_FOR,F,72465.0,76736.0,81670.0,86132.0,91296.0,96186.0,,,,


### Gender ratio in countries

In [22]:


def gender_ratio(country, first_col=4, first_year=2015, n_years=10):
    """Plot the male and female share of a country's total population per year.

    Selects, from the module-level ``table``, the rows of ``country`` whose
    ``citizen`` is ``'TOTAL'`` and whose ``sex`` is ``'M'``, ``'F'`` and ``'T'``.
    For each yearly column the male and female counts are expressed as a
    percentage of the ``'T'`` row, and the resulting line chart is saved to
    ``../Images/Countries/<country>/gender_ratio_<country lower-cased>.png``.

    Parameters
    ----------
    country : str
        Value matched against the ``Country`` column.
    first_col : int, default 4
        Positional index (as used by ``.iloc``) of the first yearly column.
    first_year : int, default 2015
        Year label shown for the first yearly column.
    n_years : int, default 10
        Number of consecutive yearly columns to plot.

    Returns
    -------
    None
        Problems are reported with ``print`` instead of being raised, so a
        loop over many countries keeps going.
    """
    # `@country` lets pandas bind the value itself, so a name containing a quote
    # character no longer breaks the query expression.
    country_male = table.query("Country == @country and sex == 'M' and citizen == 'TOTAL'")
    country_female = table.query("Country == @country and sex == 'F' and citizen == 'TOTAL'")
    country_total = table.query("Country == @country and sex == 'T' and citizen == 'TOTAL'")

    # Report missing rows explicitly rather than as an "out-of-bounds" indexing error.
    if country_male.empty or country_female.empty or country_total.empty:
        print(f"Skipping {country}: Missing data for one or more categories")
        return

    figure = None
    try:
        year_cols = list(range(first_col, first_col + n_years))
        male = country_male.iloc[0, year_cols].astype(float).to_numpy()
        female = country_female.iloc[0, year_cols].astype(float).to_numpy()
        total = country_total.iloc[0, year_cols].astype(float).to_numpy()

        # A year is plotted only when all three values are present and the total
        # is non-zero; otherwise both percentages are left as NaN (a gap in the line).
        valid = ~(np.isnan(male) | np.isnan(female) | np.isnan(total)) & (total != 0)
        male_perc = np.full(n_years, np.nan)
        female_perc = np.full(n_years, np.nan)
        male_perc[valid] = male[valid] / total[valid] * 100
        female_perc[valid] = female[valid] / total[valid] * 100

        years = list(range(first_year, first_year + n_years))

        figure, axes = plt.subplots()
        axes.plot(years, male_perc, label="Male as % of Total", color="blue")
        axes.plot(years, female_perc, label="Female as % of Total", color="red")
        axes.set_xticks(years)

        axes.set_xlabel('Year')
        axes.set_ylabel('Percentage of population')
        axes.set_title('Male-to-Total and Female-to-Total Population Ratio in ' + country)
        axes.legend()

        dir_name = os.path.join("../Images/Countries", country)
        os.makedirs(dir_name, exist_ok=True)

        file_path = os.path.join(dir_name, f"gender_ratio_{country.lower()}.png")
        figure.savefig(file_path)

    except Exception as e:
        # Best-effort batch job: report the failure and let the caller continue.
        print(f"Error {country}: {e}")
    finally:
        # Always release the figure, even when plotting or saving failed.
        if figure is not None:
            plt.close(figure)

# Render and save the gender-ratio chart once for every distinct country in the table.
for country in table["Country"].unique():
    gender_ratio(country)


### Gender count in countries

In [23]:
def gender_count(country, first_col=4, first_year=2015, n_years=10):
    """Plot a country's male and female population counts per year.

    Selects, from the module-level ``table``, the rows of ``country`` whose
    ``citizen`` is ``'TOTAL'`` and whose ``sex`` is ``'M'`` and ``'F'``, then
    saves a line chart of the yearly values to
    ``../Images/Countries/<country>/gender_count_<country lower-cased>.png``.

    Parameters
    ----------
    country : str
        Value matched against the ``Country`` column.
    first_col : int, default 4
        Positional index (as used by ``.iloc``) of the first yearly column.
    first_year : int, default 2015
        Year label shown for the first yearly column.
    n_years : int, default 10
        Number of consecutive yearly columns to plot.

    Returns
    -------
    None
        Problems are reported with ``print`` instead of being raised, so a
        loop over many countries keeps going.
    """
    # `@country` lets pandas bind the value itself, so a name containing a quote
    # character no longer breaks the query expression.
    country_male = table.query("Country == @country and sex == 'M' and citizen == 'TOTAL'")
    country_female = table.query("Country == @country and sex == 'F' and citizen == 'TOTAL'")

    # Report missing rows explicitly rather than as an "out-of-bounds" indexing error.
    if country_male.empty or country_female.empty:
        print(f"Skipping {country}: Missing data for one or more categories")
        return

    figure = None
    try:
        year_cols = list(range(first_col, first_col + n_years))
        male_pop = country_male.iloc[0, year_cols].astype(float).to_numpy()
        female_pop = country_female.iloc[0, year_cols].astype(float).to_numpy()

        years = list(range(first_year, first_year + n_years))

        figure, axes = plt.subplots()
        axes.plot(years, male_pop, label="Male population", color="blue")
        axes.plot(years, female_pop, label="Female population", color="red")
        axes.set_xticks(years)

        axes.set_xlabel('Year')
        axes.set_ylabel('Population')
        axes.set_title('Total male and female population in ' + country)
        axes.legend()

        dir_name = os.path.join("../Images/Countries", country)
        os.makedirs(dir_name, exist_ok=True)

        file_path = os.path.join(dir_name, f"gender_count_{country.lower()}.png")
        figure.savefig(file_path)

    except Exception as e:
        # Best-effort batch job: report the failure and let the caller continue.
        print(f"Error {country}: {e}")
    finally:
        # Always release the figure, even when plotting or saving failed.
        if figure is not None:
            plt.close(figure)

# Render and save the gender-count chart for every distinct country in the table.
# (This loop previously called gender_ratio, so the gender-count charts were never produced.)
for country in table.Country.unique():
    gender_count(country)


### Total and native citizens

In [24]:
def total_count(country, first_col=4, first_year=2015, n_years=10):
    """Plot total vs. native population by gender for one country as grouped bars.

    Selects, from the module-level ``table``, the rows of ``country`` for
    sex ``'M'``/``'F'`` combined with citizen ``'TOTAL'``/``'NAT'``, draws four
    bars per year, adds a caption with the change between the first and last
    plotted year for each series, and saves the figure to
    ``../Images/Countries/<country>/natives_and_total_count_<country lower-cased>.png``.

    Parameters
    ----------
    country : str
        Value matched against the ``Country`` column.
    first_col : int, default 4
        Positional index (as used by ``.iloc``) of the first yearly column.
    first_year : int, default 2015
        Year label shown for the first yearly column.
    n_years : int, default 10
        Number of consecutive yearly columns to plot.

    Returns
    -------
    None
        Countries lacking one of the four selections, or lacking enough
        columns, are skipped with a printed message; any other failure is
        printed rather than raised so a loop over many countries keeps going.
    """
    fig = None
    try:
        # `@country` lets pandas bind the value itself, so a name containing a
        # quote character no longer breaks the query expression.
        male_total_data = table.query("Country == @country and sex == 'M' and citizen == 'TOTAL'")
        male_nat_data = table.query("Country == @country and sex == 'M' and citizen == 'NAT'")
        female_total_data = table.query("Country == @country and sex == 'F' and citizen == 'TOTAL'")
        female_nat_data = table.query("Country == @country and sex == 'F' and citizen == 'NAT'")

        if (male_total_data.empty or male_nat_data.empty or
                female_total_data.empty or female_nat_data.empty):
            print(f"Skipping {country}: Missing data for one or more categories")
            return

        year_cols = list(range(first_col, first_col + n_years))
        try:
            male_total = male_total_data.iloc[0, year_cols].astype(float).to_numpy()
            male_nat = male_nat_data.iloc[0, year_cols].astype(float).to_numpy()
            female_total = female_total_data.iloc[0, year_cols].astype(float).to_numpy()
            female_nat = female_nat_data.iloc[0, year_cols].astype(float).to_numpy()
        except IndexError:
            print(f"Skipping {country}: Not enough data columns "
                  f"(expected at least {first_col + n_years} columns)")
            return

        def _change(series):
            # Last minus first plotted year; "n/a" when either end is missing,
            # because int(NaN) would raise and abort the whole chart.
            delta = series[-1] - series[0]
            return "n/a" if np.isnan(delta) else str(int(delta))

        positions = np.arange(n_years)
        width = 0.2
        years = list(range(first_year, first_year + n_years))

        fig, ax = plt.subplots(figsize=(12, 7))
        # Four bars per year, centred on the tick: offsets -1.5, -0.5, +0.5, +1.5 widths.
        ax.bar(positions - width*1.5, male_total, width, label='Male Total', color='skyblue')
        ax.bar(positions - width*0.5, male_nat, width, label='Male Native', color='blue')
        ax.bar(positions + width*0.5, female_total, width, label='Female Total', color='lightcoral')
        ax.bar(positions + width*1.5, female_nat, width, label='Female Native', color='red')

        ax.set_xlabel('Year')
        ax.set_ylabel('Population')
        ax.set_title(f'Population by Gender and Citizenship Status in {country}')
        ax.set_xticks(positions)
        ax.set_xticklabels(years)

        # Legend sits outside the axes on the right.
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

        stats_text = (
            f"Change in native male: {_change(male_nat)} | "
            f"Change in native female: {_change(female_nat)} | "
            f"Change in total male: {_change(male_total)} | "
            f"Change in total female: {_change(female_total)}"
        )

        # Caption is placed below the axes; bbox_inches='tight' keeps it in the saved image.
        fig.text(0.5, -0.1, stats_text, ha='center', fontsize=12, wrap=True)
        fig.subplots_adjust(bottom=0.05)

        dir_name = os.path.join("../Images/Countries", country)
        os.makedirs(dir_name, exist_ok=True)

        file_path = os.path.join(dir_name, f"natives_and_total_count_{country.lower()}.png")
        fig.savefig(file_path, bbox_inches='tight')

    except Exception as e:
        # Best-effort batch job: report the failure and let the caller continue.
        print(f"Error processing {country}: {str(e)}")
    finally:
        # Always release the figure, even when plotting or saving failed.
        if fig is not None:
            plt.close(fig)

# Render and save the total-vs-native chart once for every distinct country in the table.
for country in table["Country"].unique():
    total_count(country)

Skipping Albania: Missing data for one or more categories
Skipping Armenia: Missing data for one or more categories
Skipping Georgia: Missing data for one or more categories
Skipping Moldova, Republic of: Missing data for one or more categories
Skipping Montenegro: Missing data for one or more categories
Skipping North Macedonia: Missing data for one or more categories
Skipping Serbia: Missing data for one or more categories
Skipping Ukraine: Missing data for one or more categories
