## PART 1
This jupyter notebook analyses the differences between the sexes by age in Ireland.
- Weighted mean age (by sex)
- The difference between the sexes by age

In [None]:
# Importing the pandas library
import pandas as pd

In [None]:
url = "https://ws.cso.ie/public/api.restful/PxStat.Data.Cube_API.ReadDataset/FY006A/CSV/1.0/en"
df = pd.read_csv(url)

In [None]:
# Get the list of column headers
df.columns


In [None]:
# Get the list of column headers
headers = df.columns.tolist()
headers

For calculation we need three columns:
- sex(male/female)
- single Year of Age
- value

In [None]:
# Dropping unnecessary columns
drop_columns = [
    'STATISTIC',
    'Statistic Label',
    'TLIST(A1)',
    'CensusYear',
    'C02199V02655',
    'C02076V03371',
    'C03789V04537',
    'UNIT'
]

# Removing the specified columns from the DataFrame
df.drop(columns=drop_columns, inplace=True)

# Get the list of column headers after dropping unnecessary columns
df.columns


In [None]:
# Filtering out rows where "Single Year of Age" is "All ages"
df = df[df["Single Year of Age"] != "All ages"]

In [None]:
# Output what we have in the 'Single Year of Age' column
df["Single Year of Age"].unique()

Getting our data to numerical view.

In [None]:
df['Single Year of Age'] = df['Single Year of Age'].str.replace('Under 1 year', '0')
df['Single Year of Age'] = df['Single Year of Age'].str.replace('\D', '', regex=True)


Choosing just males and females. Ignoring Both sexes

In [None]:
df = df[df["Sex"] != "Both sexes"]
df["Sex"].unique()

Fixing table data, converting all data to numeric

In [None]:
df["Single Year of Age"] = pd.to_numeric(df["Single Year of Age"], errors="coerce")
df["VALUE"] = pd.to_numeric(df["VALUE"], errors="coerce")

Calculation for male

In [None]:
df_male = df[df["Sex"] == "Male"]
total_age = (df_male["Single Year of Age"] * df_male["VALUE"]).sum()
total_population = df_male["VALUE"].sum()
weighted_mean_male = total_age / total_population

print(weighted_mean_male)

In [None]:
df_female = df[df["Sex"] == "Female"]
total_age = (df_female["Single Year of Age"] * df_female["VALUE"]).sum()
total_population = df_female["VALUE"].sum()
weighted_mean_female = total_age / total_population

print(weighted_mean_female)

Difference between ages

In [None]:
age_difference = weighted_mean_female - weighted_mean_male
print(age_difference)


## PART 2

In [None]:
# Variable with targeted age.
age = 35

# Creating boolean masks for age filtering
lower_age_limit = df['Single Year of Age'] >= age - 5 # > 30
higher_age_limit = df['Single Year of Age'] <= age + 5 # < 40

# Combining both masks to filter the DataFrame.
age_mask = lower_age_limit & higher_age_limit

# Groups the DataFrame based on the age mask
df_age_filtered = df[age_mask]

In [None]:
print(df_age_filtered.head(3))

In [None]:
# Calculating the total male population.
male_population = df_age_filtered[df_age_filtered['Sex'] == 'Male']['VALUE'].sum()

# Calculating the total female population.
female_population = df_age_filtered[df_age_filtered['Sex'] == 'Female']['VALUE'].sum()

population_difference = female_population - male_population

print(population_difference)

## PART 3

In [None]:
df['Administrative Counties'].unique()

In [None]:
# Grouping by 'Administrative Counties' and 'Sex' to sum the 'VALUE' column.
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.groupby.html#pandas-dataframe-groupby
population_by_region_sex = df_age_filtered.groupby(['Administrative Counties', 'Sex'])['VALUE'].sum()
print(population_by_region_sex)

In [63]:
region_dict = {}

for (region, sex), value in population_by_region_sex.items():
    if region not in region_dict:
        region_dict[region] = {'Male': 0, 'Female': 0}
    region_dict[region][sex] = value

In [64]:
print(region_dict)

{'Carlow County Council': {'Male': 4451, 'Female': 4774}, 'Cavan County Council': {'Male': 5776, 'Female': 6150}, 'Clare County Council': {'Male': 8085, 'Female': 8896}, 'Cork City Council': {'Male': 18812, 'Female': 19750}, 'Cork County Council': {'Male': 23706, 'Female': 26545}, 'Donegal County Council': {'Male': 10621, 'Female': 11700}, 'Dublin City Council': {'Male': 60867, 'Female': 59831}, 'Dún Laoghaire Rathdown County Council': {'Male': 17074, 'Female': 18450}, 'Fingal County Council': {'Male': 26150, 'Female': 29092}, 'Galway City Council': {'Male': 7156, 'Female': 7650}, 'Galway County Council': {'Male': 12421, 'Female': 13904}, 'Ireland': {'Male': 384030, 'Female': 414506}, 'Kerry County Council': {'Male': 9957, 'Female': 11125}, 'Kildare County Council': {'Male': 18671, 'Female': 20602}, 'Kilkenny County Council': {'Male': 7012, 'Female': 7519}, 'Laois County Council': {'Male': 6877, 'Female': 7398}, 'Leitrim County Council': {'Male': 2203, 'Female': 2500}, 'Limerick City &

In [68]:
max_region = ''
max_difference = 0

for region, counts in region_dict.items():
    difference = counts['Male'] - counts['Female']  # без abs
    if difference > max_difference:  # ищем максимальное значение
        max_difference = difference
        max_region = region

print(f"Maximum difference in county {max_region} is: {max_difference}")

Maximum difference in county Dublin City Council is: 1036
