## PART 1
This jupyter notebook analyses the differences between the sexes by age in Ireland.
- Weighted mean age (by sex)
- The difference between the sexes by age

In [1]:
# This application analyzes data from a CSV file and creates visualizations using matplotlib.
# Author: Dima Kozlovskyy

# Importing the pandas library
import pandas as pd

In [2]:
url = "https://ws.cso.ie/public/api.restful/PxStat.Data.Cube_API.ReadDataset/FY006A/CSV/1.0/en"
df = pd.read_csv(url)

In [3]:
# Get the list of column headers
df.columns


Index(['STATISTIC', 'Statistic Label', 'TLIST(A1)', 'CensusYear',
       'C02199V02655', 'Sex', 'C02076V03371', 'Single Year of Age',
       'C03789V04537', 'Administrative Counties', 'UNIT', 'VALUE'],
      dtype='object')

In [4]:
# Get the list of column headers
headers = df.columns.tolist()
headers

['STATISTIC',
 'Statistic Label',
 'TLIST(A1)',
 'CensusYear',
 'C02199V02655',
 'Sex',
 'C02076V03371',
 'Single Year of Age',
 'C03789V04537',
 'Administrative Counties',
 'UNIT',
 'VALUE']

For calculation we need three columns:
- sex(male/female)
- single Year of Age
- value

In [5]:
# Dropping unnecessary columns
drop_columns = [
    'STATISTIC',
    'Statistic Label',
    'TLIST(A1)',
    'CensusYear',
    'C02199V02655',
    'C02076V03371',
    'C03789V04537',
    'UNIT'
]

# Removing the specified columns from the DataFrame
df.drop(columns=drop_columns, inplace=True)

# Get the list of column headers after dropping unnecessary columns
df.columns


Index(['Sex', 'Single Year of Age', 'Administrative Counties', 'VALUE'], dtype='object')

In [6]:
# Filtering out rows where "Single Year of Age" is "All ages"
df = df[df["Single Year of Age"] != "All ages"]

In [7]:
# Output what we have in the 'Single Year of Age' column
df["Single Year of Age"].unique()

array(['Under 1 year', '1 year', '2 years', '3 years', '4 years',
       '5 years', '6 years', '7 years', '8 years', '9 years', '10 years',
       '11 years', '12 years', '13 years', '14 years', '15 years',
       '16 years', '17 years', '18 years', '19 years', '20 years',
       '21 years', '22 years', '23 years', '24 years', '25 years',
       '26 years', '27 years', '28 years', '29 years', '30 years',
       '31 years', '32 years', '33 years', '34 years', '35 years',
       '36 years', '37 years', '38 years', '39 years', '40 years',
       '41 years', '42 years', '43 years', '44 years', '45 years',
       '46 years', '47 years', '48 years', '49 years', '50 years',
       '51 years', '52 years', '53 years', '54 years', '55 years',
       '56 years', '57 years', '58 years', '59 years', '60 years',
       '61 years', '62 years', '63 years', '64 years', '65 years',
       '66 years', '67 years', '68 years', '69 years', '70 years',
       '71 years', '72 years', '73 years', '74 years', '

Getting our data to numerical view.

In [8]:
df['Single Year of Age'] = df['Single Year of Age'].str.replace('Under 1 year', '0')
df['Single Year of Age'] = df['Single Year of Age'].str.replace('\D', '', regex=True)


  df['Single Year of Age'] = df['Single Year of Age'].str.replace('\D', '', regex=True)


Choosing just males and females. Ignoring Both sexes

In [9]:
df = df[df["Sex"] != "Both sexes"]
df["Sex"].unique()

array(['Male', 'Female'], dtype=object)

Fixing table data, converting all data to numeric

In [10]:
df["Single Year of Age"] = pd.to_numeric(df["Single Year of Age"], errors="coerce")
df["VALUE"] = pd.to_numeric(df["VALUE"], errors="coerce")

Calculation for male

In [11]:
df_male = df[df["Sex"] == "Male"]
total_age = (df_male["Single Year of Age"] * df_male["VALUE"]).sum()
total_population = df_male["VALUE"].sum()
weighted_mean_male = total_age / total_population

print(weighted_mean_male)

37.7394477371039


In [12]:
df_female = df[df["Sex"] == "Female"]
total_age = (df_female["Single Year of Age"] * df_female["VALUE"]).sum()
total_population = df_female["VALUE"].sum()
weighted_mean_female = total_age / total_population

print(weighted_mean_female)

38.9397958987787


Difference between ages

In [13]:
age_difference = weighted_mean_female - weighted_mean_male
print(age_difference)


1.2003481616747962


## PART 2

In [14]:
# Variable with targeted age.
age = 35

# Creating boolean masks for age filtering
lower_age_limit = df['Single Year of Age'] >= age - 5 # > 30
higher_age_limit = df['Single Year of Age'] <= age + 5 # < 40

# Combining both masks to filter the DataFrame.
age_mask = lower_age_limit & higher_age_limit

# Groups the DataFrame based on the age mask
df_age_filtered = df[age_mask]

In [15]:
print(df_age_filtered.head(3))

       Sex  Single Year of Age Administrative Counties  VALUE
4256  Male                  30                 Ireland  30858
4257  Male                  30   Carlow County Council    367
4258  Male                  30     Dublin City Council   6163


In [16]:
# Calculating the total male population.
male_population = df_age_filtered[df_age_filtered['Sex'] == 'Male']['VALUE'].sum()

# Calculating the total female population.
female_population = df_age_filtered[df_age_filtered['Sex'] == 'Female']['VALUE'].sum()

population_difference = female_population - male_population

print(population_difference)

60952


## PART 3

In [17]:
df['Administrative Counties'].unique()

array(['Ireland', 'Carlow County Council', 'Dublin City Council',
       'Dún Laoghaire Rathdown County Council', 'Fingal County Council',
       'South Dublin County Council', 'Kildare County Council',
       'Kilkenny County Council', 'Laois County Council',
       'Longford County Council', 'Louth County Council',
       'Meath County Council', 'Offaly County Council',
       'Westmeath County Council', 'Wexford County Council',
       'Wicklow County Council', 'Clare County Council',
       'Cork City Council', 'Cork County Council', 'Kerry County Council',
       'Limerick City & County Council', 'Tipperary County Council',
       'Waterford City & County Council', 'Galway City Council',
       'Galway County Council', 'Leitrim County Council',
       'Mayo  County Council', 'Roscommon County Council',
       'Sligo County Council', 'Cavan County Council',
       'Donegal County Council', 'Monaghan County Council'], dtype=object)

In [18]:
# Grouping by 'Administrative Counties' and 'Sex' to sum the 'VALUE' column.
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.groupby.html#pandas-dataframe-groupby
population_by_region_sex = df_age_filtered.groupby(['Administrative Counties', 'Sex'])['VALUE'].sum()
print(population_by_region_sex)

Administrative Counties   Sex   
Carlow County Council     Female     4774
                          Male       4451
Cavan County Council      Female     6150
                          Male       5776
Clare County Council      Female     8896
                                    ...  
Westmeath County Council  Male       7089
Wexford County Council    Female    12162
                          Male      10824
Wicklow County Council    Female    11943
                          Male      10338
Name: VALUE, Length: 64, dtype: int64


In [19]:

# Creating the nested dictionary structure.
region_dict = {}

# Populating the nested dictionary with population data.
for (region, sex), value in population_by_region_sex.items():
    if region not in region_dict:
        region_dict[region] = {'Male': 0, 'Female': 0}
    region_dict[region][sex] = value

In [20]:
print(region_dict)

{'Carlow County Council': {'Male': 4451, 'Female': 4774}, 'Cavan County Council': {'Male': 5776, 'Female': 6150}, 'Clare County Council': {'Male': 8085, 'Female': 8896}, 'Cork City Council': {'Male': 18812, 'Female': 19750}, 'Cork County Council': {'Male': 23706, 'Female': 26545}, 'Donegal County Council': {'Male': 10621, 'Female': 11700}, 'Dublin City Council': {'Male': 60867, 'Female': 59831}, 'Dún Laoghaire Rathdown County Council': {'Male': 17074, 'Female': 18450}, 'Fingal County Council': {'Male': 26150, 'Female': 29092}, 'Galway City Council': {'Male': 7156, 'Female': 7650}, 'Galway County Council': {'Male': 12421, 'Female': 13904}, 'Ireland': {'Male': 384030, 'Female': 414506}, 'Kerry County Council': {'Male': 9957, 'Female': 11125}, 'Kildare County Council': {'Male': 18671, 'Female': 20602}, 'Kilkenny County Council': {'Male': 7012, 'Female': 7519}, 'Laois County Council': {'Male': 6877, 'Female': 7398}, 'Leitrim County Council': {'Male': 2203, 'Female': 2500}, 'Limerick City &

In [21]:
max_region = ''
max_difference = 0

# Finding the region with the maximum difference between male and female populations.
for region, counts in region_dict.items():
    difference = counts['Male'] - counts['Female'] 
    if difference > max_difference:
        max_difference = difference
        max_region = region

print(f"Maximum difference in county {max_region} is: {max_difference}")

Maximum difference in county Dublin City Council is: 1036
