In [140]:
import math
import statistics
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random
import csv

rng = np.random.default_rng()

### Age


I picked several ranges of ages, excluding those over 64, as people were less educated at all levels in the past in Ireland.  I also excluded those under 20, as they would not have had the opportunity yet to obtain higher levels of education.  

https://www.cso.ie/en/releasesandpublications/ep/p-cp10esil/p10esil/le/

In [141]:
# Age

# Print 100 Random Samples of different age brackets
# AgeA = 20 - 24
# AgeB = 25 - 29
# AgeC = 30 - 34
# AgeD = 35- 39
# AgeE = 40 - 44
# AgeF = 45 - 49
# AgeG = 50 - 54
# AgeH = 55 - 59
# AgeI = 60 - 64


Age = ['AgeA', 'AgeB,', 'AgeC', 'AgeD', 'AgeE', 'AgeF', 'AgeG', 'AgeH,', 'AgeI', 'AgeJ']
rng.choice(Age, size = 100)

array(['AgeH,', 'AgeH,', 'AgeE', 'AgeF', 'AgeG', 'AgeB,', 'AgeE', 'AgeJ',
       'AgeD', 'AgeH,', 'AgeH,', 'AgeE', 'AgeC', 'AgeB,', 'AgeH,', 'AgeF',
       'AgeA', 'AgeH,', 'AgeE', 'AgeB,', 'AgeB,', 'AgeD', 'AgeI', 'AgeA',
       'AgeA', 'AgeF', 'AgeF', 'AgeE', 'AgeD', 'AgeF', 'AgeG', 'AgeH,',
       'AgeA', 'AgeB,', 'AgeH,', 'AgeF', 'AgeH,', 'AgeF', 'AgeC', 'AgeA',
       'AgeA', 'AgeF', 'AgeJ', 'AgeE', 'AgeG', 'AgeC', 'AgeJ', 'AgeE',
       'AgeH,', 'AgeG', 'AgeD', 'AgeI', 'AgeH,', 'AgeH,', 'AgeG', 'AgeG',
       'AgeA', 'AgeI', 'AgeH,', 'AgeB,', 'AgeC', 'AgeF', 'AgeE', 'AgeJ',
       'AgeE', 'AgeF', 'AgeB,', 'AgeH,', 'AgeH,', 'AgeI', 'AgeI', 'AgeD',
       'AgeD', 'AgeC', 'AgeF', 'AgeH,', 'AgeB,', 'AgeD', 'AgeB,', 'AgeI',
       'AgeG', 'AgeD', 'AgeI', 'AgeG', 'AgeD', 'AgeA', 'AgeA', 'AgeC',
       'AgeH,', 'AgeI', 'AgeC', 'AgeF', 'AgeB,', 'AgeD', 'AgeD', 'AgeJ',
       'AgeJ', 'AgeI', 'AgeD', 'AgeA'], dtype='<U5')

### Education

I decided on dividing the levels of education up into:  No Education, Secondary, Higher Certificate, Degree and Postgraduate.
This was based on information from the Central Statistics Office.  

https://www.cso.ie/en/releasesandpublications/ep/p-gpii/geographicalprofilesofincomeinireland2016/education/


In [146]:
# Education

# Assign the percentages of each age bracket having attained education at the various levels.
# Pick Randommly Generated Samples from each Age Bracket
# Due to the percentages adding up to 0.9 in total for AgeA, I added 0.020 to each variable to make the probabilities usable.
# Some of the other numbers had to be rounded by similar small fractions of percentages too. 


Education = []

for i in Age:
    if i == 'AgeA':
        [i] = rng.choice((['NoEd', 'Second', 'HC', 'Deg', 'PG']), 1, p = (0.042, 0.552, 0.112, 0.252, 0.042))
    
    elif i == 'AgeB':
        [i] = rng.choice((['NoEd', 'Second', 'HC', 'Deg', 'PG']), 1, p = (0.032, 0.382, 0.132, 0.322, 0.152))
    
    elif i == 'AgeC':
        [i] = rng.choice((['NoEd', 'Second', 'HC', 'Deg', 'PG']), 1, p = (0.04, 0.35, 0.15, 0.29, 0.17))
    
    elif i == 'AgeD':
        [i] = rng.choice((['NoEd', 'Second', 'HC', 'Deg', 'PG']), 1, p = (0.04, 0.37, 0.15, 0.27, 0.17))
    
    elif i == 'AgeE':
        [i] = rng.choice((['NoEd', 'Second', 'HC', 'Deg', 'PG']), 1, p = (0.048, 0.418, 0.148, 0.238, 0.148))
    
    elif i == 'AgeF':
        [i] = rng.choice((['NoEd', 'Second', 'HC', 'Deg', 'PG']), 1, p = (0.06, 0.49, 0.14, 0.19, 0.12))
    
    elif i == 'AgeG':
        [i] = rng.choice((['NoEd', 'Second', 'HC', 'Deg', 'PG']), 1, p = (0.078, 0.528, 0.128, 0.168, 0.098))
    
    elif i == 'AgeH':
        [i] = rng.choice((['NoEd', 'Second', 'HC', 'Deg', 'PG']), 1, p = (0.122, 0.542, 0.112, 0.142, 0.82))
    
    elif i == 'AgeI':
        [i] = rng.choice((['NoEd', 'Second', 'HC', 'Deg', 'PG']), 1, p = (0.208, 0.508, 0.088, 0.128, 0.068))
    
    elif i == 'AgeJ':
        [i] = rng.choice((['NoEd', 'Second', 'HC', 'Deg', 'PG']), 1, p = (0.33, 0.44, 0.07, 0.10, 0.06))
        
    Education.append(i)
    
    print (Education)
    


['HC']
['HC', 'AgeB,']
['HC', 'AgeB,', 'HC']
['HC', 'AgeB,', 'HC', 'Second']
['HC', 'AgeB,', 'HC', 'Second', 'HC']
['HC', 'AgeB,', 'HC', 'Second', 'HC', 'NoEd']
['HC', 'AgeB,', 'HC', 'Second', 'HC', 'NoEd', 'Second']
['HC', 'AgeB,', 'HC', 'Second', 'HC', 'NoEd', 'Second', 'AgeH,']
['HC', 'AgeB,', 'HC', 'Second', 'HC', 'NoEd', 'Second', 'AgeH,', 'Second']
['HC', 'AgeB,', 'HC', 'Second', 'HC', 'NoEd', 'Second', 'AgeH,', 'Second', 'NoEd']


### Average Wage Based on Level of Education

https://www.cso.ie/en/releasesandpublications/ep/p-gpii/geographicalprofilesofincomeinireland2016/education/

I took the average male and female figures from the Central Statistics Office and got the mean of each.  
    
No Education - €14158

Secondary (Listed as Upper Secondary in Reference) - €20154

Higher Certificate  - €27568

Diploma (Listed as Honours Degree in Reference) - €39370

Post Graduate (Listed as MA and PHD Combined in Reference) - €55024

### Range

Based on findings stating that salaries ranged from €734 to €477 per week (with Trinity as highest and Letterkenny IT as lowest) this shows a range variance of €14404 per year.  I decided to take this as a loose model for standard deviation of salaries in their respective groups.  Range = 1404, Standard Deviation = 7000.

https://www.joe.ie/news/college-graduate-salary-ireland-2021-new-study-732019


In doing research on the range of salaries across different levels of education, I realised that this could really come to any amount.  With people’s life situations and a multitude of various factors taken into account I made an educated guess based on the resource above to move forward with the project. I realise the limitations of the accuracy of this approach.


In [143]:
# Average Wage based on Education

AvEdWage = []

for i in range (100):
    if Education [i] == 'NoEd':
        i = rng.normal (14158, 7000, 1)
        
    elif Education [i] == 'Second':
        i = rng.normal (20154, 7000, 1)
        
    elif Education [i] == 'HC':
        i = rng.normal (27568, 7000, 1)
        
    elif Education [i] == 'Deg':
        i = rng.normal (39370, 7000, 1)
        
    elif Education [i] == 'PG':
        i = rng.normal (55024, 7000, 1)
        
    AvEdWage.append(i)
    
    print (AvEdWage)
    
    

[array([30186.50549781])]
[array([30186.50549781]), 1]
[array([30186.50549781]), 1, array([33768.20297575])]
[array([30186.50549781]), 1, array([33768.20297575]), array([44568.25142859])]
[array([30186.50549781]), 1, array([33768.20297575]), array([44568.25142859]), array([24887.64605528])]
[array([30186.50549781]), 1, array([33768.20297575]), array([44568.25142859]), array([24887.64605528]), array([27797.05440432])]
[array([30186.50549781]), 1, array([33768.20297575]), array([44568.25142859]), array([24887.64605528]), array([27797.05440432]), array([28043.75711648])]
[array([30186.50549781]), 1, array([33768.20297575]), array([44568.25142859]), array([24887.64605528]), array([27797.05440432]), array([28043.75711648]), 7]
[array([30186.50549781]), 1, array([33768.20297575]), array([44568.25142859]), array([24887.64605528]), array([27797.05440432]), array([28043.75711648]), 7, array([16201.08228653])]
[array([30186.50549781]), 1, array([33768.20297575]), array([44568.25142859]), array([

IndexError: list index out of range

### Gender

Males earn 14% more on average in Ireland. 



In [147]:
# Average Salary by Gender in Ireland 
# Males earn 14% more on average
# In the code I will icrease the male salaries by 7% and decrease the female salaries by 7% to re-create these figures.

# Gender - Create Empty Array

G = np.array(rng.choice(['Male', 'Female'], 100, p=(.50, .50)))
genderpaygap = []

# Loop the output and append to a list. Add 14% average increase to males.

for i in range(100):
    if G[i] == 'Male':
        i = 1.07
    else:
        if G[i] == 'Female':
            i = 0.93
            
    genderpaygap.append(i)
            
print(genderpaygap)
G

[1.07, 1.07, 0.93, 0.93, 0.93, 1.07, 1.07, 1.07, 1.07, 0.93, 1.07, 0.93, 0.93, 0.93, 1.07, 1.07, 1.07, 0.93, 0.93, 1.07, 0.93, 1.07, 1.07, 1.07, 0.93, 0.93, 0.93, 0.93, 1.07, 0.93, 0.93, 1.07, 0.93, 1.07, 0.93, 1.07, 1.07, 0.93, 0.93, 1.07, 0.93, 0.93, 0.93, 0.93, 0.93, 0.93, 0.93, 0.93, 1.07, 0.93, 1.07, 1.07, 0.93, 0.93, 0.93, 0.93, 0.93, 1.07, 1.07, 0.93, 1.07, 0.93, 1.07, 0.93, 0.93, 1.07, 0.93, 1.07, 0.93, 0.93, 1.07, 0.93, 0.93, 0.93, 0.93, 0.93, 0.93, 1.07, 1.07, 0.93, 1.07, 0.93, 0.93, 0.93, 1.07, 0.93, 0.93, 0.93, 1.07, 1.07, 0.93, 0.93, 1.07, 1.07, 1.07, 0.93, 0.93, 0.93, 1.07, 0.93]


array(['Male', 'Male', 'Female', 'Female', 'Female', 'Male', 'Male',
       'Male', 'Male', 'Female', 'Male', 'Female', 'Female', 'Female',
       'Male', 'Male', 'Male', 'Female', 'Female', 'Male', 'Female',
       'Male', 'Male', 'Male', 'Female', 'Female', 'Female', 'Female',
       'Male', 'Female', 'Female', 'Male', 'Female', 'Male', 'Female',
       'Male', 'Male', 'Female', 'Female', 'Male', 'Female', 'Female',
       'Female', 'Female', 'Female', 'Female', 'Female', 'Female', 'Male',
       'Female', 'Male', 'Male', 'Female', 'Female', 'Female', 'Female',
       'Female', 'Male', 'Male', 'Female', 'Male', 'Female', 'Male',
       'Female', 'Female', 'Male', 'Female', 'Male', 'Female', 'Female',
       'Male', 'Female', 'Female', 'Female', 'Female', 'Female', 'Female',
       'Male', 'Male', 'Female', 'Male', 'Female', 'Female', 'Female',
       'Male', 'Female', 'Female', 'Female', 'Male', 'Male', 'Female',
       'Female', 'Male', 'Male', 'Male', 'Female', 'Female', 'Female',


### Ethnicity

The mean Ethnicity Pay Gap in Ireland is - 5.07%, meaning people of an Ethnic Minority earn 5.07% less on average.

https://seetec.ie/legal/ethnicity-pay-gap-report/

82% of the population of Ireland is part the Ethnic Majority (White Irish) and 18% makes up the Ethnic Minorities.

https://minorityrights.org/country/ireland/

In [148]:
# Average Salary by Ethnicity (Majority and Minority) in Ireland 

# Ethnicity - Create Empty Array

E = np.array(rng.choice(['Majority', 'Minority'], 100, p=(.82, .18)))
ethnicpaygap = []

# Loop the output and append to a list. Take 5.07% average increase to males.

for i in range(100):
    if G[i] == 'Majority':
        i = 1.00
    else:
        if G[i] == 'Minority':
            i = 0.95
            
    genderpaygap.append(i)
            
print(genderpaygap)
E

[1.07, 1.07, 0.93, 0.93, 0.93, 1.07, 1.07, 1.07, 1.07, 0.93, 1.07, 0.93, 0.93, 0.93, 1.07, 1.07, 1.07, 0.93, 0.93, 1.07, 0.93, 1.07, 1.07, 1.07, 0.93, 0.93, 0.93, 0.93, 1.07, 0.93, 0.93, 1.07, 0.93, 1.07, 0.93, 1.07, 1.07, 0.93, 0.93, 1.07, 0.93, 0.93, 0.93, 0.93, 0.93, 0.93, 0.93, 0.93, 1.07, 0.93, 1.07, 1.07, 0.93, 0.93, 0.93, 0.93, 0.93, 1.07, 1.07, 0.93, 1.07, 0.93, 1.07, 0.93, 0.93, 1.07, 0.93, 1.07, 0.93, 0.93, 1.07, 0.93, 0.93, 0.93, 0.93, 0.93, 0.93, 1.07, 1.07, 0.93, 1.07, 0.93, 0.93, 0.93, 1.07, 0.93, 0.93, 0.93, 1.07, 1.07, 0.93, 0.93, 1.07, 1.07, 1.07, 0.93, 0.93, 0.93, 1.07, 0.93, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]


array(['Majority', 'Majority', 'Majority', 'Majority', 'Majority',
       'Majority', 'Majority', 'Majority', 'Majority', 'Majority',
       'Majority', 'Majority', 'Minority', 'Minority', 'Majority',
       'Majority', 'Majority', 'Minority', 'Majority', 'Majority',
       'Majority', 'Majority', 'Majority', 'Majority', 'Majority',
       'Majority', 'Majority', 'Majority', 'Majority', 'Majority',
       'Majority', 'Majority', 'Majority', 'Majority', 'Majority',
       'Majority', 'Minority', 'Majority', 'Majority', 'Minority',
       'Majority', 'Majority', 'Majority', 'Majority', 'Majority',
       'Majority', 'Majority', 'Majority', 'Minority', 'Majority',
       'Majority', 'Majority', 'Majority', 'Majority', 'Majority',
       'Majority', 'Majority', 'Majority', 'Majority', 'Majority',
       'Majority', 'Majority', 'Majority', 'Majority', 'Majority',
       'Minority', 'Minority', 'Minority', 'Majority', 'Majority',
       'Majority', 'Minority', 'Majority', 'Majority', 'Majori