# Sex

This notebook calculates the non-response rate for the gender identity question separately for females and males.

## Import libraries

In [2]:
import pandas as pd

## Read-in data

In [3]:
# Load the source workbook into a DataFrame.
sex = pd.read_excel(io='../Data/Sex_GI.xlsx')

In [4]:
# Preview the first five rows to check which columns were loaded.

sex.head(n=5)

Unnamed: 0,Lower tier local authorities Code,Lower tier local authorities,Gender identity (7 categories) Code,Gender identity (7 categories),Sex (2 categories) Code,Sex (2 categories),Observation
0,E06000001,Hartlepool,-8,Does not apply,1,Female,0
1,E06000001,Hartlepool,-8,Does not apply,2,Male,0
2,E06000001,Hartlepool,1,Gender identity the same as sex registered at ...,1,Female,36936
3,E06000001,Hartlepool,1,Gender identity the same as sex registered at ...,2,Male,33652
4,E06000001,Hartlepool,2,Gender identity different from sex registered ...,1,Female,93


In [5]:
# Keep only the columns needed for the analysis.
# Columns are dropped by name rather than by position, so the cell does not
# depend on the column order of the workbook. `errors='ignore'` skips names
# that are already gone, so re-running the cell is a no-op instead of an error
# (a positional drop fails on a second run because position 4 no longer exists).

sex = sex.drop(
    columns=[
        'Lower tier local authorities Code',
        'Lower tier local authorities',
        'Sex (2 categories) Code',
    ],
    errors='ignore',
)

In [6]:
# Keep only the rows whose gender identity code is 6. The preview that follows
# this cell shows that code labelled "Not answered", i.e. the non-responses.

sex_nr = sex.loc[sex['Gender identity (7 categories) Code'].eq(6)]

In [8]:
# Check the subset: every row shown should carry gender identity code 6.

sex_nr.head(n=5)

Unnamed: 0,Gender identity (7 categories) Code,Gender identity (7 categories),Sex (2 categories),Observation
12,6,Not answered,Female,1948
13,6,Not answered,Male,1829
26,6,Not answered,Female,3380
27,6,Not answered,Male,3611
40,6,Not answered,Female,2802


# Analysis

In [14]:
# Total the non-response counts for each sex. The result has one row per sex,
# with the summed 'Observation' values stored in a column named 'NR_Total'.

sex_grouped = (
    sex_nr
    .groupby(by='Sex (2 categories)')['Observation']
    .sum()
    .reset_index(name='NR_Total')
)

In [15]:
# Display the non-response total for each sex.
sex_grouped

Unnamed: 0,Sex (2 categories),NR_Total
0,Female,1449457
1,Male,1465166


In [16]:
# Total the observations for each sex across every gender identity category in
# `sex` (not only the non-response rows); this is the denominator for the rate.

total_obs = (
    sex
    .groupby(by='Sex (2 categories)')['Observation']
    .sum()
    .reset_index(name='Total_Observations')
)

In [17]:
# Display the total number of observations for each sex.
total_obs

Unnamed: 0,Sex (2 categories),Total_Observations
0,Female,25039033
1,Male,23527378


In [18]:
# Non-response rate (%) per sex: each sex's non-response total divided by the
# total observations for that same sex, rounded to two decimal places.
# The denominator is looked up by sex label rather than by row position, so the
# result stays correct even if the two tables differ in row order or one of
# them is missing a category (a missing category yields NaN, not a wrong rate).

sex_grouped['NR_rate'] = (
    sex_grouped['NR_Total']
    / sex_grouped['Sex (2 categories)'].map(
        total_obs.set_index('Sex (2 categories)')['Total_Observations']
    )
    * 100
).round(2)

In [19]:
# Display the final table: non-response total and non-response rate (%) for each sex.

sex_grouped

Unnamed: 0,Sex (2 categories),NR_Total,NR_rate
0,Female,1449457,5.79
1,Male,1465166,6.23


# Output

In [22]:
# Save the per-sex non-response table as a CSV file.
# NOTE(review): no `index` argument is passed, so pandas' default applies;
# confirm that writing the row index as the first column is intended.
sex_grouped.to_csv(path_or_buf='../Data/sex_totals_GI.csv')