# Aim

# Sex

This notebook calculates the non-response rate for the sexual orientation question, separately for males and females. The resulting dataframes are then incorporated into Bokeh DataTables in [SO_Outputs.ipynb](./SO_Outputs.ipynb) and [main.py](../main.py).

## Import libraries

In [1]:
import pandas as pd



## Read-in data

In [2]:
# Load the sex-by-sexual-orientation workbook into a DataFrame
sex = pd.read_excel(io='../Data/Sex_SO.xlsx')

In [3]:
# Preview the first five rows to check that the columns loaded as expected

sex.head(n=5)

Unnamed: 0,Lower tier local authorities Code,Lower tier local authorities,Sexual orientation (6 categories) Code,Sexual orientation (6 categories),Sex (2 categories) Code,Sex (2 categories),Observation
0,E06000001,Hartlepool,-8,Does not apply,1,Female,0
1,E06000001,Hartlepool,-8,Does not apply,2,Male,0
2,E06000001,Hartlepool,1,Straight or Heterosexual,1,Female,35414
3,E06000001,Hartlepool,1,Straight or Heterosexual,2,Male,32656
4,E06000001,Hartlepool,2,Gay or Lesbian,1,Female,563


In [4]:
# Drop the columns this analysis does not use: the local-authority code and
# name, and the numeric sex code (the text label in 'Sex (2 categories)' is kept).
#
# The columns are named explicitly rather than selected by position, so a
# change in the spreadsheet's column order cannot remove the wrong data.
# Rebinding `sex` (instead of inplace=True) together with errors='ignore'
# keeps the cell safe to re-run: on a second run the columns are already
# gone and the call is simply a no-op rather than an IndexError.
sex = sex.drop(
    columns=[
        'Lower tier local authorities Code',
        'Lower tier local authorities',
        'Sex (2 categories) Code',
    ],
    errors='ignore',
)

In [5]:
# List the distinct sexual-orientation labels present in the data
sex.loc[:, 'Sexual orientation (6 categories)'].unique()

array(['Does not apply', 'Straight or Heterosexual', 'Gay or Lesbian',
       'Bisexual', 'All other sexual orientations', 'Not answered'],
      dtype=object)

In [6]:
# List the distinct numeric sexual-orientation codes present in the data
sex.loc[:, 'Sexual orientation (6 categories) Code'].unique()

array([-8,  1,  2,  3,  4,  5])

In [7]:
# Keep only the rows whose sexual-orientation code is 5, which is the
# code paired with the 'Not answered' label in this dataset

not_answered_mask = sex['Sexual orientation (6 categories) Code'].eq(5)
sex_nr = sex.loc[not_answered_mask]

In [8]:
# Check the first five rows of the subset: only 'Not answered' rows should remain

sex_nr.head(n=5)

Unnamed: 0,Sexual orientation (6 categories) Code,Sexual orientation (6 categories),Sex (2 categories),Observation
10,5,Not answered,Female,2430
11,5,Not answered,Male,2124
22,5,Not answered,Female,4225
23,5,Not answered,Male,4072
34,5,Not answered,Female,3720


# Analysis

In [9]:
# Sum the 'Not answered' observations separately for each sex, giving a
# two-column frame: the sex label and its non-response total (NR_Total)

sex_grouped = (
    sex_nr
    .groupby('Sex (2 categories)')['Observation']
    .agg(NR_Total='sum')
    .reset_index()
)

In [10]:
# Display the non-response total (NR_Total) for each sex
sex_grouped

Unnamed: 0,Sex (2 categories),NR_Total
0,Female,1897782
1,Male,1728861


In [11]:
# Sum every observation in the full dataset (all sexual-orientation
# categories) separately for each sex; this is the denominator for the rate

total_obs = (
    sex
    .groupby('Sex (2 categories)')['Observation']
    .agg(Total_Observations='sum')
    .reset_index()
)

In [12]:
# Display the total number of observations for each sex
total_obs

Unnamed: 0,Sex (2 categories),Total_Observations
0,Female,25039027
1,Male,23527349


In [13]:
# Non-response rate per sex: 'Not answered' observations as a percentage of
# all observations for that sex, rounded to two decimal places.
#
# The denominator is looked up by sex label rather than by row position.
# sex_grouped and total_obs are built by separate groupbys, so dividing the
# two columns directly only works while both frames happen to list the same
# sexes in the same order; matching on the label removes that assumption.

totals_by_sex = total_obs.set_index('Sex (2 categories)')['Total_Observations']
sex_grouped['NR_rate'] = (
    sex_grouped['NR_Total']
    / sex_grouped['Sex (2 categories)'].map(totals_by_sex)
    * 100
).round(2)

In [14]:
# Display the finished table: non-response total and rate (%) for each sex

sex_grouped

Unnamed: 0,Sex (2 categories),NR_Total,NR_rate
0,Female,1897782,7.58
1,Male,1728861,7.35


# Output

In [15]:
# Write both tables to CSV without the row index:
# the per-sex non-response table, then the per-sex observation totals
sex_grouped.to_csv(path_or_buf='../Data/sex_nr_SO.csv', index=False)
total_obs.to_csv(path_or_buf='../Data/sex_tot_SO.csv', index=False)