In [1]:
import pandas as pd

## Read-in dataset

In [2]:
df = pd.read_csv('Data/GI_det.csv')

In [3]:
df.head(10)

Unnamed: 0,Lower tier local authorities Code,Lower tier local authorities,Gender identity (8 categories) Code,Gender identity (8 categories),Observation
0,E06000001,Hartlepool,-8,Does not apply,0
1,E06000001,Hartlepool,1,Gender identity the same as sex registered at ...,70588
2,E06000001,Hartlepool,2,Gender identity different from sex registered ...,167
3,E06000001,Hartlepool,3,Trans woman,49
4,E06000001,Hartlepool,4,Trans man,51
5,E06000001,Hartlepool,5,Non-binary,33
6,E06000001,Hartlepool,6,All other gender identities,21
7,E06000001,Hartlepool,7,Not answered,3777
8,E06000002,Middlesbrough,-8,Does not apply,0
9,E06000002,Middlesbrough,1,Gender identity the same as sex registered at ...,106009


## Data Cleaning

* rename columns
* remove 'Does not apply' from gender identity category
* rename some values in gender identity category so they are not as wordy

In [4]:
df = df.rename(columns = {'Lower tier local authorities Code': 'LA_code', 'Lower tier local authorities': 'LA_name', 'Gender identity (8 categories) Code': 'GI_code', 'Gender identity (8 categories)': 'GI_cat'})

In [5]:
df = df[df.GI_cat != 'Does not apply']

In [6]:
df['GI_cat'] = df['GI_cat'].replace({'Gender identity the same as sex registered at birth': 'Gender identity the same as sex', 'Gender identity different from sex registered at birth but no specific identity given': 'Gender identity different from sex'})

In [7]:
df.head(10)

Unnamed: 0,LA_code,LA_name,GI_code,GI_cat,Observation
1,E06000001,Hartlepool,1,Gender identity the same as sex,70588
2,E06000001,Hartlepool,2,Gender identity different from sex,167
3,E06000001,Hartlepool,3,Trans woman,49
4,E06000001,Hartlepool,4,Trans man,51
5,E06000001,Hartlepool,5,Non-binary,33
6,E06000001,Hartlepool,6,All other gender identities,21
7,E06000001,Hartlepool,7,Not answered,3777
9,E06000002,Middlesbrough,1,Gender identity the same as sex,106009
10,E06000002,Middlesbrough,2,Gender identity different from sex,496
11,E06000002,Middlesbrough,3,Trans woman,141


## Data Pre-processing

Now, let's use our 'Observation' column to calculate % of gender identity categories for each Local Authority

In [8]:
df['Percentage'] = ''

for i in df.LA_name.unique():
    b = df[df.LA_name == i]
    perc = b.Observation / b.Observation.sum() * 100
    df.loc[b.index, 'Percentage'] = round(perc,2)

In [9]:
df.head(10)

Unnamed: 0,LA_code,LA_name,GI_code,GI_cat,Observation,Percentage
1,E06000001,Hartlepool,1,Gender identity the same as sex,70588,94.51
2,E06000001,Hartlepool,2,Gender identity different from sex,167,0.22
3,E06000001,Hartlepool,3,Trans woman,49,0.07
4,E06000001,Hartlepool,4,Trans man,51,0.07
5,E06000001,Hartlepool,5,Non-binary,33,0.04
6,E06000001,Hartlepool,6,All other gender identities,21,0.03
7,E06000001,Hartlepool,7,Not answered,3777,5.06
9,E06000002,Middlesbrough,1,Gender identity the same as sex,106009,93.04
10,E06000002,Middlesbrough,2,Gender identity different from sex,496,0.44
11,E06000002,Middlesbrough,3,Trans woman,141,0.12


## Read-in shapefile

In [10]:
import geopandas as gpd

In [11]:
gdf = gpd.read_file('Shapefiles/LADs/LAD_MAY_2022_UK_BFE_V3.shp')

DriverError: Shapefiles/LADs/LAD_MAY_2022_UK_BFE_V3.shp: No such file or directory

In [12]:
gdf.head()

Unnamed: 0,LAD22CD,LAD22NM,BNG_E,BNG_N,LONG,LAT,GlobalID,geometry
0,E06000001,Hartlepool,447160,531474,-1.27018,54.6761,2efc9848-300e-4ef3-a36e-58d6856b9817,"POLYGON ((447213.900 537036.104, 447228.798 53..."
1,E06000002,Middlesbrough,451141,516887,-1.21099,54.5447,6d66b015-1f67-40f6-b239-15911fa03834,"POLYGON ((448489.897 522071.798, 448592.597 52..."
2,E06000003,Redcar and Cleveland,464361,519597,-1.00608,54.5675,a5a6513f-916e-4769-bed2-cd019d18719a,"POLYGON ((455525.931 528406.654, 455724.632 52..."
3,E06000004,Stockton-on-Tees,444940,518183,-1.30664,54.5569,14e8450b-7e7c-479a-a335-095ac2d9a701,"POLYGON ((444157.002 527956.304, 444165.898 52..."
4,E06000005,Darlington,428029,515648,-1.56835,54.5353,2f212ecf-daf5-4171-b9c6-825c0d33e5af,"POLYGON ((423496.602 524724.299, 423497.204 52..."


In [13]:
gdf.LAD22NM.nunique()

374

## Cleaning gdf

* rename columns to match 'df'
* get rid of redundant Local Authorities

In [14]:
gdf = gdf.rename(columns = {'LAD22CD': 'LA_code', 'LAD22NM' : 'LA_name'})

In [15]:
gdf = gdf[gdf['LA_code'].isin(df.LA_code.unique())]

In [16]:
gdf.head(50)

Unnamed: 0,LA_code,LA_name,BNG_E,BNG_N,LONG,LAT,GlobalID,geometry
0,E06000001,Hartlepool,447160,531474,-1.27018,54.6761,2efc9848-300e-4ef3-a36e-58d6856b9817,"POLYGON ((447213.900 537036.104, 447228.798 53..."
1,E06000002,Middlesbrough,451141,516887,-1.21099,54.5447,6d66b015-1f67-40f6-b239-15911fa03834,"POLYGON ((448489.897 522071.798, 448592.597 52..."
2,E06000003,Redcar and Cleveland,464361,519597,-1.00608,54.5675,a5a6513f-916e-4769-bed2-cd019d18719a,"POLYGON ((455525.931 528406.654, 455724.632 52..."
3,E06000004,Stockton-on-Tees,444940,518183,-1.30664,54.5569,14e8450b-7e7c-479a-a335-095ac2d9a701,"POLYGON ((444157.002 527956.304, 444165.898 52..."
4,E06000005,Darlington,428029,515648,-1.56835,54.5353,2f212ecf-daf5-4171-b9c6-825c0d33e5af,"POLYGON ((423496.602 524724.299, 423497.204 52..."
5,E06000006,Halton,354246,382146,-2.68853,53.3342,79d75109-ea21-4148-b303-ce7d03795d27,"POLYGON ((351539.901 389475.203, 351630.102 38..."
6,E06000007,Warrington,362744,388456,-2.56167,53.3916,bd72c5e2-e955-4bd6-8d91-f549a7c7f96d,"POLYGON ((367308.201 398265.497, 367315.399 39..."
7,E06000008,Blackburn with Darwen,369490,422806,-2.4636,53.7008,cef1af53-4397-4537-8223-39578967089e,"POLYGON ((369226.299 431801.198, 369234.000 43..."
8,E06000009,Blackpool,332819,436635,-3.02199,53.8216,a578b265-93b1-4a96-89d9-6e2edf36918d,"POLYGON ((332978.998 440832.003, 332985.900 44..."
9,E06000010,"Kingston upon Hull, City of",511894,431650,-0.30382,53.7692,e05ebff0-c763-4b81-aa5d-b94b1a253248,"POLYGON ((510966.600 436533.003, 511217.497 43..."


In [17]:
gdf['LA_name'] = gdf['LA_name'].replace({'Bristol, City of': 'Bristol', 'Kingston upon Hull, City of': 'Kingston upon Hull', 'Herefordshire, County of': 'Herefordshire'})

In [18]:
gdf.head(20)

Unnamed: 0,LA_code,LA_name,BNG_E,BNG_N,LONG,LAT,GlobalID,geometry
0,E06000001,Hartlepool,447160,531474,-1.27018,54.6761,2efc9848-300e-4ef3-a36e-58d6856b9817,"POLYGON ((447213.900 537036.104, 447228.798 53..."
1,E06000002,Middlesbrough,451141,516887,-1.21099,54.5447,6d66b015-1f67-40f6-b239-15911fa03834,"POLYGON ((448489.897 522071.798, 448592.597 52..."
2,E06000003,Redcar and Cleveland,464361,519597,-1.00608,54.5675,a5a6513f-916e-4769-bed2-cd019d18719a,"POLYGON ((455525.931 528406.654, 455724.632 52..."
3,E06000004,Stockton-on-Tees,444940,518183,-1.30664,54.5569,14e8450b-7e7c-479a-a335-095ac2d9a701,"POLYGON ((444157.002 527956.304, 444165.898 52..."
4,E06000005,Darlington,428029,515648,-1.56835,54.5353,2f212ecf-daf5-4171-b9c6-825c0d33e5af,"POLYGON ((423496.602 524724.299, 423497.204 52..."
5,E06000006,Halton,354246,382146,-2.68853,53.3342,79d75109-ea21-4148-b303-ce7d03795d27,"POLYGON ((351539.901 389475.203, 351630.102 38..."
6,E06000007,Warrington,362744,388456,-2.56167,53.3916,bd72c5e2-e955-4bd6-8d91-f549a7c7f96d,"POLYGON ((367308.201 398265.497, 367315.399 39..."
7,E06000008,Blackburn with Darwen,369490,422806,-2.4636,53.7008,cef1af53-4397-4537-8223-39578967089e,"POLYGON ((369226.299 431801.198, 369234.000 43..."
8,E06000009,Blackpool,332819,436635,-3.02199,53.8216,a578b265-93b1-4a96-89d9-6e2edf36918d,"POLYGON ((332978.998 440832.003, 332985.900 44..."
9,E06000010,Kingston upon Hull,511894,431650,-0.30382,53.7692,e05ebff0-c763-4b81-aa5d-b94b1a253248,"POLYGON ((510966.600 436533.003, 511217.497 43..."


In [19]:
merged = pd.merge(df, gdf, on = ['LA_code', 'LA_name'], how = 'left')

In [20]:
merged.head(10)

Unnamed: 0,LA_code,LA_name,GI_code,GI_cat,Observation,Percentage,BNG_E,BNG_N,LONG,LAT,GlobalID,geometry
0,E06000001,Hartlepool,1,Gender identity the same as sex,70588,94.51,447160,531474,-1.27018,54.6761,2efc9848-300e-4ef3-a36e-58d6856b9817,"POLYGON ((447213.900 537036.104, 447228.798 53..."
1,E06000001,Hartlepool,2,Gender identity different from sex,167,0.22,447160,531474,-1.27018,54.6761,2efc9848-300e-4ef3-a36e-58d6856b9817,"POLYGON ((447213.900 537036.104, 447228.798 53..."
2,E06000001,Hartlepool,3,Trans woman,49,0.07,447160,531474,-1.27018,54.6761,2efc9848-300e-4ef3-a36e-58d6856b9817,"POLYGON ((447213.900 537036.104, 447228.798 53..."
3,E06000001,Hartlepool,4,Trans man,51,0.07,447160,531474,-1.27018,54.6761,2efc9848-300e-4ef3-a36e-58d6856b9817,"POLYGON ((447213.900 537036.104, 447228.798 53..."
4,E06000001,Hartlepool,5,Non-binary,33,0.04,447160,531474,-1.27018,54.6761,2efc9848-300e-4ef3-a36e-58d6856b9817,"POLYGON ((447213.900 537036.104, 447228.798 53..."
5,E06000001,Hartlepool,6,All other gender identities,21,0.03,447160,531474,-1.27018,54.6761,2efc9848-300e-4ef3-a36e-58d6856b9817,"POLYGON ((447213.900 537036.104, 447228.798 53..."
6,E06000001,Hartlepool,7,Not answered,3777,5.06,447160,531474,-1.27018,54.6761,2efc9848-300e-4ef3-a36e-58d6856b9817,"POLYGON ((447213.900 537036.104, 447228.798 53..."
7,E06000002,Middlesbrough,1,Gender identity the same as sex,106009,93.04,451141,516887,-1.21099,54.5447,6d66b015-1f67-40f6-b239-15911fa03834,"POLYGON ((448489.897 522071.798, 448592.597 52..."
8,E06000002,Middlesbrough,2,Gender identity different from sex,496,0.44,451141,516887,-1.21099,54.5447,6d66b015-1f67-40f6-b239-15911fa03834,"POLYGON ((448489.897 522071.798, 448592.597 52..."
9,E06000002,Middlesbrough,3,Trans woman,141,0.12,451141,516887,-1.21099,54.5447,6d66b015-1f67-40f6-b239-15911fa03834,"POLYGON ((448489.897 522071.798, 448592.597 52..."


In [21]:
tm = merged[merged.GI_cat == 'Trans man']

In [22]:
tm.shape

(331, 12)

In [23]:
tm

Unnamed: 0,LA_code,LA_name,GI_code,GI_cat,Observation,Percentage,BNG_E,BNG_N,LONG,LAT,GlobalID,geometry
3,E06000001,Hartlepool,4,Trans man,51,0.07,447160,531474,-1.27018,54.6761,2efc9848-300e-4ef3-a36e-58d6856b9817,"POLYGON ((447213.900 537036.104, 447228.798 53..."
10,E06000002,Middlesbrough,4,Trans man,174,0.15,451141,516887,-1.21099,54.5447,6d66b015-1f67-40f6-b239-15911fa03834,"POLYGON ((448489.897 522071.798, 448592.597 52..."
17,E06000003,Redcar and Cleveland,4,Trans man,98,0.09,464361,519597,-1.00608,54.5675,a5a6513f-916e-4769-bed2-cd019d18719a,"POLYGON ((455525.931 528406.654, 455724.632 52..."
24,E06000004,Stockton-on-Tees,4,Trans man,122,0.08,444940,518183,-1.30664,54.5569,14e8450b-7e7c-479a-a335-095ac2d9a701,"POLYGON ((444157.002 527956.304, 444165.898 52..."
31,E06000005,Darlington,4,Trans man,71,0.08,428029,515648,-1.56835,54.5353,2f212ecf-daf5-4171-b9c6-825c0d33e5af,"POLYGON ((423496.602 524724.299, 423497.204 52..."
...,...,...,...,...,...,...,...,...,...,...,...,...
2285,W06000020,Torfaen,4,Trans man,51,0.07,327459,200480,-3.05101,51.6984,a9bfd875-581d-4b13-a88c-0be157037504,"POLYGON ((323898.201 211287.499, 324115.698 21..."
2292,W06000021,Monmouthshire,4,Trans man,32,0.04,337812,209231,-2.90280,51.7783,856d4586-ee01-46c6-8030-155676e72ec8,"MULTIPOLYGON (((345965.201 181030.700, 345944...."
2299,W06000022,Newport,4,Trans man,130,0.1,337897,187432,-2.89769,51.5823,9ded1cac-b393-4732-a068-4fbe1e8881cc,"POLYGON ((342211.900 194845.495, 342221.803 19..."
2306,W06000023,Powys,4,Trans man,54,0.05,302329,273255,-3.43531,52.3486,82c92ce3-674b-41dc-8702-31072fa0fc10,"POLYGON ((322392.901 334017.198, 322378.002 33..."


In [31]:
tm[:3]

Unnamed: 0,LA_code,LA_name,GI_code,GI_cat,Observation,Percentage,geometry
3,E06000001,Hartlepool,4,Trans man,51,0.07,"POLYGON ((447213.900 537036.104, 447233.696 53..."
10,E06000002,Middlesbrough,4,Trans man,174,0.15,"POLYGON ((448489.897 522071.798, 448609.401 52..."
17,E06000003,Redcar and Cleveland,4,Trans man,98,0.09,"POLYGON ((455525.931 528406.654, 455724.632 52..."


In [32]:
tm = tm[:3]

In [33]:
tm = gpd.GeoDataFrame(tm, geometry='geometry')

In [35]:
import plotly.express as px
import json
# Drop unnecessary columns
# tm = tm.drop(['BNG_E', 'BNG_N', 'LONG', 'LAT', 'GlobalID'], axis=1)

# Simplify the geometries
tm['geometry'] = tm.geometry.simplify(tolerance=10, preserve_topology=True)

# Convert to GeoJSON and inspect the size and contents
geojson = tm.geometry.to_json()
print(f"Size of GeoJSON: {len(geojson)} bytes")

# Write GeoJSON to a file to inspect its contents
with open('geometry.geojson', 'w') as f:
    f.write(geojson)

Size of GeoJSON: 72216 bytes


In [36]:
tm['Percentage'] = pd.to_numeric(tm['Percentage'])

In [37]:
tm.index

Index([3, 10, 17], dtype='int64')

In [39]:
tm.reset_index(inplace=True)

Unnamed: 0,index,LA_code,LA_name,GI_code,GI_cat,Observation,Percentage,geometry
0,3,E06000001,Hartlepool,4,Trans man,51,0.07,"POLYGON ((447213.900 537036.104, 447233.696 53..."
1,10,E06000002,Middlesbrough,4,Trans man,174,0.15,"POLYGON ((448489.897 522071.798, 448609.401 52..."
2,17,E06000003,Redcar and Cleveland,4,Trans man,98,0.09,"POLYGON ((455525.931 528406.654, 455724.632 52..."


In [44]:
fig = px.choropleth(tm, geojson = geojson,
                    locations = tm.index,
                    color = 'Percentage',
                    color_continuous_scale = "Viridis",
                    range_color=(0,100))


fig.show()