In [1]:
import pandas as pd
import numpy as np
import os

import matplotlib.pyplot as plt
import plotly.express as px

# DS0005: TransPop 1, 2, and Cisgender Survey

In [2]:
# Load the combined TransPop + Cisgender survey extract.
# low_memory=False makes pandas infer every column's dtype from the whole file in a
# single pass instead of chunk by chunk, so coded answer columns cannot end up with a
# mix of ints and strings (the situation pandas reports with a DtypeWarning).
combined = pd.read_csv('data/combined.csv', low_memory=False)
combined

  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0.1,Unnamed: 0,STUDYID,WEIGHT_CISGENDER_TRANSPOP,WEIGHT_CISGENDER,WEIGHT_TRANSPOP,GMETHOD_TYPE,SURVEYCOMPLETED,GRESPONDENT_DATE,GCENREG,RACE,...,ACE_SEP,ACE_INC,ACE_EMO_I,ACE_PHY_I,ACE_SEX_I,ACE_IPV_I,ACE_SUB_I,ACE_MEN_I,ACE_SEP_I,ACE_INC_I
0,0,151768927,0.022039,,.9861429333687,,0,26-APR-2016,1,6,...,0,0,1,0,0,0,0,0,0,0
1,1,152357242,0.008485,,.3796825110912,,0,07-APR-2016,3,6,...,0,0,0,0,0,0,0,1,0,0
2,2,152444055,0.015764,,.705381155014,,0,01-MAY-2016,3,6,...,0,0,0,0,0,0,0,1,0,0
3,3,152525272,0.035655,,1.5953975915909,,0,20-APR-2016,4,6,...,1,1,1,0,0,1,1,1,1,1
4,4,152894493,0.041802,,1.870422244072,,0,05-MAY-2016,2,8,...,0,0,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1431,1431,197167223,6.299798,5.1374626159668,,SCAN,2,27-DEC-2018,4,3,...,0,0,1,1,0,1,1,1,0,0
1432,1432,197167224,0.581383,.4741154909134,,SCAN,2,15-JAN-2019,4,6,...,0,0,1,0,1,0,0,1,0,0
1433,1433,197168195,2.584832,2.1079211235046,,WEB,2,13-DEC-2018,1,6,...,0,0,1,1,1,1,0,1,0,0
1434,1434,197169207,0.545099,.4445266127586,,WEB,2,08-DEC-2018,4,6,...,0,0,0,0,0,0,0,0,0,0


In [3]:
# Respondents per group flag (a later cell labels 1 as 'Trans' and 2 as 'Cis').
combined['TRANS_CIS'].value_counts()

2    1162
1     274
Name: TRANS_CIS, dtype: int64

# Violence and Discrimination

(Are transgender respondents more likely to encounter violence than cisgender respondents?)
- Bullying (Q189, 190, p.15)
- Victimization and Discrimination (Q162- Q168, p.12)
- Household intimate partner violence: (TransPop: Q183)
- Adverse Childhood Experiences (Q178-Q188)
    - ace, ace_emo, ace_phy, ace_sex, ace_ipv, ace_men
- Everyday discrimination (TransPop: Q171.a - ii and Cisgender: Q121.a - i, p.14)


### Bullying Data Dictionary

- Q189: How often, if ever, were you bullied before you were 18 years old?
- Q190_: When you were bullied before you were 18 years old, would you say it was because of your...
    - 1: Age
    - 2: Sex (being female or male)
    - 3: Being transgender
    - 4: Gender expression or appearance
    - 5: Race/ethnicity
    - 6: Income level or education
    - 7: Sexual orientation
    - 8: Physical appearance (e.g. weight, height)
    - 9: Religion/spirituality
    - 10: Disability

In [4]:
# Bullying items: the frequency question (Q189) followed by every column whose
# name contains 'Q190_' (the attributed-reason items), plus the group flag.
q190_names = list(filter(lambda name: 'Q190_' in name, combined.columns))
bullying_col = ['Q189', *q190_names]
bullying = combined.loc[:, ['TRANS_CIS', *bullying_col]]
bullying

Unnamed: 0,TRANS_CIS,Q189,Q190_1,Q190_2,Q190_3,Q190_4,Q190_5,Q190_6,Q190_7,Q190_8,Q190_9,Q190_10
0,1,3,,,,,,,,,,
1,1,2,,,,4,,,,8,,
2,1,1,,2,,,,6,7,,,
3,1,2,,2,,,,,,8,,
4,1,4,7,7,7,7,7,7,97,97,97,97
...,...,...,...,...,...,...,...,...,...,...,...,...
1431,2,4,7,7,7,7,7,7,97,97,97,97
1432,2,2,,,,,,,,,,
1433,2,2,,,,4,,,,8,,
1434,2,2,1,,,,,,,,,


In [5]:
# Answer distribution for Q189 (how often bullied before 18). Values are strings;
# the unlabeled row in the output is the single-space code ' ' — presumably a
# missing/skipped answer, TODO confirm against the codebook.
bullying['Q189'].value_counts()

2    390
4    369
3    350
1    279
      48
Name: Q189, dtype: int64

In [6]:
# Raw codes in Q190_3 ("Being transgender"): ' ' = not selected, '3' = selected.
# NOTE(review): '7' looks like a skip code for respondents who were not asked
# Q190 — confirm against the codebook.
bullying['Q190_3'].value_counts()

     985
7    417
3     34
Name: Q190_3, dtype: int64

In [7]:
# Number of respondents per (group, Q189 answer).
# .size() counts rows in each group directly; counting the non-null entries of an
# unrelated column (Q190_1) would silently under-count if that column ever held NaN.
q189 = (
    bullying
    .groupby(['TRANS_CIS', 'Q189'])
    .size()
    .reset_index(name='count')
)
q189

Unnamed: 0,TRANS_CIS,Q189,count
0,1,,3
1,1,1.0,123
2,1,2.0,65
3,1,3.0,50
4,1,4.0,33
5,2,,45
6,2,1.0,156
7,2,2.0,325
8,2,3.0,300
9,2,4.0,336


In [8]:
# Swap numeric/string codes for readable labels.
# Series.map turns any value that is not a key into NaN, so the blank (' ') Q189
# answer becomes NaN here — and running this cell a second time on the already
# labelled frame would turn every entry into NaN. Re-run the cell that builds
# `q189` first if this one has to be repeated.
q189 = q189.assign(
    TRANS_CIS=q189['TRANS_CIS'].map({1: 'Trans', 2: 'Cis'}),
    Q189=q189['Q189'].map({'1': 'Often', '2': 'Sometimes', '3': 'Rarely', '4': 'Never'}),
)
q189

Unnamed: 0,TRANS_CIS,Q189,count
0,Trans,,3
1,Trans,Often,123
2,Trans,Sometimes,65
3,Trans,Rarely,50
4,Trans,Never,33
5,Cis,,45
6,Cis,Often,156
7,Cis,Sometimes,325
8,Cis,Rarely,300
9,Cis,Never,336


In [105]:
# Grouped bar chart of bullying frequency by group.
# - color_discrete_map pins each group to a fixed colour instead of relying on the
#   order in which the groups happen to appear in the frame.
# - category_orders keeps the x-axis in Often -> Never order.
# - labels replaces the raw column names on the axes and legend.
# NOTE(review): bars are raw respondent counts and the two groups differ a lot in
# size (274 vs 1162), so bar heights are not directly comparable between groups;
# consider plotting within-group shares instead.
q189_graph = px.bar(
    q189, x='Q189', y='count',
    color='TRANS_CIS',
    barmode='group',
    color_discrete_map={'Trans': '#03d7fc', 'Cis': '#fca2bb'},
    category_orders={'Q189': ['Often', 'Sometimes', 'Rarely', 'Never']},
    labels={'Q189': 'Reported frequency', 'count': 'Number of respondents', 'TRANS_CIS': 'Group'},
    title='<b>Q189: How often, if ever, were you bullied before you were 18 years old?</b>'
)
q189_graph.show()

In [13]:
# Rows whose Q189 answer is '4' (labelled 'Never' in the mapping cell above).
bullying.loc[bullying['Q189'].eq('4')]

Unnamed: 0,TRANS_CIS,Q189,Q190_1,Q190_2,Q190_3,Q190_4,Q190_5,Q190_6,Q190_7,Q190_8,Q190_9,Q190_10
4,1,4,7,7,7,7,7,7,97,97,97,97
7,1,4,7,7,7,7,7,7,97,97,97,97
29,1,4,7,7,7,7,7,7,97,97,97,97
31,1,4,7,7,7,7,7,7,97,97,97,97
32,1,4,7,7,7,7,7,7,97,97,97,97
...,...,...,...,...,...,...,...,...,...,...,...,...
1413,2,4,7,7,7,7,7,7,97,97,97,97
1424,2,4,7,7,7,7,7,7,97,97,97,97
1429,2,4,7,7,7,7,7,7,97,97,97,97
1430,2,4,7,7,7,7,7,7,97,97,97,97


In [49]:
# Respondents who reported at least some bullying: drop Q189 == '4' ('Never')
# and the blank (' ') answers.
some_bullying = bullying[~bullying['Q189'].isin(['4', ' '])]
some_bullying

Unnamed: 0,TRANS_CIS,Q189,Q190_1,Q190_2,Q190_3,Q190_4,Q190_5,Q190_6,Q190_7,Q190_8,Q190_9,Q190_10
0,1,3,,,,,,,,,,
1,1,2,,,,4,,,,8,,
2,1,1,,2,,,,6,7,,,
3,1,2,,2,,,,,,8,,
5,1,3,1,2,,,,6,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
1428,2,3,,,,,,,,,,
1432,2,2,,,,,,,,,,
1433,2,2,,,,4,,,,8,,
1434,2,2,1,,,,,,,,,


In [35]:
# Sanity check on one Q190 item after filtering: the output shows only blank (' ')
# and the item's own code ('10') among the remaining respondents.
some_bullying['Q190_10'].value_counts()

      963
10     56
Name: Q190_10, dtype: int64

In [50]:
# Work on an independent copy of the filtered rows (not a view of `bullying`) and
# store the Q189 answer codes as integers.
some_bullying = some_bullying.assign(Q189=some_bullying['Q189'].astype(int))
some_bullying

Unnamed: 0,TRANS_CIS,Q189,Q190_1,Q190_2,Q190_3,Q190_4,Q190_5,Q190_6,Q190_7,Q190_8,Q190_9,Q190_10
0,1,3,,,,,,,,,,
1,1,2,,,,4,,,,8,,
2,1,1,,2,,,,6,7,,,
3,1,2,,2,,,,,,8,,
5,1,3,1,2,,,,6,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
1428,2,3,,,,,,,,,,
1432,2,2,,,,,,,,,,
1433,2,2,,,,4,,,,8,,
1434,2,2,1,,,,,,,,,


In [51]:
def one_hot_encode_190(ser, col_n):
    """Turn one raw Q190 answer column into a 0/1 indicator.

    Parameters
    ----------
    ser : pandas.Series
        Raw answers for a single Q190 item.
    col_n : str
        The code that marks the item as selected (e.g. '3' for Q190_3).

    Returns
    -------
    pandas.Series
        0 where the raw value is a single space, 1 where it equals ``col_n``.
        Any other value is not in the lookup, so ``Series.map`` yields NaN for it.
    """
    code_to_flag = dict([(' ', 0), (col_n, 1)])
    return ser.map(code_to_flag)

# Encode each Q190_<i> reason column as a 0/1 flag.
# The raw answers are read from `bullying` (same row index as `some_bullying`)
# rather than from `some_bullying` itself, so this cell can be re-run safely:
# feeding already-encoded 0/1 values back through the encoder would map every
# entry to NaN.
for i in range(1, 11):
    reason_col = f'Q190_{i}'
    raw_answers = bullying.loc[some_bullying.index, reason_col]
    some_bullying[reason_col] = one_hot_encode_190(raw_answers, str(i))

some_bullying

0        
1        
2        
3        
5       1
       ..
1428     
1432     
1433     
1434    1
1435     
Name: Q190_1, Length: 1019, dtype: object
0        
1        
2       2
3       2
5       2
       ..
1428     
1432     
1433     
1434     
1435     
Name: Q190_2, Length: 1019, dtype: object
0        
1        
2        
3        
5        
       ..
1428     
1432     
1433     
1434     
1435     
Name: Q190_3, Length: 1019, dtype: object
0        
1       4
2        
3        
5        
       ..
1428     
1432     
1433    4
1434     
1435     
Name: Q190_4, Length: 1019, dtype: object
0        
1        
2        
3        
5        
       ..
1428     
1432     
1433     
1434     
1435     
Name: Q190_5, Length: 1019, dtype: object
0        
1        
2       6
3        
5       6
       ..
1428     
1432     
1433     
1434     
1435    6
Name: Q190_6, Length: 1019, dtype: object
0        
1        
2       7
3        
5        
       ..
1428     
1432     
1433    

Unnamed: 0,TRANS_CIS,Q189,Q190_1,Q190_2,Q190_3,Q190_4,Q190_5,Q190_6,Q190_7,Q190_8,Q190_9,Q190_10
0,1,3,0,0,0,0,0,0,0,0,0,0
1,1,2,0,0,0,1,0,0,0,1,0,0
2,1,1,0,1,0,0,0,1,1,0,0,0
3,1,2,0,1,0,0,0,0,0,1,0,0
5,1,3,1,1,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
1428,2,3,0,0,0,0,0,0,0,0,0,0
1432,2,2,0,0,0,0,0,0,0,0,0,0
1433,2,2,0,0,0,1,0,0,0,1,0,0
1434,2,2,1,0,0,0,0,0,0,0,0,0


In [55]:
# Row-wise total across the Q190_* columns, i.e. how many reasons each
# respondent selected once the columns hold 0/1 flags.
q190_col = [name for name in combined if 'Q190_' in name]
some_bullying.loc[:, q190_col].sum(axis='columns')

0       0
1       2
2       3
3       2
5       3
       ..
1428    0
1432    0
1433    2
1434    1
1435    1
Length: 1019, dtype: int64

In [56]:
# Show the frame again after encoding (Q190_* columns now hold 0/1 flags).
some_bullying

Unnamed: 0,TRANS_CIS,Q189,Q190_1,Q190_2,Q190_3,Q190_4,Q190_5,Q190_6,Q190_7,Q190_8,Q190_9,Q190_10
0,1,3,0,0,0,0,0,0,0,0,0,0
1,1,2,0,0,0,1,0,0,0,1,0,0
2,1,1,0,1,0,0,0,1,1,0,0,0
3,1,2,0,1,0,0,0,0,0,1,0,0
5,1,3,1,1,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
1428,2,3,0,0,0,0,0,0,0,0,0,0
1432,2,2,0,0,0,0,0,0,0,0,0,0
1433,2,2,0,0,0,1,0,0,0,1,0,0
1434,2,2,1,0,0,0,0,0,0,0,0,0


In [83]:
# Per-group column totals. For the 0/1 Q190_* flags this is the number of
# respondents who selected each reason; the Q189 total is a sum of answer codes
# and carries no meaning on its own.
q190 = some_bullying.groupby('TRANS_CIS').agg('sum')
q190

Unnamed: 0_level_0,Q189,Q190_1,Q190_2,Q190_3,Q190_4,Q190_5,Q190_6,Q190_7,Q190_8,Q190_9,Q190_10
TRANS_CIS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,403,43,57,34,110,25,41,64,144,14,33
2,1706,220,114,0,28,60,93,22,346,29,23


In [101]:
# Long-format table for plotting: one row per (group, Q190 reason).
# Q189 is removed by name (its per-group sum of answer codes is not a count)
# instead of slicing off the first two melted rows by position, and the group
# labels are set with .assign so no value is written into a slice of another frame.
graph_df = (
    q190
    .drop(columns='Q189', errors='ignore')
    .reset_index()
    .melt(id_vars='TRANS_CIS')
    .assign(TRANS_CIS=lambda frame: frame['TRANS_CIS'].map({1: 'Trans', 2: 'Cis'}))
)
graph_df

Unnamed: 0,TRANS_CIS,variable,value
2,Trans,Q190_1,43
3,Cis,Q190_1,220
4,Trans,Q190_2,57
5,Cis,Q190_2,114
6,Trans,Q190_3,34
7,Cis,Q190_3,0
8,Trans,Q190_4,110
9,Cis,Q190_4,28
10,Trans,Q190_5,25
11,Cis,Q190_5,60


In [104]:
# Grouped bar chart: number of respondents selecting each Q190 reason, by group.
# - color_discrete_map pins each group to a fixed colour instead of relying on the
#   order in which the groups happen to appear in the frame.
# - labels replaces the generic melt column names on the axes and legend.
# NOTE(review): bars are raw counts and the two groups differ a lot in size, so
# bar heights are not directly comparable between groups; consider plotting
# within-group shares instead.
q190_graph = px.bar(
    graph_df, x='variable', y='value',
    barmode='group',
    color='TRANS_CIS',
    color_discrete_map={'Trans': '#03d7fc', 'Cis': '#fca2bb'},
    labels={'variable': 'Reason (Q190 item)', 'value': 'Number of respondents', 'TRANS_CIS': 'Group'},
    title='<b>Q190: When you were bullied before you were 18 years old, would you say it was because of your...</b>'
)
q190_graph.show()

- Q190 reason codes:
    - 1: Age
    - 2: Sex (being female or male)
    - 3: Being transgender
    - 4: Gender expression or appearance
    - 5: Race/ethnicity
    - 6: Income level or education
    - 7: Sexual orientation
    - 8: Physical appearance (e.g. weight, height)
    - 9: Religion/spirituality
    - 10: Disability