- Neste notebook, atribuiremos um valor (peso) a cada resposta, de acordo com a pergunta feita, em cada conjunto de dados.

- Para poupar tempo, utilizamos o código presente no [GitHub](https://github.com/ruslan-kl/lgbt/blob/master/lgbt-survey-analysis.ipynb) para tal.

In [1]:
import numpy as np
import pandas as pd

### Definindo funções auxiliares

- Todas as respostas terão um valor que pertence ao intervalo [-1, 1]. As respostas neutras, como, por exemplo, "Don't know", terão valor igual a NaN (_not a number_).


- Basicamente, as respostas para as perguntas se subdividem em 3 conjuntos (podendo variar a forma que a resposta é dada):
    1. {Very widespread, Fairly widespread, Fairly rare, Very rare}
    2. {Yes, No}
    3. {Always, Often, Rarely, Never}
    

- A grande dificuldade é que responder Yes a uma pergunta nem sempre corresponde a um _score_ igual a 1. Por exemplo, na pergunta "Have you personally felt discriminated against or harassed because of being perceived as Gay?", a resposta Yes terá _score_ -1; já na pergunta "Does your current partner know that you are L, G, B or T?", a resposta Yes terá _score_ 1.

In [2]:
def set_WidespreadRare_weight(df, questions_list, rare_negative=False):
    """Weight answers given on the widespread/rare scale, in place.

    For each question code in ``questions_list``, rows of ``df`` whose
    ``question_code`` matches and whose ``answer`` is one of the four scale
    labels get their ``weight`` set as follows (default orientation):

        'Very widespread'   -> -1
        'Fairly widespread' -> -0.5
        'Fairly rare'       -> +0.5
        'Very rare'         -> +1

    Args:
        df: DataFrame with ``question_code`` and ``answer`` columns; its
            ``weight`` column is written to.
        questions_list: Iterable of question codes to process.
        rare_negative: When True, "rare" is the bad outcome and every sign
            in the table above is flipped.

    Returns:
        None. ``df`` is modified in place.
    """
    sign = -1 if rare_negative else 1
    scale = (
        ('Very widespread', -sign),
        ('Fairly widespread', -sign / 2),
        ('Fairly rare', sign / 2),
        ('Very rare', sign),
    )
    for code in questions_list:
        is_question = df['question_code'] == code
        for label, value in scale:
            df.loc[is_question & (df['answer'] == label), 'weight'] = value
        

def set_YesNo_weight(df, questions_list, yes_negative=False):
    """Weight Yes/No answers, in place.

    For each question code in ``questions_list``, rows of ``df`` answering
    'Yes' get ``weight`` +1 and rows answering 'No' get -1. Other answers
    are left untouched.

    Args:
        df: DataFrame with ``question_code`` and ``answer`` columns; its
            ``weight`` column is written to.
        questions_list: Iterable of question codes to process.
        yes_negative: When True, 'Yes' is the bad outcome: 'Yes' gets -1
            and 'No' gets +1.

    Returns:
        None. ``df`` is modified in place.
    """
    yes_value = -1 if yes_negative else 1
    scale = (('Yes', yes_value), ('No', -yes_value))
    for code in questions_list:
        is_question = df['question_code'] == code
        for label, value in scale:
            df.loc[is_question & (df['answer'] == label), 'weight'] = value
        
        
def set_AlwaysNever_weight(df, questions_list, alsways_negative=False):
    """Weight answers given on the always/never scale, in place.

    For each question code in ``questions_list``, rows of ``df`` whose
    ``question_code`` matches and whose ``answer`` is one of the four scale
    labels get their ``weight`` set as follows (default orientation):

        'Always' -> +1
        'Often'  -> +0.5
        'Rarely' -> -0.5
        'Never'  -> -1

    Args:
        df: DataFrame with ``question_code`` and ``answer`` columns; its
            ``weight`` column is written to.
        questions_list: Iterable of question codes to process.
        alsways_negative: When True, 'Always' is the bad outcome and every
            sign in the table above is flipped. (The spelling of this
            keyword is kept as-is because callers pass it by name.)

    Returns:
        None. ``df`` is modified in place.
    """
    sign = -1 if alsways_negative else 1
    scale = (
        ('Always', sign),
        ('Often', sign / 2),
        ('Rarely', -sign / 2),
        ('Never', -sign),
    )
    for code in questions_list:
        is_question = df['question_code'] == code
        for label, value in scale:
            df.loc[is_question & (df['answer'] == label), 'weight'] = value

### Função auxiliar para limpar os dados

- Criaremos essa função para limpar os dados após os cálculos, deixando assim, apenas os dados que desejamos trabalhar

In [3]:
def clean_df(df):
    """Return a cleaned copy of ``df``.

    Rows containing any NaN are dropped, the ``CountryID`` column is
    removed, and the index is renumbered from 0. The input frame itself is
    not modified.
    """
    return (
        df.dropna()
        .drop('CountryID', axis=1)
        .reset_index(drop=True)
    )

## DailyLife

In [4]:
def clean_df(df):
    """Drop incomplete rows and the ``CountryID`` column from ``df``.

    Returns a new DataFrame with every row that contains a NaN removed, the
    ``CountryID`` column dropped, and a fresh 0-based index.
    """
    complete_rows = df.dropna()
    without_country_id = complete_rows.drop(columns=['CountryID'])
    return without_country_id.reset_index(drop=True)

In [5]:
# Load the 2012 DailyLife survey table and give each answer a weight in [-1, 1].
DailyLifeDF = pd.read_csv('Data/cleaned-survey-data-2012/DailyLife.csv')

# Every row starts unweighted (NaN). Answers that are never assigned a weight
# below (e.g. 'Don`t know', or 'I did not need or use any benefits or services'
# for h15) stay NaN and are removed by clean_df at the end.
# np.nan is used instead of the np.NaN alias, which NumPy 2.0 removed.
DailyLifeDF['weight'] = np.nan

# Questions where "rare" is the good outcome.
set_WidespreadRare_weight(
    df=DailyLifeDF,
    questions_list=[
        'b1_a', 'b1_b', 'b1_c', 'b1_d', 'c1a_a', 'c1a_b', 'c1a_c', 'c1a_d',
    ],
    rare_negative=False,
)
# Questions where "rare" is the bad outcome.
set_WidespreadRare_weight(
    df=DailyLifeDF,
    questions_list=[
        'b1_e', 'b1_g', 'b1_h', 'b1_i',
    ],
    rare_negative=True,
)

# ***** Question-specific answers of this dataframe ***** #

# g4_a, g4_b and g4_c share the same frequency scale: the less often the
# event happened, the higher the weight.
g4_scale = {
    'Never happened in the last sixth months': 1,
    'Happened only once in the last six months': 0.5,
    '2-5 times in the last six months': -0.5,
    '6 times or more in the last six months': -1,
}
for g4_code in ('g4_a', 'g4_b', 'g4_c'):
    is_g4_question = DailyLifeDF['question_code'] == g4_code
    for g4_answer, g4_weight in g4_scale.items():
        DailyLifeDF.loc[is_g4_question & (DailyLifeDF['answer'] == g4_answer), 'weight'] = g4_weight

# h15: 'Yes' is the bad outcome.
is_h15 = DailyLifeDF['question_code'] == 'h15'
DailyLifeDF.loc[is_h15 & (DailyLifeDF['answer'] == 'Yes'), 'weight'] = -1
DailyLifeDF.loc[is_h15 & (DailyLifeDF['answer'] == 'No'), 'weight'] = 1

# Drop rows with NaN (including unweighted answers) and the CountryID column.
DailyLifeDF = clean_df(DailyLifeDF)
DailyLifeDF.head()

Unnamed: 0,CountryName,subset,question_code,question_label,answer,percentage,weight
0,Austria,Lesbian,b1_a,"In your opinion, how widespread is offensive l...",Very widespread,8.0,-1.0
1,Austria,Lesbian,b1_a,"In your opinion, how widespread is offensive l...",Fairly widespread,34.0,-0.5
2,Austria,Lesbian,b1_a,"In your opinion, how widespread is offensive l...",Fairly rare,45.0,0.5
3,Austria,Lesbian,b1_a,"In your opinion, how widespread is offensive l...",Very rare,9.0,1.0
4,Austria,Gay,b1_a,"In your opinion, how widespread is offensive l...",Very widespread,4.0,-1.0


## Rights Awareness

In [6]:
# Load the 2012 RightsAwareness survey table and weight its Yes/No answers.
RightsAwarenessDF = pd.read_csv('Data/cleaned-survey-data-2012/RightsAwareness.csv')

# Every row starts unweighted (NaN); answers other than 'Yes'/'No'
# (e.g. 'Don`t know') stay NaN and are removed by clean_df below.
# np.nan is used instead of the np.NaN alias, which NumPy 2.0 removed.
RightsAwarenessDF['weight'] = np.nan

# Here 'Yes' counts as +1 and 'No' as -1 for every question in the table.
RightsAwarenessDF.loc[RightsAwarenessDF['answer'] == 'No', 'weight'] = -1
RightsAwarenessDF.loc[RightsAwarenessDF['answer'] == 'Yes', 'weight'] = 1

# Drop rows with NaN (including unweighted answers) and the CountryID column.
RightsAwarenessDF = clean_df(RightsAwarenessDF)
RightsAwarenessDF.head()

Unnamed: 0,CountryName,subset,question_code,question_label,answer,percentage,weight
0,Austria,Lesbian,d1,"In the country where you live, is there a law ...",Yes,53.0,1.0
1,Austria,Lesbian,d1,"In the country where you live, is there a law ...",No,10.0,-1.0
2,Austria,Gay,d1,"In the country where you live, is there a law ...",Yes,45.0,1.0
3,Austria,Gay,d1,"In the country where you live, is there a law ...",No,16.0,-1.0
4,Austria,Bisexual women,d1,"In the country where you live, is there a law ...",Yes,51.0,1.0


## Discrimination

In [7]:
# Load the 2012 Discrimination survey table and give each answer a weight in [-1, 1].
DiscriminationDF = pd.read_csv('Data/cleaned-survey-data-2012/Discrimination.csv')

# Initialise the weight column BEFORE any weights are assigned. Resetting it
# after the helper calls would overwrite everything they set with NaN, and
# clean_df would then drop every row except the c10 answers.
# Answers that never receive a weight (e.g. 'Don`t know') stay NaN.
# np.nan is used instead of the np.NaN alias, which NumPy 2.0 removed.
DiscriminationDF['weight'] = np.nan

# Yes/No questions where 'Yes' is the bad outcome.
set_YesNo_weight(
    df=DiscriminationDF,
    questions_list=[
        'c2a_a', 'c2a_b', 'c2a_c', 'c2a_d', 'c2_b', 'c2_c', 'c4_a', 'c4_b',
        'c4_c', 'c4_d', 'c4_e', 'c4_f', 'c4_g', 'c4_h', 'c4_i', 'c4_j', 'c4_k', 'discrim1yr',
    ],
    yes_negative=True,
)

# Always/Never questions where 'Always' is the bad outcome.
set_AlwaysNever_weight(
    df=DiscriminationDF,
    questions_list=[
        'c8a_b', 'c8a_c', 'c8a_d', 'c8a_e', 'c8a_f', 'c9_b', 'c9_c', 'c9_d', 'c9_e',
    ],
    alsways_negative=True,
)

# Always/Never questions where 'Always' is the good outcome.
set_AlwaysNever_weight(
    df=DiscriminationDF,
    questions_list=[
        'c8a_a', 'c9_a',
    ],
    alsways_negative=False,
)

# ***** Question-specific answers of this dataframe ***** #

# c10: the two "nothing happened / not applicable" answers are neutral (0);
# every listed negative healthcare experience gets -1.
c10_weights = {
    'None of the above': 0,
    'I have never accessed healthcare services': 0,
    'Difficulty in gaining access to healthcare': -1,
    'Having to change general practitioners or other specialists due to their negative reaction': -1,
    'Receiving unequal treatment when dealing with medical staff': -1,
    'Foregoing treatment for fear of discrimination or intolerant reactions': -1,
    'Specific needs ignored (not taken into account)': -1,
    'Inappropriate curiosity': -1,
    'Pressure or being forced to undergo any medical or psychological test': -1,
}
is_c10 = DiscriminationDF['question_code'] == 'c10'
for c10_answer, c10_weight in c10_weights.items():
    DiscriminationDF.loc[is_c10 & (DiscriminationDF['answer'] == c10_answer), 'weight'] = c10_weight

# Drop rows with NaN (including unweighted answers) and the CountryID column.
DiscriminationDF = clean_df(DiscriminationDF)
DiscriminationDF.head()

Unnamed: 0,CountryName,subset,question_code,question_label,answer,percentage,weight
0,Austria,Lesbian,c10,Have you ever experienced any of the following...,Difficulty in gaining access to healthcare,2.0,-1.0
1,Austria,Lesbian,c10,Have you ever experienced any of the following...,Having to change general practitioners or othe...,8.0,-1.0
2,Austria,Lesbian,c10,Have you ever experienced any of the following...,Receiving unequal treatment when dealing with ...,3.0,-1.0
3,Austria,Lesbian,c10,Have you ever experienced any of the following...,Foregoing treatment for fear of discrimination...,5.0,-1.0
4,Austria,Lesbian,c10,Have you ever experienced any of the following...,Specific needs ignored (not taken into account),12.0,-1.0


## Violence and Harassment

In [8]:
# Load the 2012 Violence and Harassment survey table and give each answer a
# weight in [-1, 1].
ViolenceAndHarassmentDF = pd.read_csv('Data/cleaned-survey-data-2012/ViolenceAndHarrassment.csv')

# Every row starts unweighted (NaN). Answers that are never assigned a weight
# below (e.g. 'Don`t know', or 'I do not have a same-sex partner' for e1)
# stay NaN and are removed by clean_df at the end.
# np.nan is used instead of the np.NaN alias, which NumPy 2.0 removed.
ViolenceAndHarassmentDF['weight'] = np.nan

# Yes/No questions where 'Yes' is the bad outcome.
set_YesNo_weight(
    df=ViolenceAndHarassmentDF,
    questions_list=[
        'e1', 'e2', 'f1_a', 'f1_b', 'fa1_5', 'fa2_5', 'fb1_5', 'fb2_5',
    ],
    yes_negative=True,
)

# fa1_3 and fb1_3 share the same "how many times" scale: seven answers
# spaced roughly 1/7 apart, from -0.14 ('Once') down to -1
# ('More than ten times').
incident_count_scale = {
    'More than ten times': -1,
    'Six to ten times': -0.86,
    'Five times': -0.71,
    'Four times': -0.57,
    'Three times': -0.43,
    'Twice': -0.29,
    'Once': -0.14,
}
for count_code in ('fa1_3', 'fb1_3'):
    is_count_question = ViolenceAndHarassmentDF['question_code'] == count_code
    for count_answer, count_weight in incident_count_scale.items():
        ViolenceAndHarassmentDF.loc[
            is_count_question & (ViolenceAndHarassmentDF['answer'] == count_answer), 'weight'
        ] = count_weight

# Drop rows with NaN (including unweighted answers) and the CountryID column.
ViolenceAndHarassmentDF = clean_df(ViolenceAndHarassmentDF)
ViolenceAndHarassmentDF.head()

Unnamed: 0,CountryName,subset,question_code,question_label,answer,percentage,weight
0,Austria,Lesbian,e1,Do you avoid holding hands in public with a sa...,Yes,33.0,-1.0
1,Austria,Lesbian,e1,Do you avoid holding hands in public with a sa...,No,53.0,1.0
2,Austria,Gay,e1,Do you avoid holding hands in public with a sa...,Yes,51.0,-1.0
3,Austria,Gay,e1,Do you avoid holding hands in public with a sa...,No,25.0,1.0
4,Austria,Bisexual women,e1,Do you avoid holding hands in public with a sa...,Yes,18.0,-1.0


### Salvando os dataframes

In [9]:
# Write each weighted dataframe to its CSV file, without the index column.
for weighted_frame, csv_path in (
    (DailyLifeDF, 'Data/weighted-survey-data-2012/DailyLife.csv'),
    (DiscriminationDF, 'Data/weighted-survey-data-2012/Discrimination.csv'),
    (RightsAwarenessDF, 'Data/weighted-survey-data-2012/RightsAwareness.csv'),
    (ViolenceAndHarassmentDF, 'Data/weighted-survey-data-2012/ViolenceAndHarassment.csv'),
):
    weighted_frame.to_csv(csv_path, index=False)