# Long-term consequences of the most serious incident of physical and/or sexual violence by a partner since the age of 15

Source: [Violence against women survey results](http://fra.europa.eu/en/vaw-survey-results)

View this notebook on [nbviewer](http://nbviewer.ipython.org/github/gauden/notebooks/blob/master/vaw.ipynb)

##  Citation

##  Setup

In [1]:
from __future__ import print_function, unicode_literals, division

In [2]:
import os

import numpy as np
import pandas as pd

import requests
import pycountry

# Must start at column 0 like the other imports: an indented statement in the
# middle of a cell is an IndentationError.
from pkg_resources import resource_stream


In [3]:
# (*) To communicate with Plotly's server, sign in with credentials file
import plotly.plotly as py  

# (*) Useful Python/Plotly tools
import plotly.tools as tls   

# (*) Graph objects to piece together plots
from plotly.graph_objs import *

## Download and Create DataFrame

In [4]:
# Endpoint of the FRA data explorer plus the query parameters that select
# question DVS_B02 for all respondents; the parameters are joined with '&'.
url = 'http://fra.europa.eu/DVS/render/?' + '&'.join([
    'locale=EN',
    'dataSource=VAW',
    'media=xls',
    'width=740',
    'plot=euBars',
    'topic=2.+Consequences+of+physical+and+sexual+violence',
    'question=DVS_B02',
    'superSubset=1',
    'subset=AllSubset',
    'subsetValue=01--All',
])

# Local cache location for the downloaded export.
local_file = os.path.join('data', 'vaw.xls')

In [5]:
# Download the export once and cache it on disk; later runs reuse the file.
if not os.path.isfile(local_file):
    # A timeout keeps the cell from hanging forever on a stalled connection.
    result = requests.get(url, timeout=30)
    if result.status_code == 200:
        # The target folder may not exist yet on a fresh checkout.
        target_dir = os.path.dirname(local_file)
        if target_dir and not os.path.isdir(target_dir):
            os.makedirs(target_dir)
        # `result.content` is raw bytes, so the file has to be opened in
        # binary mode; text mode fails on Python 3 and can alter line endings.
        with open(local_file, 'wb') as fh:
            fh.write(result.content)
    else:
        raise IOError('Could not download the data. Check URL and network connection.')

In [6]:
# The downloaded file is read as tab-separated text (despite its .xls name).
# Its first line holds the column names, so say so explicitly with `header=0`;
# a boolean is not a valid value for `header`.
data = pd.read_csv(local_file, sep='\t', header=0)
data.head()

Unnamed: 0,CountryCode,question_code,subset,answer,percentage
0,AT,DVS_B02,01. All,01. Depression,21
1,AT,DVS_B02,01. All,02. Anxiety,46
2,AT,DVS_B02,01. All,03. Panic attacks,19
3,AT,DVS_B02,01. All,04. Loss of self-confidence,37
4,AT,DVS_B02,01. All,05. Feeling vulnerable,39


##  Clean Data

### Relabel Country Codes

In [7]:
def expand_country(code):
    """Return a readable country name for a two-letter survey country code.

    Parameters
    ----------
    code : str
        Country code as found in the ``CountryCode`` column.

    Returns
    -------
    str
        The country name, or ``code`` itself when no name can be found
        (so unknown labels pass through unchanged).
    """
    # EU publications use two codes that differ from ISO 3166-1 and are
    # therefore unknown to pycountry: UK (ISO: GB) and EL (ISO: GR, Greece).
    eu_specific = {'UK': 'United Kingdom', 'EL': 'Greece'}
    if code in eu_specific:
        return eu_specific[code]

    # NOTE(review): pycountry appears to have spelled this lookup field both
    # `alpha2` (used by this notebook originally) and `alpha_2`; try both and
    # confirm against the installed release.
    for field in ('alpha2', 'alpha_2'):
        try:
            match = pycountry.countries.get(**{field: code})
        except KeyError:
            continue
        # A lookup may also signal "not found" by returning None.
        if match is not None:
            return match.name

    return code

# Add a readable `country` column derived from each row's country code.
data['country'] = data['CountryCode'].map(expand_country)

### Drop Extra Columns and Numbering of Answers

In [8]:
# Answers look like "01. Depression": cut the first four characters
# (the numbering and the following space) to keep only the label.
data['answer'] = data['answer'].str.slice(4)

# These columns are not needed for the plots below.
data = data.drop(['question_code', 'subset', 'CountryCode'], axis=1)

### Drop rows with Averages and Answers for 'Other'

In [9]:
# Remove rows whose country label ends in 'verage' (matches both
# '...Average' and '...average'); every other row is kept.
is_average = data['country'].str.endswith('verage', na=False)
data = data[~is_average]

In [10]:
# Keep every answer category except the catch-all 'Other'.
data = data.loc[data['answer'].ne('Other')]

### View Tidy Data and Table

In [11]:
# Rank the answer categories by their median percentage across countries,
# highest first; `order.index` is later used as the column order of the
# wide table.
# Only `percentage` is aggregated (the text column `country` has no median),
# and `sort_values` is used because `DataFrame.sort` no longer exists in pandas.
order = (
    data.groupby('answer')[['percentage']]
    .median()
    .sort_values(by='percentage', ascending=False)
)

In [12]:
# Preview the first five rows of the tidy (long-format) table.
data.head(5)

Unnamed: 0,answer,percentage,country
0,Depression,21,Austria
1,Anxiety,46,Austria
2,Panic attacks,19,Austria
3,Loss of self-confidence,37,Austria
4,Feeling vulnerable,39,Austria


In [13]:
# Preview the wide layout: one row per country, one column per answer.
data.pivot(index='answer', columns='country', values='percentage').transpose()

answer,Anxiety,Concentration difficulties,Depression,Difficulties in relationships,Difficulty in sleeping,Feeling vulnerable,Loss of self-confidence,Panic attacks
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Austria,46,20,21,32,38,39,37,19
Belgium,29,15,20,36,32,31,36,15
Bulgaria,43,11,28,24,21,30,36,24
Croatia,40,23,30,18,31,49,32,20
Cyprus,48,21,24,23,26,27,24,24
Czech Republic,35,7,17,17,19,33,22,10
Denmark,23,16,13,38,21,46,37,8
Estonia,32,10,35,42,28,30,34,17
Finland,34,12,23,29,22,34,34,7
France,30,13,22,28,32,28,34,15


In [14]:
# Reshape from long to wide: pivot with answers as rows, then transpose so
# that countries become the rows and answers the columns.
by_answer = data.pivot(index='answer', columns='country', values='percentage')
data = by_answer.transpose()

In [15]:
# Arrange the answer columns in the order computed earlier in `order`.
data = data.loc[:, order.index]

In [16]:
# Preview the first five countries of the reordered wide table.
data.head(5)

answer,Anxiety,Feeling vulnerable,Loss of self-confidence,Difficulties in relationships,Difficulty in sleeping,Depression,Panic attacks,Concentration difficulties
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Austria,46,39,37,32,38,21,19,20
Belgium,29,31,36,36,32,20,15,15
Bulgaria,43,30,36,24,21,28,24,11
Croatia,40,49,32,18,31,30,20,23
Cyprus,48,27,24,23,26,24,24,21


## Heatmap

In [17]:
# Heatmap of the wide table: one row per country, one column per answer.
plotly_data = Data([
    Heatmap(
        # `.values` yields the underlying 2-D array; it replaces the removed
        # `DataFrame.as_matrix()` and matches the boxplot cell below.
        z=data.values,
        x=data.columns,
        y=data.index,
        name='y',
        # Fixed colour range so the scale does not depend on the data extremes.
        zmin=7,
        zmax=55,
        colorscale=[[0, 'rgb(220, 220, 220)'], [0.2, 'rgb(245, 195, 157)'], [0.4, 'rgb(245, 160, 105)'], [1, 'rgb(178, 10, 28)']],
        colorbar=ColorBar(
            title='% Respondents'
        )
    )
])

# Figure-level settings: fixed size, categorical axes with mirrored ticks,
# white background.
layout = Layout(
    title='Psychological Consequences of Physical or Sexual Abuse',
    showlegend=False,
    autosize=False,
    width=700,
    height=1000,
    # Horizontal axis: answer categories.
    xaxis=XAxis(
        title='',
        titlefont=Font(
            family='Open Sans, sans-serif'
        ),
        range=[-0.5, 7.5],
        type='category',
        autorange=True,
        showgrid=False,
        zeroline=False,
        showline=True,
        ticks='inside',
        mirror='allticks',
        linecolor='rgb(34,34,34)',
        linewidth=1
    ),
    # Vertical axis: countries, with reversed autorange.
    yaxis=YAxis(
        titlefont=Font(
            family='Open Sans, sans-serif'
        ),
        range=[-0.5, 28.5],
        type='category',
        autorange='reversed',
        showgrid=False,
        zeroline=False,
        showline=True,
        ticks='inside',
        mirror='allticks',
        linecolor='rgb(34,34,34)',
        linewidth=1
    ),
    legend=Legend(
        x=0.02,
        y=1,
        bgcolor='rgba(255, 255, 255, 0.5)',
        xref='paper',
        yref='paper'
    ),
    # Wide left/right/bottom margins leave room for the category labels.
    margin=Margin(
        l=120,
        r=120,
        b=150,
        pad=0
    ),
    paper_bgcolor='white',
    plot_bgcolor='white',
    hovermode='x',
    dragmode='zoom'
)
fig = Figure(data=plotly_data, layout=layout)
py.iplot(fig, height=1000)

## Boxplots

In [18]:
# Tick labels: put every word of an answer label on its own line.
x_data = ['<br>'.join(label.split()) for label in data.columns]

# One array of per-country percentages for each answer category.
y_data = data.values.T

# One box trace per answer category, in column order.
traces = [
    Box(
        y=percentages,
        name=label,
        boxpoints=False,
        whiskerwidth=0.2,
        marker=Marker(size=2),
        line=Line(width=1),
    )
    for label, percentages in zip(x_data, y_data)
]

# Axis and margin settings are named first, then combined into the layout.
percentage_axis = YAxis(
    autorange=True,
    showgrid=True,
    zeroline=True,
    dtick=5,
    gridcolor='rgb(255, 255, 255)',
    gridwidth=1,
    zerolinecolor='rgb(255, 255, 255)',
    zerolinewidth=2,
)
plot_margin = Margin(l=40, r=30, b=80, t=100)

layout = Layout(
    title='Psychological Consequences of Physical or Sexual Abuse<br>(Percentage of Respondents reporting the effect)',
    yaxis=percentage_axis,
    margin=plot_margin,
    width=600,
    height=600,
    showlegend=False,
)

fig = Figure(data=traces, layout=layout)
py.iplot(fig)