This script reproduces the statistics published at trygghetsundersokelsen.no 

Please see the following site for how to access data from the National Crime Survey:
https://trygghetsundersokelsen.no/data.html#en

For data documentation: https://data.trygghetsundersokelsen.no

Content: 
Frequencies for the whole population are calculated for all variables.
However, not every variable is plotted against every demographic variable.
For each variable, the script applies three demographic variables.
All demographic variables are nevertheless used at some point.
The demographic variables are as follows: 

- Age
- Gender
- Education
- Immigration
- Married
- Children
- County

**Weighting**: All frequencies are weighted using `weight_edu`. The weight is based on demographic variables (age, gender and geography) and education level.

**DISCLAIMER**: OsloMet, ideas2evidence, Frischsenteret and the Ministry of Justice and Public Security do not take any responsibility for the use and interpretation of data from the National Crime Survey. The above mentioned further take no responsibility for any negative consequences that may arise as a result of the use of this data.

In [None]:

# Data import 

import pandas as pd
import numpy as np

# The location of the data folder is kept outside the script, in path.txt.
with open("path.txt", mode="r", encoding="utf8") as path_file:
    raw_path = path_file.read()
path = raw_path.replace("\n", "")

# The file name is appended directly to the stored path.
df = pd.read_spss(f"{path}Norwegian Crime Survey NSD v1.sav")

# Demographic columns used as grouping features
groups = [
    'aldersgruppe_vuttrekk_dsf',
    'kjonn_dsf',
    'utdanning_kort',
    'innvandret',
    'gift',
    'barn',
    'fylkenr_dsf',
]


### Recoding

Recoding of features. 

All features used are recoded/mapped so that missing-categories are NaN. In addition, some response alternatives are combined/merged.

In [None]:
# Respondents born in Norway with one or two immigrant parents are merged into
# a single category. Answers that are not keys of the mapping
# (e.g. "Not answered") end up as NaN.
born_in_norway_label = "Born in Norway, but both or one of the parents immigrated"

immigration_mapping = {
    "No": "No",
    "I myself have immigrated to Norway": "Have immigrated to Norway",
    "Both my parents have immigrated to Norway, but I was born in Norway": born_in_norway_label,
    "Mother has immigrated to Norway, but my father and I have not immigrated": born_in_norway_label,
    "Father has immigrated to Norway, but my mother and I have not immigrated": born_in_norway_label,
}

df["innvandret"] = df["innvandret"].map(immigration_mapping)

df["innvandret"].value_counts()

In [None]:
# Married/registered partners and cohabitants are merged into one category;
# answers that are not keys of the mapping end up as NaN.
cohabiting_label = "Married or cohabitant"

married_mapping = {
    "Yes, married/registered partner": cohabiting_label,
    "No, living alone": "Living alone",
    "Yes, cohabitant": cohabiting_label,
}

df["gift"] = df["gift"].map(married_mapping)

df["gift"].value_counts()


In [None]:
# Keep the three education levels as they are; any other answer becomes NaN.
education_levels = (
    "Higher education",
    "Vocational school / High school",
    "No education / Primary school",
)
education_mapping = {level: level for level in education_levels}

df = df.assign(utdanning_kort=df["utdanning_kort"].map(education_mapping))

df["utdanning_kort"].value_counts()

In [None]:
# Relabel the yes/no answers; any other answer becomes NaN.
children_mapping = {
    "Yes": "Has children",
    "No": "Does not have children",
}

df["barn"] = df["barn"].map(children_mapping)

df["barn"].value_counts()

In [None]:
# Lists of the survey variables handled below and in the analysis.
safety1 = ['urtrygg1', 'urtrygg3']
safety2 = ['urtrygg2']
worry = [
    'urkrim_1', 'urkrim_2', 'urkrim_5', 'urkrim_7',
    'urkrim_8', 'urkrim_9', 'urkrim_10',
]
# utvrist and utvslag are recoded separately below and are not in this list.
exposure = [
    'utsibilde1', 'utsiident1', 'uttbol1', 'uttlom1',
    'uttran1', 'uttruss1', 'uttsyk1',
]

# --- Safety variables ---
# Answers that are not keys of a mapping become NaN.

urtrygg1_answers = ('Very safe', 'Pretty safe', 'Pretty insecure', 'Very insecure')
df['urtrygg1'] = df['urtrygg1'].map({answer: answer for answer in urtrygg1_answers})

# The two "not always" answers are merged into one category.
not_always_label = 'Depends on the situation/Does not think about it'
df['urtrygg2'] = df['urtrygg2'].map({
    'Yes, I always plan so that I am as safe as possible': 'Always plan ahead',
    'Sometimes / it depends on the situation': not_always_label,
    "No, I'm not thinking about whether I could be the victim of a crime": not_always_label,
})

urtrygg3_answers = ('To a large degree', 'To some degree', 'To a small degree', 'Not at all')
df['urtrygg3'] = df['urtrygg3'].map({answer: answer for answer in urtrygg3_answers})

# --- Worry (urkrim) variables: one shared response scale ---
urkrim_answers = ("Quite often", "Pretty often", "Pretty rare", "Quite rare", "Never")
urkrim_mapping = {answer: answer for answer in urkrim_answers}

for feature in worry:
    df[feature] = df[feature].map(urkrim_mapping)

# --- Exposure variables ---
# utvslag and utvrist use a slightly different wording of the "Yes" answer.
yes_in_2020_mapping = {
    "Yes, (number of times this happened to you in 2020)": "Yes",
    "No": "No",
}
for column in ('utvslag', 'utvrist'):
    df[column] = df[column].map(yes_in_2020_mapping)

# The remaining exposure variables share one mapping.
exposure_mapping = {
    "Yes, (number of times)": "Yes",
    "No": "No",
}

for feature in exposure:
    df[feature] = df[feature].map(exposure_mapping)



### Functions

`weighted_frequency` calculates the frequency of `feature`, using the education weight `weight_edu`.

`weighted_frequency_by` calculates the frequency of `feature` by `group`, using the education weight `weight_edu`.

In [None]:
def weighted_frequency(df, feature, print_ = False):
    """Return the weighted frequency table of ``feature``.

    Each row of ``df`` contributes its ``weight_edu`` value to the category it
    belongs to. Rows where ``feature`` is missing are left out by the
    group-by.

    Parameters
    ----------
    df : pandas.DataFrame
        Data holding the ``feature`` column and a ``weight_edu`` column.
    feature : str
        Name of the column to tabulate.
    print_ : bool, default False
        If true, print the resulting table.

    Returns
    -------
    pandas.DataFrame
        One row per category with the columns ``feature``, ``count`` (sum of
        weights, rounded to whole numbers) and ``pct`` (``count`` divided by
        the total of the rounded counts, i.e. a share between 0 and 1).
    """
    # observed=False keeps every category of a categorical column in the
    # table, independent of the default of the installed pandas version.
    weighted = df.groupby(feature, observed=False)['weight_edu'].sum()

    # Round the counts only; rounding the whole frame would also round the
    # category values of a numeric feature.
    x = weighted.round(0).reset_index(name='count')

    # Percentages are based on the rounded counts.
    x['pct'] = x['count'] / x['count'].sum()

    if print_:
        print(x)

    return x



In [None]:
def weighted_frequency_by(df, group, feature, print_ = False):
    """Return the weighted frequency table of ``feature`` within each ``group``.

    Each row of ``df`` contributes its ``weight_edu`` value to its
    (group, feature) combination. Rows where ``group`` or ``feature`` is
    missing are left out by the group-by.

    Parameters
    ----------
    df : pandas.DataFrame
        Data holding the ``group`` and ``feature`` columns and a
        ``weight_edu`` column.
    group : str
        Name of the column that defines the groups.
    feature : str
        Name of the column to tabulate within each group.
    print_ : bool, default False
        If true, print the resulting table.

    Returns
    -------
    pandas.DataFrame
        One row per (group, feature) combination with the columns ``group``,
        ``feature``, ``count`` (sum of weights, rounded to whole numbers),
        ``group_count`` (sum of ``count`` within the group) and ``pct``
        (``count`` divided by ``group_count``, i.e. a share between 0 and 1).
    """
    # observed=False keeps every category combination of categorical columns
    # in the table, independent of the default of the installed pandas version.
    weighted = df.groupby([group, feature], observed=False)['weight_edu'].sum()

    # Round the counts only; rounding the whole frame would also round the
    # values of numeric group/feature columns.
    x = weighted.round(0).reset_index(name='count')

    # Total per group, taken from the 'count' column explicitly so the result
    # is always a single Series, also when `feature` is numeric.
    x['group_count'] = x.groupby(group, observed=False)['count'].transform('sum')
    x['pct'] = x['count'] / x['group_count']

    if print_:
        print(x)

    return x


### Analysis

In the following, the data visualized on www.trygghetsundersokelsen.no is reproduced.

In [None]:

# ------------------------------------------------------------------------------------------------ #
# urtrygg1: Percentage that feels unsafe if they go out alone at night in the area where they live #
# ------------------------------------------------------------------------------------------------ #

# Whole-population table first, then breakdowns by education, age group and county.
# NOTE(review): none of these calls pass print_=True, so when run as a notebook
# cell presumably only the table of the last call is displayed — confirm that
# this is intended.
weighted_frequency(df, 'urtrygg1')

weighted_frequency_by(df, 'utdanning_kort', 'urtrygg1')

weighted_frequency_by(df, 'aldersgruppe_vuttrekk_dsf', 'urtrygg1')

weighted_frequency_by(df, 'fylkenr_dsf', 'urtrygg1')


In [None]:
# ------------------------------------------------------------------- #
# urtrygg2: Percentage that always plan ahead to avoid crime exposure #
# ------------------------------------------------------------------- #
# Whole-population table first, then breakdowns by children, gender and county.
# NOTE(review): none of these calls pass print_=True, so when run as a notebook
# cell presumably only the table of the last call is displayed — confirm that
# this is intended.
weighted_frequency(df, 'urtrygg2')

weighted_frequency_by(df, 'barn', 'urtrygg2')

weighted_frequency_by(df, 'kjonn_dsf', 'urtrygg2')

weighted_frequency_by(df, 'fylkenr_dsf', 'urtrygg2')

In [None]:
# ----------------------------------------------------------------- #
# urtrygg3: Percentage that is worried about being exposed to crime #
# ----------------------------------------------------------------- #

# Whole-population table first, then breakdowns by immigration background and
# marital status.
# NOTE(review): none of these calls pass print_=True, so when run as a notebook
# cell presumably only the table of the last call is displayed — confirm that
# this is intended.
weighted_frequency(df, 'urtrygg3')

weighted_frequency_by(df, 'innvandret', 'urtrygg3')

weighted_frequency_by(df, 'gift', 'urtrygg3')


In [None]:
# ------------------------------------------------------------ #
# urkrim_1: Percentage that worries about exposure to burglary #
# ------------------------------------------------------------ #

# Whole-population table for urkrim_1, then a breakdown by children.
weighted_frequency(df, 'urkrim_1')

weighted_frequency_by(df, 'barn', 'urkrim_1')

# NOTE(review): this call tabulates urkrim_2 although the cell header is about
# urkrim_1 — confirm whether a different variable or group was intended.
weighted_frequency_by(df, 'barn', 'urkrim_2')

In [None]:

# ------- #
# Worries #
# ------- #

# Whole-population table for every worry variable. The tables are printed
# explicitly because values returned inside a loop are otherwise discarded;
# this matches the exposure loop, which also passes print_ = True.
for feature in worry:
    weighted_frequency(df, feature, print_ = True)


In [None]:

# -------------------------------------- #
# Worries by demographic characteristics #
# -------------------------------------- #

# Print one table per (worry variable, demographic variable) pair, going
# through all demographic variables for one worry variable before the next.
worry_pairs = [(feature, group) for feature in worry for group in groups]

for feature, group in worry_pairs:
    weighted_frequency_by(df, group=group, feature=feature, print_=True)

In [None]:
# -------- #
# Exposure #
# -------- #

# Print the whole-population table of every exposure variable.
for feature in exposure:
    weighted_frequency(df, feature=feature, print_=True)


In [None]:

# --------------------------------------- #
# Exposure by demographic characteristics #
# --------------------------------------- #

# Print one table per (exposure variable, demographic variable) pair, going
# through all demographic variables for one exposure variable before the next.
exposure_pairs = [(feature, group) for feature in exposure for group in groups]

for feature, group in exposure_pairs:
    weighted_frequency_by(df, group=group, feature=feature, print_=True)