In [1]:
library(dplyr)
library(ggplot2)
library(tidyr)
# Project root directory. Keep the trailing slash: later cells build paths
# with paste0(filepath, ...), which inserts no separator of its own.
filepath <- '/Users/cyz/survey/active_survey/'


Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



In [2]:
# Restore the saved 2016 CCES objects into the current session.
# NOTE(review): later cells read an object named `x` that is never assigned in
# this notebook, so it presumably comes from this file -- confirm.
load(
    file = paste0(filepath, 'data/cces/CCES16_Common_OUTPUT_Feb2018_VV.RData')
)

In [3]:
# Question number(s) -> range of valid response codes
# 301a-o 1-5  # exclude these as they're asked of a non-random sample
# 302-304 1-5
# 307 1-4
# 312_1-7 1-2
# 320a-c 1-4
# 330a-e 1-2
# 331_1-9 1-2
# 332a-f 1-2
# 333a-d 1-2
# 334a-d 1-2
# 335 1-2
# 351A-K 1-2

# Selection criteria:
# Subset of pre-election questions
# Include questions that seek an opinion
# Exclude respondent covariates, actions, knowledge about politics, party recall and name recognition, vote intention
# Exclude questions on approval of respondent's local representatives
# Exclude rating of respondent or political entities on liberal-conservative scale

In [4]:
# Get column names for desired questions.
# Question identifiers are written as strings throughout; mixing numbers and
# strings in c() would coerce everything to character anyway.
questions <- c('302', '303', '304', '307', '312', '320a', '320b', '320c',
               '330', '331', '332', '333', '334', '335', '351')
# paste0() is vectorised, so a single call yields every 'CC16_<question>'
# prefix as a character vector.
question_prefixes <- paste0('CC16_', questions)
all_qnames <- colnames(x)
# A column is selected when its name starts with any of the prefixes, so a
# prefix such as 'CC16_312' also picks up sub-items like 'CC16_312_1'.
# vapply() pins the result to exactly one logical per column name (and to
# logical(0) when x has no columns), where sapply() could return a list.
colname_mask <- vapply(all_qnames, function(qname) {
    any(startsWith(qname, question_prefixes))
}, logical(1))
selected_qnames <- all_qnames[colname_mask]


In [5]:
# Get responses in numeric form (they are factors).
# as.numeric() on a factor returns its integer level codes, not the level
# labels. NOTE(review): confirm the level order matches the survey's response
# codes, and whether codes outside the documented valid ranges should be kept.
# x %>% select(selected_qnames)  # can't apply as.numeric to this
# vapply() requires every question to convert to one numeric value per row of
# x and fails loudly otherwise; x[[qname]] extracts the column as a vector.
responses <- vapply(selected_qnames, function(qname) {
    as.numeric(x[[qname]])
}, numeric(nrow(x)))

In [6]:
# Label each row of the response matrix with the matching value of x$V101.
# NOTE(review): V101 is presumably the respondent/case identifier -- confirm.
rownames(responses) <- x$V101

In [7]:
# Save the selected responses as CSV; NA values are written as empty fields.
write.csv(
    x = responses,
    file = paste0(filepath, 'data/cces/cces16.csv'),
    na = ""
)

In [8]:
# Display the names of the question columns that were selected.
selected_qnames

In [9]:
# Display the numeric response matrix (rows labelled with x$V101).
responses

Unnamed: 0,CC16_302,CC16_303,CC16_304,CC16_307,CC16_312_1,CC16_312_2,CC16_312_3,CC16_312_4,CC16_312_5,CC16_312_6,⋯,CC16_351A,CC16_351B,CC16_351C,CC16_351D,CC16_351E,CC16_351F,CC16_351G,CC16_351H,CC16_351I,CC16_351K
222168628,3,2,6,2,,,,,,,⋯,,1,,,1,2,1,2,1,1
273691199,4,3,5,3,,,,,,,⋯,,2,,,1,1,1,2,2,1
284214415,5,2,4,2,,,,,,,⋯,,1,,,1,1,1,1,1,1
287557695,4,4,5,2,,,,,,,⋯,,2,,,1,1,2,2,1,1
290387662,2,2,6,2,,,,,,,⋯,,2,,,1,1,1,1,2,1
290932100,4,4,4,3,,,,,,,⋯,,1,,,1,1,1,2,1,1
292860642,3,3,3,2,,,,,,,⋯,,2,,,1,2,1,1,1,1
295367942,5,5,6,2,2,2,2,2,1,2,⋯,2,2,1,1,1,1,1,1,2,1
295717127,4,4,6,2,,,,,,,⋯,,2,,,2,1,1,1,1,2
295859014,5,3,5,1,,,,,,,⋯,,2,,,1,1,1,1,1,2


In [10]:
# Capture additional questions (name, then range of valid response codes):
# ideo5 1-5
# pid7 1-7
# pew_religimp 1-4
# newsint 1-4
numeric_questions <- c('ideo5', 'pid7', 'pew_religimp', 'newsint')
cat_questions <- c('pid3', 'gender', 'educ', 'race', 'child18', 'ownhome')
more_responses <- cbind(
    responses,
    # Numeric covariates: vapply() fixes the result to one number per row of
    # x for each question, instead of relying on sapply()'s input-dependent
    # simplification.
    vapply(numeric_questions, function(qname) {
        as.numeric(x[[qname]])
    }, numeric(nrow(x))),
    # Categorical covariates are taken as stored.
    # NOTE(review): sapply() derives the result type from the column classes,
    # and cbind() coerces all of its inputs to one common type, so character
    # values here turn the whole of more_responses into a character matrix --
    # confirm that this is intended.
    sapply(cat_questions, function(qname) { x[, qname] })
)

In [11]:
# Save the extended response table as CSV; NA values become empty fields.
write.csv(
    x = more_responses,
    file = paste0(filepath, 'data/cces/cces16_full.csv'),
    na = ""
)