# Part 1: Extract the variable descriptions

In [1]:
import pandas as pd
import re

In [2]:
ols_data = pd.read_csv("ols_animal_research_survey_data_2016.csv")

In [3]:
with open("ols_animal_research_survey_data_2016_ukda_data_dictionary.rtf") as file:  
    data = file.read()

In [4]:
# data

In [5]:
question_tuples = re.findall('(Variable\slabel\s=\s\}\{\\\\cf4\s)([\d\w\s\?\.\:\,]*)', data)
question_tuples[0:5]

[('Variable label = }{\\cf4 ', 'Respondent serial'),
 ('Variable label = }{\\cf4 ',
  'How well informed do you feel about the use of animals in scientific research in the UK?'),
 ('Variable label = }{\\cf4 ',
  'Interested in finding out more about the ongoing work to find alternatives to using animals in research?'),
 ('Variable label = }{\\cf4 ',
  'Interested in finding out more about the ongoing work to improve the welfare of animals in scientific research?'),
 ('Variable label = }{\\cf4 ',
  'I can accept the use if animals in research as long as it is for medical research purposes and there is no alternative')]

In [6]:
var_name_tuples = re.findall('(Variable\s=\s\}\{\\\\f2\\\\fs20\\\\cf4\s)([\d\w]*)', data)

In [7]:
cleaned_data = pd.DataFrame()

In [8]:
for tuple in enumerate(question_tuples):
    survey_num = 1
    var_name = var_name_tuples[tuple[0]][1]
    question = tuple[1][1]
    cleaned_data = cleaned_data.append([[survey_num, var_name, question]], ignore_index=True)

In [9]:
cleaned_data.columns = ["Survey_Number", "Var_Name", "Var_Text"]
cleaned_data.head()

Unnamed: 0,Survey_Number,Var_Name,Var_Text
0,1,ID,Respondent serial
1,1,Q1,How well informed do you feel about the use of...
2,1,Q2a,Interested in finding out more about the ongoi...
3,1,Q2b,Interested in finding out more about the ongoi...
4,1,Q3a,I can accept the use if animals in research as...


In [10]:
len(cleaned_data)

248

In [11]:
num = 248/3
num

82.66666666666667

In [12]:
cleaned_data[82:165]["Survey_Number"] += 1
cleaned_data[165:248]["Survey_Number"] += 2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [13]:
cleaned_data

Unnamed: 0,Survey_Number,Var_Name,Var_Text
0,1,ID,Respondent serial
1,1,Q1,How well informed do you feel about the use of...
2,1,Q2a,Interested in finding out more about the ongoi...
3,1,Q2b,Interested in finding out more about the ongoi...
4,1,Q3a,I can accept the use if animals in research as...
5,1,Q3b,There needs to be more work done into alternat...
6,1,Q3c,I can accept the use of animals in scientific ...
7,1,Q3d,Animals should not be used in any scientific r...
8,1,Q3e,It does not bother me if animals are used in s...
9,1,Q3f,The use of animals for medical research should...


In [21]:
def make_csv(survey_num):
    df = cleaned_data[cleaned_data["Survey_Number"] == survey_num]
    df = df.iloc[:, 1:3]
    name = "OLSAnimal_" + str(survey_num) + ".csv"
    print(len(df))
    df.to_csv(path_or_buf=name, index=False)

In [22]:
make_csv(1)
make_csv(2)
make_csv(3)

82
83
83


In [23]:
cleaned_data.to_csv(path_or_buf="OLSAnimal_cleaned_joined.csv", index=False)

## Part 2: Create the survey CSV

In [24]:
d = {"Survey_Number": [1, 2, 3], 
    "Survey_Name": ["Public Attitudes towards Animal Research 2016-1", "Public Attitudes towards Animal \
    Research 2016-2", "Public Attitudes towards Animal Research 2016-3"], "Num_Participants": [987, 987, 987], \
    "Org_Conduct": ["IPSOS", "IPSOS", "IPSOS"], "Num_Questions": [82, 83, 83], "Data_Link": \
    ["http://bit.ly/2F7fNuv", "http://bit.ly/2F7fNuv", "http://bit.ly/2F7fNuv"], \
    "Documentation_Link": ["http://bit.ly/2CUZWsE", "http://bit.ly/2CUZWsE", "http://bit.ly/2CUZWsE"],\
     "Source_Link": ["http://bit.ly/2CsBSlr", "http://bit.ly/2CsBSlr", "http://bit.ly/2CsBSlr"],\
     "Summary": ["Part 1 of findings of a \
        2016 survey on current public awareness and attitudes towards the use of animals in research. The \
        survey also examines attitudes towards, and trust in, the regulatory system and the people who work \
        with animals in research.",
                 "Part 2 of findings of a \
        2016 survey on current public awareness and attitudes towards the use of animals in research. The \
        survey also examines attitudes towards, and trust in, the regulatory system and the people who work \
        with animals in research.", 
                 "Part 3 of findings of a \
        2016 survey on current public awareness and attitudes towards the use of animals in research. The \
        survey also examines attitudes towards, and trust in, the regulatory system and the people who work \
        with animals in research."]}

In [25]:
detail = pd.DataFrame(data=d, columns = ["Survey_Number", "Survey_Name", "Num_Participants", \
                      "Org_Conduct", "Num_Questions", "Data_Link", "Documentation_Link", "Source_Link", \
                      "Summary"])

In [26]:
detail

Unnamed: 0,Survey_Number,Survey_Name,Num_Participants,Org_Conduct,Num_Questions,Data_Link,Documentation_Link,Source_Link,Summary
0,1,Public Attitudes towards Animal Research 2016-1,987,IPSOS,82,http://bit.ly/2F7fNuv,http://bit.ly/2CUZWsE,http://bit.ly/2CsBSlr,Part 1 of findings of a 2016 survey on...
1,2,Public Attitudes towards Animal Research 2...,987,IPSOS,83,http://bit.ly/2F7fNuv,http://bit.ly/2CUZWsE,http://bit.ly/2CsBSlr,Part 2 of findings of a 2016 survey on...
2,3,Public Attitudes towards Animal Research 2016-3,987,IPSOS,83,http://bit.ly/2F7fNuv,http://bit.ly/2CUZWsE,http://bit.ly/2CsBSlr,Part 3 of findings of a 2016 survey on...


In [27]:
detail.to_csv(path_or_buf="OLS_Survey_Detail.csv", index=False)