# Import Raw Data

In [33]:
import unicodecsv

def read_csv(filename):
    """Read a CSV file and return its rows as a list of dicts.

    The file is opened in binary mode and handed to
    ``unicodecsv.DictReader``; every row the reader yields is collected
    into a list before the file is closed.
    """
    with open(filename, 'rb') as csv_file:
        rows = list(unicodecsv.DictReader(csv_file))
    return rows

# Load the whole CSV into memory as a list of dicts (one per row).
raw_data = read_csv('survey_results_raw_column_updated.csv')

In [34]:
# Display the first row to check the column names and the raw (string) values.
raw_data[0]

{u'Timestamp': u'2017/07/04 4:04:48 AM GMT+1',
 u'age': u'28',
 u'branch_assoc': u'Yes',
 u'campus_job': u'Yes',
 u'cgpa': u'7 to 8',
 u'council_or_fest': u'Yes',
 u'course': u'Electrical and Electronic Engineering',
 u'gender': u'Male',
 u'graduation': u'2013',
 u'job_sat': u'7',
 u'risk_appetite': u'Would have been more than what I had',
 u'second_degree': u'No',
 u'second_degree_field': u'',
 u'student_club': u'Yes',
 u'student_club_post': u'Yes',
 u'time_acad': u'I would have spent more time than I did',
 u'time_eca': u'I would have spent the same amount of time that I did',
 u'time_experiment': u'Would have spent more time applying/experimenting',
 u'time_social': u'I would have spent the same amount of time that I did',
 u'work_field': u'Data analytics',
 u'workex': u'4'}

In [35]:
# Number of rows loaded from the CSV.
len(raw_data)

118

# Input value alteration functions

In [36]:
# A function to convert Yes to 1 and No to 0
def convert_yes_no(my_list, value):
    """Recode a Yes/No answer in place as 1/0.

    ``my_list`` is indexed with ``value`` (in this notebook: a row dict and
    a column name). A stored "Yes" is overwritten with 1 and a stored "No"
    with 0; any other value is left unchanged. Nothing is returned.
    """
    answer = my_list[value]
    for text, flag in (("Yes", 1), ("No", 0)):
        if answer == text:
            my_list[value] = flag
            break

# A function to replace a text-based answer with a given short form or code
def convert_text(my_list, value, short_text):
    """Overwrite ``my_list[value]`` in place with ``short_text``.

    The previous content is not inspected; callers decide when to replace.
    ``short_text`` is stored as given, so it may be a short string or a
    numeric code. Nothing is returned.
    """
    my_list[value] = short_text

# Alter input values
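Recode some input values for ease of analysis: Yes/No answers become 1/0, course names become short codes, and the comparative ("less / same / more") answers become 1, 2 and 3.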

In [38]:
# Columns holding Yes/No answers that are recoded to 1/0.
YES_NO_FIELDS = (
    "branch_assoc",
    "campus_job",
    "council_or_fest",
    "student_club",
    "student_club_post",
    "second_degree",
)

# Full course names -> short forms.
COURSE_CODES = {
    "Mechanical Engineering": "mech",
    "Chemical Engineering": "chem",
    "Electrical and Electronic Engineering": "eee",
    "Metallurgy and Materials Engineering": "meta",
    "Computer Science and Engineering": "cse",
    "Civil Engineering": "civil",
    "Electronics and Communication Engineering": "ece",
    "Bio Technology": "biot",
}

# Comparative answers are converted to numerical values for regression:
# less = 1
# same = 2
# more = 3
TIME_SPENT_SCALE = {
    "I would have spent less time than I did": 1,
    "I would have spent the same amount of time that I did": 2,
    "I would have spent more time than I did": 3,
}

RISK_APPETITE_SCALE = {
    "Would have been lesser than what I had": 1,
    "Would have been the same that I had": 2,
    "Would have been more than what I had": 3,
}

TIME_EXPERIMENT_SCALE = {
    "Would have spent less time applying/experimenting": 1,
    "Would have spent the same amount of time as academics": 2,
    "Would have spent more time applying/experimenting": 3,
}

# (column, lookup table) pairs, applied to every row in this order.
ANSWER_RECODINGS = (
    ("course", COURSE_CODES),
    ("time_acad", TIME_SPENT_SCALE),
    ("time_eca", TIME_SPENT_SCALE),
    ("time_social", TIME_SPENT_SCALE),
    ("risk_appetite", RISK_APPETITE_SCALE),
    ("time_experiment", TIME_EXPERIMENT_SCALE),
)

for entry in raw_data:
    for field in YES_NO_FIELDS:
        convert_yes_no(entry, field)

    for field, recoding in ANSWER_RECODINGS:
        answer = entry[field]
        # Answers missing from the table (unexpected text, or codes written
        # by an earlier run of this cell) are deliberately left untouched.
        if answer in recoding:
            convert_text(entry, field, recoding[answer])

raw_data[15]  # verification

{u'Timestamp': u'2017/07/08 4:52:52 PM GMT+1',
 u'age': u'25',
 u'branch_assoc': 0,
 u'campus_job': 1,
 u'cgpa': u'<6',
 u'council_or_fest': 0,
 u'course': 'civil',
 u'gender': u'Male',
 u'graduation': u'2013',
 u'job_sat': u'8',
 u'risk_appetite': 2,
 u'second_degree': 1,
 u'second_degree_field': u'Construction Management Nicmar ',
 u'student_club': 1,
 u'student_club_post': 0,
 u'time_acad': 3,
 u'time_eca': 3,
 u'time_experiment': 3,
 u'time_social': 1,
 u'work_field': u'Sales and marketing of steel structures',
 u'workex': u'3'}

# Cleanup functions
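The cell below defines helper functions that convert field values to the appropriate data types (integers, floats, and `None` for empty strings).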

In [39]:
def parse_int(num):
    """Convert a raw field value to ``int``, mapping missing values to ``None``.

    Args:
        num: Value to convert, e.g. a string read from the CSV. An already
            converted ``int`` or ``None`` is also accepted.

    Returns:
        ``None`` if ``num`` is ``None`` or the empty string, else ``int(num)``.

    Raises:
        ValueError: if ``num`` is a non-empty string that ``int()`` rejects.
    """
    # Accept None as well as '': a value that was already parsed to None
    # would otherwise hit int(None) and raise TypeError when parsed again.
    if num is None or num == '':
        return None
    return int(num)
    
def parse_float(num):
    """Convert a raw field value to ``float``, mapping missing values to ``None``.

    Args:
        num: Value to convert, e.g. a string read from the CSV. An already
            converted ``float`` or ``None`` is also accepted.

    Returns:
        ``None`` if ``num`` is ``None`` or the empty string, else
        ``float(num)``.

    Raises:
        ValueError: if ``num`` is a non-empty string that ``float()`` rejects.
    """
    # Accept None as well as '': a value that was already parsed to None
    # would otherwise hit float(None) and raise TypeError when parsed again.
    if num is None or num == '':
        return None
    return float(num)

def parse_empty_string(string):
    """Return ``None`` for the empty string; pass any other value through unchanged."""
    return None if string == '' else string
    
# More to be added    

# Data type conversions

In [41]:
# (column, parser) pairs, applied to every row in this order.
FIELD_PARSERS = (
    ("age", parse_int),
    ("job_sat", parse_int),
    ("workex", parse_float),
    ("second_degree_field", parse_empty_string),
)

# Replace each listed field in place with its parsed value.
for entry in raw_data:
    for field, parse in FIELD_PARSERS:
        entry[field] = parse(entry[field])

raw_data[14]  # verification

{u'Timestamp': u'2017/07/08 4:51:37 PM GMT+1',
 u'age': 26,
 u'branch_assoc': 0,
 u'campus_job': 1,
 u'cgpa': u'7 to 8',
 u'council_or_fest': 0,
 u'course': 'eee',
 u'gender': u'Male',
 u'graduation': u'2013',
 u'job_sat': 8,
 u'risk_appetite': 2,
 u'second_degree': 0,
 u'second_degree_field': None,
 u'student_club': 0,
 u'student_club_post': 1,
 u'time_acad': 2,
 u'time_eca': 3,
 u'time_experiment': 2,
 u'time_social': 2,
 u'work_field': u'Data Analytics / Software Design',
 u'workex': 4.0}