In [1]:
import glob
import os
import re

import pandas as pd

# NumPy is a library for advanced mathematical computation
import numpy as np

# MatPlotLib is a library for basic data visualization
import matplotlib.pyplot as plt

# SeaBorn is a library for advanced data visualization
import seaborn as sb

## General NPS Cleaning Process:
* Step 1: Import, Clean, and Aggregate Weeks 1-7 for 2016 Data
* Step 2: Import and Clean Week 8 for 2016 Data
* Step 2.5: Aggregate Weeks 1-7 with Week 8 to produce Full 2016 Dataset
* Step 3: Import and Clean 2017 Data
* Step 3.5: Aggregate Full 2016 Data with 2017 Data to produce Complete Dataset

In [2]:
# Custom hex colours used for plots in this notebook.
COLOR_COLUMNS = [
    "#66C2FF",
    "#5CD6D6",
    "#00CC99",
    "#85E085",
    "#FFD966",
    "#FFB366",
    "#FFB3B3",
    "#DAB3FF",
    "#C2C2D6",
]

# Baseline seaborn look: white background, notebook-sized elements.
sb.set(context="notebook", style="white", palette="deep")

# Replace the default palette with the custom colours; n_colors=4 asks seaborn
# for a four-colour cycle built from this list.
sb.set_palette(COLOR_COLUMNS, n_colors=4)

In [3]:
# Directory holding the 2016 survey exports (relative path, starts at "./").
PATH_DIRECTORY = "./SA_Feedback_Surveys_FINAL/2016/"
# Glob pattern for the Weeks 1-7 files; it is appended to PATH_DIRECTORY when globbing.
WEEK1_7_PATH = "Anon*.csv"

# File name of the Week 8 export, which is handled separately from Weeks 1-7.
WEEK8_PATH = "Week 8 Feedback (2016, incomplete) - results.csv"

### Weeks 1 - 7 (2016)

In [4]:
# Collect the paths of every file matching the Weeks 1-7 pattern.
# NOTE(review): glob.glob returns matches in arbitrary order, so positional
# lookups (e.g. index 0) can differ between machines; consider sorting if the
# order matters downstream.
WEEK1_7_FILES = glob.glob(PATH_DIRECTORY + WEEK1_7_PATH)

In [5]:
# Show the matched file paths to confirm the glob picked up the expected files.
WEEK1_7_FILES

['./SA_Feedback_Surveys_FINAL/2016/Anon Week 7 Feedback - Taipei.csv',
 './SA_Feedback_Surveys_FINAL/2016/Anon Week 6 Feedback - Tokyo.csv',
 './SA_Feedback_Surveys_FINAL/2016/Anon Week 1 Feedback - Singapore.csv',
 './SA_Feedback_Surveys_FINAL/2016/Anon Week 7 Feedback - LA.csv',
 './SA_Feedback_Surveys_FINAL/2016/Anon Week 4 Feedback - SF.csv',
 './SA_Feedback_Surveys_FINAL/2016/Anon Week 5 Feedback - SV.csv',
 './SA_Feedback_Surveys_FINAL/2016/Anon Week 4 Feedback - SG.csv',
 './SA_Feedback_Surveys_FINAL/2016/Anon Week 6 Feedback - NY.csv',
 './SA_Feedback_Surveys_FINAL/2016/Anon Week 5 Feedback - HK.csv',
 './SA_Feedback_Surveys_FINAL/2016/Anon Week 1 Feedback - SF.csv',
 './SA_Feedback_Surveys_FINAL/2016/Anon Week 2 Feedback - LA.csv',
 './SA_Feedback_Surveys_FINAL/2016/Anon Week 6 Feedback - Taipei.csv',
 './SA_Feedback_Surveys_FINAL/2016/Anon Week 3 Feedback - NY.csv',
 './SA_Feedback_Surveys_FINAL/2016/Anon Week 6 Feedback - LA.csv',
 './SA_Feedback_Surveys_FINAL/2016/Anon Week

In [6]:
# Read every Weeks 1-7 CSV unchanged (no week/location columns are added here)
# so the raw column layout of each file can be inspected.
dfs = [pd.read_csv(csv_path) for csv_path in WEEK1_7_FILES]

In [7]:
# Inspect the frame read from the first path in WEEK1_7_FILES.
dfs[0]

Unnamed: 0,Timestamp,How would you rate your overall satisfaction with the Summer Academy this week?,How well is the schedule paced?
0,8/5/2016 1:39:41,3,3
1,8/5/2016 1:40:47,4,3
2,8/5/2016 1:40:50,4,3
3,8/5/2016 1:42:44,4,4
4,8/5/2016 1:45:13,5,4
5,8/5/2016 1:45:39,4,3
6,8/5/2016 1:49:21,4,3
7,8/8/2016 1:30:34,5,3
8,8/8/2016 1:33:45,5,3
9,8/8/2016 1:49:29,5,3


In [8]:
# Matches file names such as "Anon Week 7 Feedback - Taipei.csv":
# group 1 is the week number, group 2 the location (which may contain spaces).
_SURVEY_NAME_RE = re.compile(r"Week (\d+) Feedback - (.+)\.csv$")


def get_loc_week(files=None):
    """Load weekly survey CSVs and tag each frame with its week and location.

    The week number and location are parsed from the file's base name, which
    must look like ``"... Week <N> Feedback - <Location>.csv"``. Parsing only
    the base name keeps the result independent of spaces in the directory
    path, and the location is taken whole so multi-word names are not cut off.

    Parameters
    ----------
    files : iterable of str, optional
        Paths of the CSV files to load. Defaults to the notebook-level
        ``WEEK1_7_FILES`` list when omitted.

    Returns
    -------
    list of pandas.DataFrame
        One frame per input path, in input order, each with an integer
        ``week`` column and a string ``location`` column appended.

    Raises
    ------
    ValueError
        If a file name does not follow the expected naming pattern.
    """
    if files is None:
        files = WEEK1_7_FILES

    frames = []
    for path in files:
        match = _SURVEY_NAME_RE.search(os.path.basename(path))
        if match is None:
            raise ValueError(
                "Unexpected survey file name: {!r}".format(path)
            )
        week = int(match.group(1))
        location = match.group(2)
        frames.append(pd.read_csv(path).assign(week=week, location=location))

    return frames

# One DataFrame per Weeks 1-7 file, each carrying 'week' and 'location' columns.
array_of_dfs = get_loc_week()

In [11]:
# Check that the first frame now carries the 'week' and 'location' columns.
array_of_dfs[0]

Unnamed: 0,Timestamp,How would you rate your overall satisfaction with the Summer Academy this week?,How well is the schedule paced?,week,location
0,8/5/2016 1:39:41,3,3,7,Taipei
1,8/5/2016 1:40:47,4,3,7,Taipei
2,8/5/2016 1:40:50,4,3,7,Taipei
3,8/5/2016 1:42:44,4,4,7,Taipei
4,8/5/2016 1:45:13,5,4,7,Taipei
5,8/5/2016 1:45:39,4,3,7,Taipei
6,8/5/2016 1:49:21,4,3,7,Taipei
7,8/8/2016 1:30:34,5,3,7,Taipei
8,8/8/2016 1:33:45,5,3,7,Taipei
9,8/8/2016 1:49:29,5,3,7,Taipei


### Anon Week 1 Feedback - SV.csv has an `Unnamed: 0` column which is actually just timestamp data
- Just need to rename to Timestamp

### Anon Week 5 Feedback - SF.csv is missing a timestamp column
- I need to add a df["Timestamp"] column and fill it with np.nan


### We want to merge 'How well are the tutorials paced?' and 'How well is the schedule paced?' into a single column, then add that column to our main dataframe as a new 'pacing' column

#### Merging those two pacing columns is an example of Merging Mutually Exclusive Columns

### In Week 8 we need to rename our 'location' column to 'Location'

In [10]:
# Number of per-file frames returned by get_loc_week().
len(array_of_dfs)

39