In [None]:
# Draw matplotlib figures inside the notebook output.
%matplotlib inline
from IPython.display import set_matplotlib_formats
# Select PDF and SVG as the output formats for inline figures.
# NOTE(review): IPython has deprecated IPython.display.set_matplotlib_formats in
# favour of matplotlib_inline.backend_inline.set_matplotlib_formats -- confirm
# which IPython version this notebook targets before upgrading.
set_matplotlib_formats('pdf', 'svg')

In [None]:
from pylab import rcParams

# Default size (width, height) for figures that do not pass their own figsize.
rcParams['figure.figsize'] = (15, 8)

In [None]:
import scipy.stats
import analysis
from matplotlib import pyplot
from matplotlib import colors
from collections import Counter
import itertools
import string
import random

In [None]:
# Load the survey responses from the project-local analysis module.
# Later cells use this as a mapping whose values expose .keys() listing the
# questions -- presumably {respondent: {question: answer}}; TODO confirm
# against analysis.get_full_dict.
data = analysis.get_full_dict()

In [None]:
# Pairwise Pearson correlations between all numeric survey questions.
#   correlations[row][col] -> result of scipy.stats.pearsonr(x, y), i.e. (r, p-value)
#   top_pvals              -> (row, col, r, p), one entry per pair with col listed
#                             before row in the question order (no diagonal)
correlations = {}
top_pvals = []

# Numeric codes that stand in for the education-level answers.
edu_dict = {'Sophomore': 1, 'Junior': 2, 'Senior': 3, 'Graduate': 4}

# The first respondent's keys define the question list; this assumes every
# respondent answered the same questions -- TODO confirm.
# Named first_user rather than `any` so the builtin any() is not shadowed
# for the rest of the kernel session.
first_user = next(iter(data.keys()))
questions = list(data[first_user].keys())

# Pull and encode each question's answers once, up front, instead of
# re-extracting every column for every row inside the pairwise loop.
numeric_columns = []  # (question, values) pairs, in original question order
for question in questions:
    values = analysis.pull_data(data, data.keys(), question)
    values = [edu_dict.get(v, v) for v in values]

    # Only the first answer is inspected to decide whether a column is
    # numeric; an empty column is skipped rather than raising IndexError.
    if not values or not isinstance(values[0], (int, float)):
        print(question)  # report questions left out of the correlation table
        continue

    numeric_columns.append((question, values))

for i, (row, x) in enumerate(numeric_columns):
    correlations[row] = {}

    for j, (col, y) in enumerate(numeric_columns):
        correlations[row][col] = scipy.stats.pearsonr(x, y)

        # Record each pair only once (lower triangle), skipping self-correlation.
        if j < i:
            top_pvals.append((row, col) + tuple(correlations[row][col]))

In [None]:
# Five question pairs with the smallest value at index 3 of each
# (row, col, r, p) entry, i.e. the lowest p-values.
sorted(top_pvals, key=lambda pair: pair[3])[:5]

In [None]:
# Axis order for the heat maps: the selected analysis.qs_entry sections
# followed by the selected analysis.qs_exit sections.
entry_sections = ('usability', 'efficiency', 'knowledge')
exit_sections = ('usability', 'education', 'intention')

keys = []
for section in entry_sections:
    keys.extend(analysis.qs_entry[section])
for section in exit_sections:
    keys.extend(analysis.qs_exit[section])

# Square table of correlation results, rows and columns both ordered by `keys`.
matrix = [[correlations[row][col] for col in keys] for row in keys]

In [None]:
# Colour anchors along the [0, 1] scale, as (position, (red, green, blue)):
#   0.0 -> red, 0.4 -> white, 0.8 -> blue, 1.0 -> white
anchors = (
    (0.0, (1.0, 0.0, 0.0)),
    (0.4, (1.0, 1.0, 1.0)),
    (0.8, (0.0, 0.0, 1.0)),
    (1.0, (1.0, 1.0, 1.0)),
)

# Expand the anchors into per-channel (position, value_below, value_above)
# rows; both values are equal, so there are no jumps at the anchors.
segment_data = {
    channel: tuple((position, rgb[index], rgb[index]) for position, rgb in anchors)
    for index, channel in enumerate(('red', 'green', 'blue'))
}

my_cmap = colors.LinearSegmentedColormap('Custom', segment_data)

# Heat map of the statistically significant correlations (p < 0.05).
# Each matrix cell is (r, p). A significant r in [-1, 1] is mapped linearly
# onto [0.0, 0.8] of my_cmap (red -> white -> blue); every other cell is drawn
# at 1.0, which my_cmap renders as white.
fig, ax = pyplot.subplots(figsize=[12,12])
image = [[(1 + x[0]) / 2.5 if x[1] < 0.05 else 1.0 for x in row] for row in matrix]
# Pin the colour scale to [0, 1]. Without vmin/vmax, imshow rescales to the
# data's own min and max, so the weakest significant correlation would be
# painted pure red even when it is positive.
ax.imshow(image, cmap=my_cmap, interpolation='nearest', vmin=0.0, vmax=1.0)
pyplot.xlabel("Significant Correlations", fontsize=16)
ax.xaxis.tick_top()
pyplot.xticks(range(len(keys)), keys, rotation='vertical')
pyplot.yticks(range(len(keys)), keys)

# Alternate tick-label colours between consecutive question categories so the
# groups can be told apart along both axes.
tick_colors = {
    'Usability Rating (Entry)': 'k',
    'Efficiency Rating (Entry)': 'saddlebrown',
    'Knowledge Rating (Entry)': 'k',
    'Usability Rating (Exit)': 'saddlebrown',
    'Education Rating (Exit)': 'k',
    'Intention Rating (Exit)': 'saddlebrown'
}

# Both axes carry the same labels, so one pass colours x and y tick labels alike.
for label in itertools.chain(ax.xaxis.get_ticklabels(), ax.yaxis.get_ticklabels()):
    for category, color in tick_colors.items():
        if label.get_text() in analysis.categories[category]:
            label.set_color(color)

ax.grid(True)
pyplot.tight_layout()

# NOTE(review): savefig does not create the "out" directory; it must already exist.
pyplot.savefig("out/sig.png", transparent=True)

# Companion figure: how significant each pair is. A p-value below 0.05 is
# mapped to 1 - 20 * p (p = 0 -> 1, p -> 0.05 -> 0); all other cells are 0.
fig, ax = pyplot.subplots()
image = [[1 - 20 * x[1] if x[1] < 0.05 else 0 for x in row] for row in matrix]
# Fix the colour scale to [0, 1] so shades always mean the same p-value,
# rather than letting imshow rescale to the data's own min and max.
ax.imshow(image, cmap=pyplot.cm.Blues, interpolation='nearest', vmin=0.0, vmax=1.0)
pyplot.title("Significant P-Values", fontsize=16)

### analysis.main()

In [None]:
# Questions from the 'qualitative' section of both analysis.qs_entry and
# analysis.qs_exit.
keys = analysis.qs_entry['qualitative'] + analysis.qs_exit['qualitative']

entry_list, exit_list = analysis.read_data()
organized = analysis.organize_data(entry_list, exit_list)
# Element 4 of the organized result is used as the per-user qualitative
# answers -- presumably {user: {question: answer}}; TODO confirm.
qual_dict = organized[4]
users = list(qual_dict)

# NOTE: rebinds `data` (previously the full survey dict) to a mapping from
# each qualitative question to the values pulled for all users.
data = {}
for key in keys:
    data[key] = analysis.pull_data(qual_dict, users, key)

In [None]:
# Display the values pulled for this one question across all users
# (a bare expression, so the notebook renders it as the cell output).
data['What do you think needs the most improvements?']

In [None]:
# Word-frequency count over the qualitative answers.
punctuation = set(string.punctuation)

# The first user's keys define which questions are scanned.
# Named first_user rather than `any` so the builtin any() is not shadowed.
first_user = next(iter(qual_dict.keys()))
words = []

for question in qual_dict[first_user].keys():
    for answer in data[question]:
        # Strip ASCII punctuation and lower-case character by character,
        # then split the remainder on whitespace.
        sanitized = ''.join(ch.lower() for ch in answer if ch not in punctuation)
        words.extend(sanitized.split())

counter = Counter(words)
# Words that occur more than three times, most frequent first.
list(itertools.takewhile(lambda x: x[1] > 3, counter.most_common()))

In [None]:
# First-order Markov chain over `words`: markov[w] lists every word that
# directly follows w. Duplicates are kept, so random.choice picks successors
# in proportion to how often they occurred.
markov = {}

for current, following in zip(words, words[1:]):
    markov.setdefault(current, []).append(following)

# Random walk from a random start word. With fewer than two words there are
# no transitions, so the walk is empty instead of raising IndexError.
s = [random.choice(list(markov.keys()))] if markov else []

# Stop at 100 words, or earlier on a word that has no recorded successor.
while s and len(s) < 100 and s[-1] in markov:
    s.append(random.choice(markov[s[-1]]))

print(s)