In [None]:
# Path of the preprocessed CSV file. The download cell passes it to
# columns_from() as `file_out`, and the analysis cell reads it back in.
# NOTE(review): this path lives under 'interview-mock/' while the raw download
# path further down uses 'interview-mocks/' -- confirm the difference is intended.
file_processed = 'interview-mock/mock_processed_en.csv'

In [None]:
# --- Run once: download, filter, translate & save the interview data ---
# (DeepL limits the number of characters you can translate per month.)
# (Skip this cell when using the mock data without a configured .env.)

# Options
sheet_name = 'mock'
file_download = 'interview-mocks/mock_raw.csv'

from dotenv import load_dotenv

# Load the .env values before anything else is imported, in case data_import
# reads the environment at import time -- TODO confirm whether it does.
load_dotenv()

import os
from data_import import columns_from, fetch_google_sheet

# Fetch the sheet named `sheet_name` into `file_download`; the sheet id is read
# from the GOOGLE_SHEET_ID environment variable.
fetch_google_sheet(os.getenv('GOOGLE_SHEET_ID'), sheet_name, file_download)

# Process the raw download and write the result to `file_processed`.
# The return value is not used here; it is bound to `_` and discarded.
_ = columns_from(file_download, should_translate=True, file_out=file_processed)

In [None]:
# `re` is imported here but used by the association-analysis cell further down.
import re
# --- run to set up the natural language processor ---
from analysis import NLProcessor
NLProcessor.ready() # this may take a minute or two when notebook is restarted
# compute similarity with 'medicine'
NLProcessor.set_similarity_data('medicine')

In [None]:
# Imported locally so this cell does not depend on another cell having
# imported `re` first.
import re

from data_import import columns_from

# At the start of every line, matches the run of characters that are neither
# ASCII letters nor quote characters (e.g. bullets, numbering, whitespace).
_LEADING_NON_TEXT = re.compile('^[^a-zA-Z"\']*', flags=re.MULTILINE)


def use_columns(col_name):
    """Return True for column headers that contain 'Assoziationen'."""
    return 'Assoziationen' in col_name


def _analyse_cell(cell):
    """Clean the text of one cell and score it with NLProcessor.

    The leading non-letter prefix of each line is stripped and the remaining
    lines are joined with '; '.

    Returns a dict with the keys:
        'content'    -- the cleaned text
        'sentiment'  -- the 'compound' entry of NLProcessor.sentiment(content)
        'similarity' -- the result of NLProcessor.similarity(content)
    """
    # assumes `cell` is a str; a missing/NaN cell would raise here -- TODO confirm
    content = _LEADING_NON_TEXT.sub('', cell).replace('\n', '; ')
    return {
        'content': content,
        'sentiment': NLProcessor.sentiment(content)['compound'],
        'similarity': NLProcessor.similarity(content)
    }


# Maps each selected column header to the list of analysed cells of that column.
cols = {
    header: [_analyse_cell(cell) for cell in column]
    for header, column in columns_from(file_processed, use_col=use_columns).items()
}

# Dump every analysed cell, grouped by column header, with a blank line
# between the groups.
for header, column in cols.items():
    print(header)
    for cell in column:
        print(cell)
    print()

In [None]:
import pandas as pd
# Imported for the plotting cell, which uses DataFrame.plot_bokeh and
# pandas_bokeh.plot_grid.
import pandas_bokeh

# Keys taken from every analysed cell; they also become the column names.
plot_vals = ['content', 'sentiment', 'similarity']


def _points_to_frame(points):
    """Build a DataFrame with one row per entry of `points`.

    `points` is a list of dicts that each provide every key in `plot_vals`;
    the columns of the result follow the order of `plot_vals`.
    """
    rows = [[point[key] for key in plot_vals] for point in points]
    return pd.DataFrame(rows, columns=plot_vals)


# One frame per association question.
df1 = _points_to_frame(cols['1.d) Assoziationen Namen'])
df2 = _points_to_frame(cols['1.e) Assoziationen Logo'])
df3 = _points_to_frame(cols['3.a) Assoziationen Gentest'])

In [None]:
def _association_scatter(frame, title):
    """Return a sentiment-vs-similarity scatter plot of `frame`.

    `frame` must provide the columns 'content', 'sentiment' and 'similarity'.
    The figure is created with show_figure=False so that it can be placed in
    a grid afterwards; the hover tool is configured with the 'content' column.
    """
    return frame.plot_bokeh.scatter(
        title=title,
        x='sentiment', y='similarity', ylabel='similarity w/ "medicine"',
        hovertool_string='@{content}', show_figure=False)


ax1 = _association_scatter(df1, 'name associations')
ax2 = _association_scatter(df2, 'logo associations')
ax3 = _association_scatter(df3, 'genetic testing associations')

# Lay the three figures out as a grid: two in the first row, one in the second.
pandas_bokeh.plot_grid([[ax1, ax2], [ax3]])