In [227]:
import os
import datetime
import numpy as np
import pandas as pd
import plotly as py
import cufflinks as cf

In [228]:
# Plotly credentials are taken from the environment instead of being written
# into the notebook, so that API keys never end up in version control.
# Export PLOTLY_USERNAME and PLOTLY_API_KEY before starting the kernel; a
# missing variable raises KeyError here rather than failing later on upload.
# NOTE(review): keys that were previously committed in this cell should be
# treated as leaked and regenerated in the Plotly account settings.
py.tools.set_credentials_file(
    username=os.environ['PLOTLY_USERNAME'],
    api_key=os.environ['PLOTLY_API_KEY'],
)

In [229]:
# Analysis parameters: everything a reader is expected to tune lives here.
participant = 'a'                       # sub-folder of data_directory that is read
data_directory = '../data'              # root folder containing the participant folders
timeframe = '2016-04-01', '2017-02-01'  # (start, end) dates, parsed later as %Y-%m-%d
rolling_mean_window = 30                # rolling-mean window, applied to the daily resample

In [230]:
def remove_outliers(series, whisker=1.5):
    """Drop values lying outside the Tukey fences of ``series``.

    The fences are ``Q1 - whisker * IQR`` and ``Q3 + whisker * IQR`` where
    ``IQR = Q3 - Q1``. (The previous implementation scaled the quartiles
    themselves by 1.5, which is not an interquartile-range rule and, for
    all-positive data, placed the lower fence above Q1.)

    Parameters
    ----------
    series : pandas.Series
        Numeric values to filter.
    whisker : float, default 1.5
        Multiple of the IQR that a value may lie beyond a quartile before it
        is treated as an outlier.

    Returns
    -------
    pandas.Series
        The entries of ``series`` that are not outliers, with their original
        index labels. NaN entries are kept, because they compare as neither
        below the lower fence nor above the upper one.
    """
    q1 = series.quantile(0.25)
    q3 = series.quantile(0.75)
    spread = q3 - q1
    lower_fence = q1 - whisker * spread
    upper_fence = q3 + whisker * spread
    outliers = (series < lower_fence) | (series > upper_fence)
    return series[~outliers]

In [231]:
def normalize(series):
    """Linearly rescale ``series`` so its minimum maps to -1 and its maximum to 1.

    Parameters
    ----------
    series : pandas.Series
        Numeric values to rescale.

    Returns
    -------
    pandas.Series
        Rescaled values with the same index. If every value is identical the
        range is zero and the scaling is undefined; in that case each value
        maps to 0.0 (the midpoint of the target interval) instead of the NaN
        that a 0/0 division would produce.
    """
    # Local names deliberately avoid shadowing the builtins min() and max().
    lowest = series.min()
    highest = series.max()
    span = highest - lowest
    if span == 0:
        # Multiplying by 0.0 keeps the index, the name and any NaN positions.
        return (series - lowest) * 0.0
    return ((series - lowest) / span - 0.5) * 2

In [232]:
# Accumulator frame: each source is outer-merged in on its timestamp index and
# contributes one column.
data = pd.DataFrame()

# LifeSlice source. The 'date' and 'time' columns are parsed together into the
# 'date_time' index, and rows containing any missing value are dropped.
lifeslice = pd.read_csv(
    '/'.join([data_directory, participant, 'lifeslice.csv']),
    parse_dates=[['date', 'time']],
    index_col=['date_time'],
).dropna()
lifeslice_emotions_valence = normalize(remove_outliers(lifeslice['emotions.valence']))
data = data.merge(
    lifeslice_emotions_valence.to_frame('lifeslice'),
    how='outer',
    left_index=True,
    right_index=True,
)

# Text sources. A source whose CSV is absent for this participant is skipped.
for dataset in ['imessage', 'facebook', 'dayone', '750words']:
    csv = '/'.join([data_directory, participant, dataset + '.csv'])
    if os.path.exists(csv):
        df = pd.read_csv(
            csv,
            parse_dates=[['date', 'time']],
            index_col=['date_time'],
        ).dropna()
        series = normalize(remove_outliers(df['sentiment.comparative']))
        # Entries that are exactly -1 after normalization are discarded.
        series = series[series != -1]
        data = data.merge(
            series.to_frame(dataset),
            how='outer',
            left_index=True,
            right_index=True,
        )

In [233]:
# Convert the two timeframe dates into row positions in the index, then keep
# only the rows between them (start inclusive, end exclusive).
bounds = [datetime.datetime.strptime(day, '%Y-%m-%d') for day in timeframe]
start, end = (data.index.searchsorted(bound) for bound in bounds)
data = data[start:end]

In [234]:
# One histogram per column, stacked vertically. The number of subplot rows is
# taken from the frame because sources whose CSV is missing are never added as
# columns, so a fixed row count would not always match.
data.iplot(
    kind='histogram',
    filename='chronist-histogram',
    subplots=True,
    shape=(len(data.columns), 1),
)

In [235]:
# Average each source per calendar day.
daily_means = data.resample('1d').mean()
# Days without observations for a column are filled with that column's overall mean.
daily_filled = daily_means.fillna(data.mean())
# Smooth with a centered rolling mean over rolling_mean_window days.
resampled = daily_filled.rolling(rolling_mean_window, center=True).mean()

In [236]:
# One colour per column, looked up by column position; a column's raw points
# and its moving-average line share the same colour.
colors = ['#50514F', '#F25F5C', '#FFE066', '#247BA0', '#70C1B3']

# Raw observations drawn as small markers.
scatters = [
    py.graph_objs.Scattergl(
        name=label,
        x=data.index,
        y=data[label],
        mode='markers',
        marker=dict(size=2, color=colors[position]),
    )
    for position, label in enumerate(data.columns)
]

# Smoothed daily series drawn as lines.
moving_averages = [
    py.graph_objs.Scattergl(
        name=label + ' ma',
        x=resampled.index,
        y=resampled[label],
        mode='lines',
        line=dict(color=colors[position]),
    )
    for position, label in enumerate(resampled.columns)
]

py.plotly.iplot(scatters + moving_averages, filename='chronist-time-series')

In [239]:
# Pairwise scatterplot matrix of the smoothed daily series, with a histogram
# of each series on the diagonal.
fig = py.figure_factory.create_scatterplotmatrix(
    resampled,
    diag='histogram',
    height=800,
    width=800,
)
py.plotly.iplot(fig, filename='chronist-scatterplot-matrix')