In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import glob
import os
from datetime import datetime
from os.path import basename

import matplotlib.colors as mcolors
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.pylab import rcParams
from matplotlib.ticker import (AutoMinorLocator, MultipleLocator)
from sklearn.cluster import KMeans

from lib import generate_histogram, parallel_coordinates_for_multinomial_distribution

ModuleNotFoundError: No module named 'pandas'

In [None]:
# Notebook-wide plotting defaults: ggplot theme and a wide, short default figure.
plt.style.use('ggplot')
rcParams.update({'figure.figsize': (11, 4)})

# Data exploration for behaviour recognition

Data: the ExtraSensory dataset, http://extrasensory.ucsd.edu/

Approach: follows a strategy similar to the one described in https://doi.org/10.48550/arXiv.2207.08816

In [None]:
def plot_labels(d, f, ax):
    """Draw one tick mark per active label of a wide label frame.

    Parameters
    ----------
    d : pandas.DataFrame
        Wide frame with a 'timestamp' column plus one column per label.
    f : str
        Path of the file `d` was read from; its basename becomes the title.
    ax : matplotlib axes
        Axes to draw on.

    Returns
    -------
    None
    """
    # Wide -> long while keeping the original row index, then keep only the
    # (row, label) pairs whose value is positive, ordered by row index.
    active = (
        d.melt(id_vars=['timestamp'], ignore_index=False)
        .loc[lambda long_form: long_form.value > 0]
        .sort_index()
    )
    # x = original row index, y = label name; markers only, no connecting line.
    ax.plot(active.index, active.variable, marker='|', lw=0, ms=10)
    ax.set_title(basename(f))

In [None]:
# Load every per-user mood-label file, drop rows that contain any NaN, and keep
# only the users that still have at least one row left.
# glob returns paths in arbitrary order; sorting keeps the panel layout stable
# from run to run.
files = sorted(glob.glob("ExtraSensory.per_uuid_mood_labels/*.csv.gz"))
ds = []
for f in files:
    d = pd.read_csv(f)
    d = d.dropna(how='any')
    if d.shape[0] > 0:
        ds.append((f,d))
if not ds:
    # Without this guard plt.subplots(nrows=0) fails with a far less helpful message.
    raise ValueError("no non-empty mood label files found in "
                     "ExtraSensory.per_uuid_mood_labels/")

# Two panels per row, rounding the row count up for an odd number of users.
n_rows = (len(ds) + 1) // 2
# squeeze=False always yields a 2-D array of axes, so flattening also works
# when there is only a single row (np.concatenate raised in that case).
fig, axs = plt.subplots(nrows=n_rows, ncols=2, figsize=(15,70), squeeze=False)
axs = axs.ravel()
for ax, (f, d) in zip(axs, ds):
    plot_labels(d, f, ax)
# With an odd number of users the last panel has nothing to show; hide it.
for ax in axs[len(ds):]:
    ax.set_visible(False)
plt.show()

In [None]:
# Mood labels of a single participant in long format: complete rows only,
# one row per (original row, label) pair with a positive value, ordered by
# the original row index.
d = (
    pd.read_csv('ExtraSensory.per_uuid_mood_labels/2C32C23E-E30C-498A-8DD2-0EFB9150A02E.moods.csv.gz')
    .dropna(how='any')
    .melt(id_vars=['timestamp'], ignore_index=False)
    .loc[lambda long_form: long_form.value > 0]
    .sort_index()
)
d.head()

In [None]:
# Add a human-readable UTC time string ('YYYY-MM-DD HH:MM:SS') for every row.
# `timestamp` is interpreted as Unix epoch seconds. This is a vectorised
# replacement for a per-row datetime.utcfromtimestamp() call, which is
# deprecated since Python 3.12; the resulting strings are the same.
d['datetime'] = pd.to_datetime(d['timestamp'], unit='s').dt.strftime('%Y-%m-%d %H:%M:%S')
d.head()

In [None]:
# Timeline of the active labels for this participant: one '|' marker per
# (time, label) pair, no connecting line.
fig, ax = plt.subplots(figsize=(15,5))
label_times = pd.to_datetime(d.datetime)
ax.plot(label_times, d.variable, marker='|', lw=0, ms=10)

# Tick labels show 'day hour:minute'; major ticks every 6 hours, minor every 3.
myFmt = mdates.DateFormatter('%d %H:%M')
ax.xaxis.set_major_formatter(myFmt)
ax.xaxis.set_major_locator(mdates.HourLocator(interval=6))
ax.xaxis.set_minor_locator(mdates.HourLocator(interval=3))

plt.xticks(rotation=70)
plt.show()

In [None]:
# Build the histogram table from the long-format label rows using the project
# helper from `lib`. Later cells read a 'time' column from the result and treat
# every other column as a class.
# NOTE(review): the meaning of the second argument (30) is defined inside
# lib.generate_histogram — presumably a bin width in minutes; confirm.
histograms = generate_histogram(d, 30)
# Preview the first rows.
histograms.head()

In [None]:
# Six stacked panels, one for each 4-hour window of the day plotted below.
fig, axs = plt.subplots(nrows=6, figsize=(15,50))
# Every histogram column except the bookkeeping ones is a class. 'cluster' is
# excluded as well so this cell gives the same result when it is re-run after
# the clustering cell has added that column to `histograms`.
classes = [c for c in histograms.columns if c not in ('time', 'cluster')]

# One distinct named CSS colour per class. A fixed seed keeps the colours
# identical across kernel restarts instead of changing on every run.
colors = np.random.default_rng(0).choice(list(mcolors.CSS4_COLORS.keys()), len(classes), replace=False)
colormap = dict(zip(classes, colors))

def plot_violins(hist, ax):
    """Draw the parallel-coordinates plot of `hist` on `ax`, grouped by 'time'.

    Thin wrapper around `parallel_coordinates_for_multinomial_distribution`
    that fixes the grouping column, takes class names and colours from the
    notebook-level `colormap`, and draws the individual lines in faint gray
    behind everything else. The optional sub-grouping arguments of the helper
    (`sub_group_column_name`, `row2linestyle`) are not used.

    Parameters
    ----------
    hist : pandas.DataFrame
        Histogram rows to plot; must contain the 'time' column.
    ax : matplotlib axes
        Axes to draw on.
    """
    line_style = dict(color='gray', alpha=0.2, lw=1, zorder=0)
    parallel_coordinates_for_multinomial_distribution(
        hist,
        group_column_name='time',
        class_names=list(colormap),
        kw_lines=line_style,
        class_colors=list(colormap.values()),
        ax=ax,
    )

# One panel per 4-hour window of the day: [00:00, 04:00), [04:00, 08:00), ...
# up to [20:00, 24:00). Replaces six copy-pasted filter/plot pairs.
WINDOW_HOURS = 4
for ax, start_hour in zip(axs, range(0, 24, WINDOW_HOURS)):
    lower = f'{start_hour:02d}:00:00'
    upper = f'{start_hour + WINDOW_HOURS:02d}:00:00'
    # Bounds are compared as strings, exactly as before — this assumes 'time'
    # holds zero-padded 'HH:MM:SS' strings (TODO confirm against
    # lib.generate_histogram).
    histograms_ = histograms[(histograms['time'] >= lower) & (histograms['time'] < upper)]
    plot_violins(histograms_, ax)




In [None]:
# Cluster the histogram rows into 6 groups using the class columns only.
# 'time' must be present and is removed. A 'cluster' column left over from a
# previous run of this cell is removed as well, so that re-running the cell
# does not feed the old labels back into KMeans as a feature.
hist_ = histograms.drop(columns=['time']).drop(columns=['cluster'], errors='ignore')
# random_state=0 makes the clustering reproducible.
kmeans = KMeans(n_clusters=6, random_state=0).fit(hist_)
# Store each row's cluster id next to its histogram.
histograms['cluster'] = kmeans.labels_

In [None]:
# Same parallel-coordinates view as for the time windows, but grouped by the
# KMeans cluster id instead of by time. Individual lines are drawn in faint
# gray behind everything else; the helper's optional sub-grouping arguments
# (sub_group_column_name, row2linestyle) are not used.
fig, ax = plt.subplots(figsize=(15,5))
parallel_coordinates_for_multinomial_distribution(
    histograms,
    group_column_name='cluster',
    class_names=list(colormap),
    kw_lines=dict(color='gray', alpha=0.2, lw=1, zorder=0),
    class_colors=list(colormap.values()),
    ax=ax,
)

In [None]:
# Number of histogram rows assigned to each cluster, largest first.
# Uses the Series method: the top-level pd.value_counts() is deprecated
# (pandas 2.1) and scheduled for removal.
histograms['cluster'].value_counts()

In [None]:
# Sensor features and labels for the same participant (same UUID) whose mood
# labels were loaded earlier in the notebook.
sensors = pd.read_csv('ExtraSensory.per_uuid_features_labels/2C32C23E-E30C-498A-8DD2-0EFB9150A02E.features_labels.csv.gz')
# Bare expression: display the frame for inspection.
sensors

In [None]:
# Attach the sensor features to every mood-label row by matching on
# 'timestamp'. A left join keeps all rows of `d`; rows without a matching
# sensor record get NaN in the sensor columns.
merged = d.merge(sensors, how='left', on='timestamp')

In [None]:
# Bare expression: display the merged mood/sensor frame for inspection.
merged

In [None]:
# 3x2 grid of example sensor traces sharing one x axis, saved as a PDF.
fig, axs = plt.subplots(nrows=3, ncols=2, figsize=(7,6), sharex=True)
axs = axs.ravel()
for i, ax in enumerate(axs):
    # Every 4th column of `merged`, starting at position 7 (7, 11, 15, ...).
    # NOTE(review): which sensor statistics these positions correspond to
    # depends on the column layout of the merged frame — confirm.
    column = merged.columns[7+i*4]
    # Rows 10..39 by position.
    ax.plot(merged[column].iloc[10:40])
    print(column)
# savefig does not create missing directories; make sure the target exists.
os.makedirs('plots', exist_ok=True)
fig.savefig('plots/neidi_data_plot.pdf')