In [None]:
# Import necessary packages
from main_util import *
from skimage import filters

## Event Detection


In [None]:
def analyze_period(data_dir, fm=None):
    """Detect events for every date folder of one collection period and log them.

    Each non-hidden entry of ``data_dir`` is treated as one date. Events are
    detected with ``analyze_one_day``; every event is plotted with
    ``plot_event`` and saved as ``event_<k>.png`` (k starts at 1) inside the
    date folder. One summary row per event is collected and the complete
    table is written to ``<data_dir>.xlsx``.

        Args:
            data_dir: Path to the data of one period (one entry per date).
            fm: Flowmeter setting forwarded to ``analyze_one_day`` and
                ``plot_event``; a falsy value is forwarded unchanged. For
                ``'period_2'`` a truthy value is replaced per date by 2
                (dates after 20210319) or 5 (all other dates).

        Returns:
            pandas.DataFrame with one row per detected event, using the
            columns listed in ``cols`` below.
    """

    # Columns of the per-event log
    cols = ['date', 'plot_num', 'start index', 'end index',
            'start time', 'end time', 'feces volume', 'urine volume']
    # Rows are gathered in a list and turned into a frame once:
    # DataFrame.append no longer exists in pandas >= 2.0 and was quadratic.
    records = []
    last_date = None

    # os.listdir order is arbitrary; sorting keeps the row order reproducible
    for date in sorted(os.listdir(data_dir)):
        if date.startswith('.'):
            continue  # skip sys hidden files

        # Resolve the flowmeter setting per date without overwriting `fm`
        date_fm = fm
        if data_dir == 'period_2' and fm:
            # for period 2, there are some dates using fm w sr of 2
            # NOTE(review): "sr" presumably means sampling rate -- confirm
            date_fm = 2 if int(date) > 20210319 else 5

        # detect events for one date
        df, s_idxs, e_idxs = analyze_one_day(data_dir, date,
                                             flowmeter=date_fm, door=False)
        last_date = date

        # create figures and stats for each event
        for plot_num, (s, e) in enumerate(zip(s_idxs, e_idxs)):
            fig = plot_event(df, s, e, flowmeter=date_fm, derivative=None)
            fig.savefig(os.path.join(data_dir, date, 'event_' + str(plot_num + 1) + '.png'))
            # close exactly this figure so figures do not pile up in memory
            plt.close(fig)

            # 'plot_num' is zero-based here while the image file name is one-based
            records.append({
                'date': date, 'plot_num': plot_num,
                'start index': s, 'end index': e,
                'start time': df.loc[s, 'date_time'],
                'end time': df.loc[e, 'date_time'],
                'feces volume': df.loc[e, 'feces'] - df.loc[s, 'feces'],
                'urine volume': df.loc[e, 'urine'] - df.loc[s, 'urine'],
            })

    df_stat = pd.DataFrame(records, columns=cols)

    # Save the stats: a single sheet holding all rows, named after the last
    # processed date; fall back to a fixed name when no date was processed.
    sheet_name = last_date if last_date is not None else 'Sheet1'
    # The context manager saves and closes the workbook, also on errors
    # (ExcelWriter.save() no longer exists in pandas >= 2.0).
    with pd.ExcelWriter(data_dir + '.xlsx', engine='xlsxwriter') as writer:
        df_stat.to_excel(writer, sheet_name=sheet_name)

    return df_stat

In [None]:
# Event detection for period 1 without flowmeter data; the log goes to period_1.xlsx
duration_1 = analyze_period(data_dir='period_1', fm=None)

In [None]:
# Event detection for period 2; fm is truthy, so analyze_period chooses the
# flowmeter value (2 or 5) per date. The log goes to period_2.xlsx
duration_2 = analyze_period(data_dir='period_2', fm=2)

In [None]:
# Event detection for the third data set with flowmeter setting 2; the log goes to data_3.xlsx
duration_3 = analyze_period(data_dir='data_3', fm=2)

## Event Classification
The statistics for each detected event are now stored locally in Excel (`.xlsx`) workbooks, one per period. We can read them back for further modeling.

In [None]:
def read_stat(data_dir, min_volume=0.2, min_duration=15, max_duration=450):
    """Read stats for detected events and drop events outside the given limits.

    Every sheet of ``<data_dir>.xlsx`` is parsed and the sheets are stacked
    into one frame. Two columns are derived: ``volume`` (feces volume +
    urine volume) and ``duration`` (end index - start index).

        Args:
            data_dir: Path to the data; ``'.xlsx'`` is appended to locate
                the workbook.
            min_volume: Events with a smaller total volume are removed.
            min_duration: Events with a shorter duration are removed.
            max_duration: Events with a longer duration are removed.
                NOTE(review): duration is an index difference; it equals
                seconds only if samples are one second apart -- confirm.

        Returns:
            pandas.DataFrame of the remaining events, including the
            ``volume`` and ``duration`` columns. The row index is kept as
            parsed, so labels may repeat across sheets.
    """
    # The context manager releases the workbook handle even if parsing fails
    with pd.ExcelFile(data_dir + '.xlsx') as workbook:
        # read stats sheet by sheet
        sheets = [workbook.parse(sheet_name, skiprows=0)
                  for sheet_name in workbook.sheet_names]

    df_dates = pd.concat(sheets, axis=0)
    df_dates['volume'] = df_dates.loc[:, 'feces volume'] + df_dates.loc[:, 'urine volume']
    df_dates['duration'] = df_dates.loc[:, 'end index'] - df_dates.loc[:, 'start index']

    # One combined mask; all bounds are inclusive
    keep = (
        (df_dates.loc[:, 'volume'] >= min_volume)
        & (df_dates.loc[:, 'duration'] >= min_duration)
        & (df_dates.loc[:, 'duration'] <= max_duration)
    )
    return df_dates.loc[keep, :]

In [None]:
# Filtered event statistics of period 1 and their durations as an array
df_1 = read_stat(data_dir='period_1')
duration_1 = df_1['duration'].values

In [None]:
# Filtered event statistics of period 2 and their durations as an array
df_2 = read_stat(data_dir='period_2')
duration_2 = df_2['duration'].values

In [None]:
# Filtered event statistics of the third data set and their durations as an array
df_3 = read_stat(data_dir='data_3')
duration_3 = df_3['duration'].values

In [None]:
# Pool the event durations of all three data sets into one flat list
duration_short_123 = [*duration_1, *duration_2, *duration_3]

### GMM Modeling

In [None]:
from sklearn import mixture

# scikit-learn expects 2-D input: one row per event, a single feature column
duration_short_123 = np.array(duration_short_123).reshape(-1, 1)

# Two-component mixture. A fixed random_state makes the fit, and with it the
# order of the two components used by later cells, reproducible across runs.
gmm = mixture.GaussianMixture(n_components=2, covariance_type='full', random_state=0)
gmm.fit(duration_short_123)

In [None]:
# Fitted GMM parameters, flattened to one value per component
means = np.reshape(gmm.means_, (2, ))
var = np.reshape(gmm.covariances_, (2, ))
stds = np.sqrt(var)
weights = gmm.weights_

# Report each parameter vector on its own line
for _label, _value in (('means', means), ('var', var), ('stds', stds), ('weights', weights)):
    print(_label, _value)

In [None]:
# GMM prediction
y = gmm.predict(duration_short_123)
y_prob = gmm.predict_proba(duration_short_123)

In [None]:
from scipy.stats import norm


def gauss(x, mu, sigma, a=1.0):
    """Normal density scaled by a constant factor.

        Args:
            x: Point(s) at which to evaluate the curve (scalar or array).
            mu: Mean of the normal distribution.
            sigma: Standard deviation of the normal distribution.
            a: Multiplicative weight applied to the density; 1.0 gives the
                plain, unscaled density.

        Returns:
            ``norm.pdf(x, mu, sigma) * a``, with the same shape as ``x``.
    """
    return norm.pdf(x, mu, sigma) * a


def bimodal(x, mu1, sigma1, mu2, sigma2, a1=1.0, a2=1.0):
    """Sum of two scaled normal densities.

    The weights are passed on to ``gauss`` explicitly; without them the call
    lacked the required scale argument and raised a TypeError.

        Args:
            x: Point(s) at which to evaluate the curve (scalar or array).
            mu1, sigma1: Mean and standard deviation of the first component.
            mu2, sigma2: Mean and standard deviation of the second component.
            a1, a2: Weights of the first and second component; the default
                1.0 adds the two unscaled densities.

        Returns:
            ``gauss(x, mu1, sigma1, a1) + gauss(x, mu2, sigma2, a2)``.
    """
    return gauss(x, mu1, sigma1, a1) + gauss(x, mu2, sigma2, a2)

In [None]:
# Plotting GMM
# Evaluate the curves on an explicit grid with spacing 1. np.linspace defaults
# to only 50 samples, which is too coarse to draw a narrow component smoothly.
gmm_x = np.linspace(0, 450, 451)
# score_samples returns log-densities; exponentiate to get the mixture density
gmm_y = np.exp(gmm.score_samples(gmm_x.reshape(-1, 1)))

# Duration threshold marked on the plot
# NOTE(review): 124 is hard-coded; presumably the cut between the two
# components -- confirm how it was derived.
threshold = 124

# Plot histograms and gaussian curves
fig, ax = plt.subplots()
ax.hist(duration_short_123, bins=range(15, 450 + 5, 5), density=True)
ax.plot(gmm_x, gmm_y, color="crimson", lw=1, label="GMM")

# Weighted single components: first one dashed, second one dotted
label_1 = 'x~p(x|{:.0f}, {:.0f}\u00b2)*{:.1f}'.format(means[0], stds[0], weights[0])
label_2 = 'x~p(x|{:.0f}, {:.0f}\u00b2)*{:.1f}'.format(means[1], stds[1], weights[1])
ax.plot(gmm_x, gauss(gmm_x, means[0], stds[0], weights[0]), color='red', lw=1, ls="--", label=label_1)
ax.plot(gmm_x, gauss(gmm_x, means[1], stds[1], weights[1]), color='red', lw=1, ls=":", label=label_2)

ax.axvline(x=threshold, ls='-', linewidth=1, color='red', label='t = %d' % threshold)
ax.set_xlabel("Duration (s)", fontsize=14)
ax.set_ylabel("Density", fontsize=14)
ax.legend(fontsize=14)
ax.tick_params(axis='both', labelsize=14)
plt.show()