In [None]:
import json, os
import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA

from aquabyte.data_access_utils import RDSAccessUtils

In [None]:
%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

In [None]:
# Read the SQL credentials from the JSON file whose path is stored in the
# PROD_SQL_CREDENTIALS environment variable. The `with` block closes the file
# handle as soon as the JSON has been parsed instead of leaving it open.
with open(os.environ['PROD_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

In [None]:
# Every pen together with the name of the site it belongs to; used only for
# the reference listing printed at the end of this cell.
query = """
    select p.id as id, s.name as site_name, p.name as pen_name from pens p
    left join sites s
    on p.site_id = s.id
    order by p.id;
"""

df_pens = rds_access_utils.extract_from_database(query)

# Pens to compare and the first date (inclusive) to load summaries from.
# Pen sets tried previously: [56, 57, 58, 59, 60], [56, 57, 58, 59, 60, 66],
# [56, 57, 58, 59, 60, 85, 86], [56, 57, 58, 59, 60, 66, 37, 38, 95].
pen_ids = [56, 60, 85, 86]
date = '2020-01-01'

# (pen id, site name, pen name) for every pen in the database, not only the
# ones listed in pen_ids.
pen_infos = list(
    df_pens[['id', 'site_name', 'pen_name']].itertuples(index=False, name=None)
)

# One date-indexed `num_lati_fish` series per requested pen.
pen_series = []

for pen_id in pen_ids:
    query = """
        select date, female_avg, female_moving_avg, moving_avg, moving_moving_avg, num_lati_fish, num_moving_avg_lati_fish
        from day_summaries a
        where a.pen_id = %i
        and a.date >= '%s';
    """ % (pen_id, date)

    day_summaries = rds_access_utils.extract_from_database(query)

    day_summaries.index = pd.to_datetime(day_summaries['date'])
    day_summaries = day_summaries.sort_index()
    pen_series.append(day_summaries['num_lati_fish'])

# Assemble the frame in a single step. Assigning the columns one at a time to
# an initially empty DataFrame would adopt the first pen's dates as the index
# and silently drop any date that only the later pens have; concatenating
# along axis=1 keeps the union of all dates and leaves NaN where a pen has no
# summary for that date.
if pen_series:
    new_df = pd.concat(pen_series, axis=1, keys=pen_ids).sort_index()
else:
    new_df = pd.DataFrame()

for pen_info in pen_infos:
    print(pen_info)

In [None]:
# Smooth each column of new_df with a centred 20-row rolling median, then
# discard every row that still contains a NaN.
avg_df = new_df.rolling(window=20, center=True).median().dropna()

# Fit a two-component PCA and report what it found.
pca = PCA(n_components=2)
pca.fit(avg_df.values)
print(pca.explained_variance_ratio_, pca.components_, sep='\n')

# Project the raw (not mean-centred) smoothed values onto the two fitted axes.
components = avg_df.values @ pca.components_.T

fig, ax = plt.subplots(figsize=(7, 3))
fig.set_size_inches(15, 10)  # final size; supersedes the figsize above

for comp_idx, comp_label in enumerate(('1', '2')):
    ax.plot(components[:, comp_idx], label=comp_label)
ax.legend()

In [None]:
# Smooth each column of new_df with a centred 20-row rolling mean, then
# discard every row that still contains a NaN.
avg_df = new_df.rolling(window=20, center=True).mean().dropna()

# Fit a two-component PCA and report what it found.
pca = PCA(n_components=2)
pca.fit(avg_df.values)
print(pca.explained_variance_ratio_, pca.components_, sep='\n')

# Project the raw (not mean-centred) smoothed values onto the two fitted axes.
components = avg_df.values @ pca.components_.T

fig, ax = plt.subplots(figsize=(7, 3))
fig.set_size_inches(15, 10)  # final size; supersedes the figsize above

for comp_idx, comp_label in enumerate(('1', '2')):
    ax.plot(components[:, comp_idx], label=comp_label)
ax.legend()

In [None]:
# Centred 20-row rolling median of every column in new_df, drawn on one axes.
rolling_median = new_df.rolling(window=20, center=True).median()

fig, ax = plt.subplots(figsize=(7, 3))
fig.set_size_inches(15, 10)  # final size; supersedes the figsize above
rolling_median.plot(ax=ax)

In [None]:
# Same view as before: one line per column of new_df, each smoothed with a
# centred rolling median over 20 rows.
rolling_median = new_df.rolling(window=20, center=True).median()

fig, ax = plt.subplots(figsize=(7, 3))
fig.set_size_inches(15, 10)  # final size; supersedes the figsize above
rolling_median.plot(ax=ax)

In [None]:
# Disabled cell: rolling-median plot restricted to columns 66 and 95.
# Those ids are not in the pen_ids list loaded earlier in this notebook
# (56, 60, 85, 86), so they must be added there before re-enabling this.

# fig, ax = plt.subplots(figsize=(7,3))

# fig.set_size_inches(15, 10)

# new_df[[66, 95]].rolling(window=20,center=True).median().plot(ax=ax)

In [None]:
# Centred 20-row rolling mean of every column in new_df, drawn on one axes.
rolling_mean = new_df.rolling(window=20, center=True).mean()

fig, ax = plt.subplots(figsize=(7, 3))
fig.set_size_inches(15, 10)  # final size; supersedes the figsize above
rolling_mean.plot(ax=ax)

In [None]:
# Rolling-median view once more: centred window of 20 rows, one line per
# column of new_df.
rolling_median = new_df.rolling(window=20, center=True).median()

fig, ax = plt.subplots(figsize=(7, 3))
fig.set_size_inches(15, 10)  # final size; supersedes the figsize above
rolling_median.plot(ax=ax)

In [None]:
avgCorr = np.concatenate([[0], np.mean(avgCorrs, axis=1)])
indices = [0]

for i in range(int(bucket / 2), int(bucket / 2) + step * (len(avgCorr) - 1), step):
    indices.append(i)

fig, ax = plt.subplots(figsize=(7,3))

fig.set_size_inches(15, 10)

ax.plot(new_df.index[indices], avgCorr)

In [None]:
total = len(new_df)
bucket = 30
step = 3

corrSeries = []

for start in range(0, total - bucket + step, step):
    end = start + bucket
    
    #print(start, end)
    
    corr = new_df.ix[start:end].corr()
    
    #print(corr)
    
    corrSeries.append(corr.values)
    
labels = new_df.columns
numPens = len(labels)

corrSeries = np.array(corrSeries)

print(corrSeries.shape)

fig, ax = plt.subplots(numPens + 1)

fig.set_size_inches(15, (numPens + 1) * 10)

avgCorrs = []
for period in range(corrSeries.shape[0]):
    periodCorrs = []

    for index, pen in enumerate(labels):
        penPeriodAvg = (np.sum(corrSeries[period,index,:]) - 1) / (numPens - 1)
        
        periodCorrs.append(penPeriodAvg)
    avgCorrs.append(periodCorrs)

avgCorrs = np.array(avgCorrs)

for index, pen in enumerate(labels):
    ax[0].plot(avgCorrs[:,index], label = pen)

ax[0].set_title('Avg pen correlation')
ax[0].legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)

for o_index, o_pen in enumerate(labels):
    for index, pen in enumerate(labels):
        if o_pen == pen:
            continue

        ax[o_index + 1].plot(corrSeries[:, o_index, index], label = pen)

    ax[o_index + 1].set_title('%i pen correlation' % (o_pen, ))
    ax[o_index + 1].legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)

In [None]:
# Display the column labels of new_df that the correlation cell stored in `labels`.
labels

In [None]:
# Display the per-window, per-pen average correlations built in the correlation cell.
avgCorrs