In [None]:
import argparse
from collections import defaultdict
import json
import os
from matplotlib import pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.dates as mdates
import numpy as np
import pandas as pd

from filter_optimization.filter_optimization_task import NoDataException, SamplingFilter, generate_filter_mask, \
     extract_biomass_data
from population_metrics.population_metrics_base import generate_pm_base, PopulationMetricsBase
from population_metrics.growth_rate import compute_local_growth_rate
from population_metrics.raw_metrics import get_raw_kf_values, generate_raw_average_weight, get_raw_sample_size
from population_metrics.smart_metrics import generate_smart_avg_weight, generate_smart_individual_values, \
     generate_smart_distribution, generate_smart_avg_kf, get_smart_sample_size, get_smart_growth_rate, \
     generate_smart_standard_deviation
from population_metrics.confidence_metrics import generate_trend_stability, generate_distribution_consistency, \
     compute_biomass_kpi, get_raw_and_historical_weights
from research.utils.datetime_utils import get_dates_in_range
from research.utils.data_access_utils import RDSAccessUtils

In [None]:
# Read the warehouse credentials from the JSON file whose path is stored in the
# DATA_WAREHOUSE_SQL_CREDENTIALS environment variable. The context manager closes
# the file handle as soon as the JSON has been parsed instead of leaking it.
with open(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS']) as credentials_file:
    rds = RDSAccessUtils(json.load(credentials_file))


def gen_pm_base(df: pd.DataFrame, sampling_filter: SamplingFilter) -> PopulationMetricsBase:
    """
    Build a PopulationMetricsBase from the rows of the biomass computations
    data-frame that pass the given SamplingFilter.

    Raises NoDataException when the filter leaves no rows.
    """

    # boolean selection produced by the sampling filter
    keep = generate_filter_mask(df, sampling_filter)
    kept_rows = df[keep]

    # one (date, estimated weight, estimated k-factor) tuple per retained row
    dates = kept_rows.date.values
    weights = df.loc[keep, 'estimated_weight_g'].values
    k_factors = kept_rows.estimated_k_factor.values
    records = list(zip(dates, weights, k_factors))

    if len(records) == 0:
        raise NoDataException('No data found for given filter!')

    # hand the filtered records to the population metrics estimator
    return generate_pm_base(records)


def generate_ts_data(df: pd.DataFrame, sampling_filter: SamplingFilter) -> defaultdict:
    """
    Produce per-date time series of raw, growth-rate, confidence and smart metrics
    for the biomass computations in `df` that pass `sampling_filter`.

    The result maps 'date' to the list of dates between the first and last unique
    date of the population metrics base, and each metric name to a list holding
    one value per date, in the same order.
    """

    pm_base = gen_pm_base(df, sampling_filter)
    first_day, last_day = pm_base.unique_dates[0], pm_base.unique_dates[-1]
    dates = get_dates_in_range(first_day, last_day)

    # (output key, metric function) pairs, listed in the order they are evaluated per date
    metric_table = (
        # raw metrics
        ('raw_average_weight', generate_raw_average_weight),
        ('raw_sample_size', get_raw_sample_size),
        # growth rate metrics
        ('growth_rate', compute_local_growth_rate),
        # confidence metrics
        ('distribution_consistency', generate_distribution_consistency),
        ('kpi', compute_biomass_kpi),
        # smart metrics
        ('smart_average_weight', generate_smart_avg_weight),
        ('smart_average_kf', generate_smart_avg_kf),
        ('smart_sample_size', get_smart_sample_size),
        ('smart_growth_rate', get_smart_growth_rate),
    )

    ts_data = defaultdict(list)
    ts_data['date'].extend(dates)
    for day in dates:
        for key, metric_fn in metric_table:
            ts_data[key].append(metric_fn(pm_base, day))

    return ts_data




In [None]:
# Pen id and date-range strings handed to extract_biomass_data.
pen_id = 100
start_date = '2020-08-05'
end_date = '2020-08-29'

# Sampling filter settings; its AKPD score cutoff is also passed to the extraction call.
sampling_filter = SamplingFilter(
    start_hour=0,
    end_hour=24,
    kf_cutoff=1.24,
    akpd_score_cutoff=0.99,
)
df = extract_biomass_data(pen_id, start_date, end_date, sampling_filter.akpd_score_cutoff)

In [None]:
# Build the PopulationMetricsBase from the extracted data and the sampling filter.
pm_base = gen_pm_base(df, sampling_filter)

In [None]:
# Smart average weight for a single, hard-coded date (the same string later bound to `date`).
generate_smart_avg_weight(pm_base, '2020-08-27')

In [None]:
# Date used by the analysis cells that follow.
date = '2020-08-27'

def get_distribution(weights, bucket_cutoffs):
    """
    Percentage of weights that fall in each consecutive [low, high) bucket.

    Args:
        weights: array-like of weights, in the same unit as bucket_cutoffs.
        bucket_cutoffs: sequence of bucket edges; every adjacent pair (low, high)
            defines one half-open bucket.

    Returns:
        dict mapping a '<low>-<high>' label (edges multiplied by 1e-3) to the
        percentage of bucketed weights in that bucket, rounded to one decimal.
        Weights that fall outside every bucket are not counted. If no weight
        falls in any bucket, every bucket is reported as 0.0.
    """
    # accept lists / Series as well as arrays so the boolean masks below always work
    weights = np.asarray(weights)

    counts = {}
    for low, high in zip(bucket_cutoffs, bucket_cutoffs[1:]):
        bucket = f'{1e-3 * low}-{1e-3 * high}'
        counts[bucket] = int(((weights >= low) & (weights < high)).sum())

    total = sum(counts.values())
    if total == 0:
        # nothing to normalise by; avoid a ZeroDivisionError
        return {bucket: 0.0 for bucket in counts}

    return {bucket: round(100 * n / total, 1) for bucket, n in counts.items()}


def get_kf_breakdown(weights, kfs, bucket_cutoffs):
    """
    Mean K factor of the samples whose weight falls in each [low, high) bucket.

    Args:
        weights: array-like of weights, in the same unit as bucket_cutoffs.
        kfs: array-like of K factors, positionally aligned with weights.
        bucket_cutoffs: sequence of bucket edges; every adjacent pair (low, high)
            defines one half-open bucket.

    Returns:
        dict mapping a '<low>-<high>' label (edges multiplied by 1e-3) to the mean
        K factor in that bucket rounded to two decimals, or NaN for a bucket with
        no samples.
    """
    weights = np.asarray(weights)
    kfs = np.asarray(kfs)

    dist = {}
    for low, high in zip(bucket_cutoffs, bucket_cutoffs[1:]):
        bucket = f'{1e-3 * low}-{1e-3 * high}'
        in_bucket = kfs[(weights >= low) & (weights < high)]
        # an empty bucket still yields NaN, but without numpy's empty-slice RuntimeWarning
        dist[bucket] = round(in_bucket.mean(), 2) if in_bucket.size else float('nan')

    return dist
        
def pretty(d, indent=0):
    """
    Print a (possibly nested) dict as a tab-indented outline.

    Each key is printed at the current indent level; a dict value is printed
    recursively one level deeper, any other value is printed on its own line
    one level deeper than its key.
    """
    key_pad = '\t' * indent
    for key, value in d.items():
        print(key_pad + str(key))
        if not isinstance(value, dict):
            print('\t' * (indent+1) + str(value))
            continue
        pretty(value, indent + 1)
    

def generate_info(pm_base, date, loss_factor):
    """
    Print a summary of the smart metrics for `date` after scaling the individual
    smart weights by 0.9975 and by (1 - loss_factor).

    The report contains the adjusted average weight, the average K factor, the
    smart sample size, the standard deviation and coefficient of variation of
    the adjusted weights, and the weight distribution / K factor breakdown over
    500-wide buckets from 0 up to 9500. Nothing is returned.
    """
    raw_weights, k_factors = generate_smart_individual_values(pm_base, date, 3, True, True, 0.9)
    adjusted = raw_weights * 0.9975 * (1.0 - loss_factor)

    # summary statistics of the adjusted weights and the K factors
    mean_weight = np.mean(adjusted)
    mean_kf = np.mean(k_factors)
    sample_size = get_smart_sample_size(pm_base, date)
    weight_std = np.std(adjusted)
    variation = weight_std / mean_weight

    # both breakdowns share the same bucket edges
    cutoffs = np.arange(0, 10000, 500)
    weight_dist = get_distribution(adjusted, cutoffs)
    kf_breakdown = get_kf_breakdown(adjusted, k_factors, cutoffs)

    print('Loss Factor: {}%'.format(round(100 * loss_factor)))
    print('-----------')
    print('Smart Avg Weight: {}g'.format(round(mean_weight)))
    print('Smart K Factor: {}'.format(round(mean_kf, 2)))
    print('Smart Sample Size: {}'.format(sample_size))
    print('Smart Standard Deviation: {}g'.format(round(weight_std)))
    print('Coefficient of Variation: {}%'.format(round(100 * variation, 1)))
    print('Weight Distribution:')
    print(json.dumps(weight_dist, indent=4))
    print('KF Breakdown:')
    print(json.dumps(kf_breakdown, indent=4))

In [None]:
# Report the unadjusted case (0) followed by loss factors of 13% through 18% in 1% steps.
# The factors are derived from integer percentages rather than a float-step np.arange:
# that avoids accumulated floating-point error in the values and keeps every factor a
# plain Python number, so round() inside generate_info formats them all the same way.
for loss_factor in [0] + [pct / 100 for pct in range(13, 19)]:
    generate_info(pm_base, date, loss_factor)
    print(' ')

In [None]:
# Smart standard deviation for `date`, kept at notebook scope for the cells below.
smart_std = generate_smart_standard_deviation(pm_base, date)
print(smart_std)

In [None]:
# Coefficient of variation = smart standard deviation / smart average weight.
# `smart_avg` only ever existed as a local inside generate_info, so it is computed
# here explicitly; otherwise this cell raises NameError on a fresh kernel.
smart_avg = generate_smart_avg_weight(pm_base, date)
cov = smart_std / smart_avg
print(cov)

In [None]:
# Individual smart weights and K factors for `date`; same arguments as the call inside generate_info.
weights, kfs = generate_smart_individual_values(pm_base, date, 3, True, True, 0.9)
# NOTE(review): the scaling below is disabled, and generate_info scales by 0.9975 instead —
# confirm which factor is intended, then delete this line.
# weights = weights * 0.9985

In [None]:
# Smart average K factor for `date`.
generate_smart_avg_kf(pm_base, date)

In [None]:
# Weight distribution of the unscaled smart weights over 1000-wide buckets (edges 0..9000).
get_distribution(weights, np.arange(0, 10000, 1000))

In [None]:
def get_kf_breakdown(weights, bucket_cutoffs, *legacy_args):
    """
    Mean K factor of the samples whose weight falls in each [low, high) bucket.

    Accepted call shapes:
        get_kf_breakdown(weights, bucket_cutoffs)
            K factors are read from the notebook-level `kfs` array.
        get_kf_breakdown(weights, kfs, bucket_cutoffs)
            Same argument order as the three-argument definition earlier in the
            notebook, so generate_info() keeps working after this cell has run
            and replaced that definition.

    Returns:
        dict mapping a '<low>-<high>' label (edges multiplied by 1e-3) to the mean
        K factor in that bucket rounded to two decimals, or NaN for a bucket with
        no samples.

    Raises:
        TypeError: if more than three positional arguments are given.
    """
    if len(legacy_args) > 1:
        raise TypeError(
            'get_kf_breakdown() takes 2 or 3 positional arguments but '
            f'{2 + len(legacy_args)} were given'
        )
    if legacy_args:
        # three-argument form: the second positional argument carries the K factors
        kf_values, bucket_cutoffs = bucket_cutoffs, legacy_args[0]
    else:
        # two-argument form: fall back to the notebook-level K factor array
        kf_values = kfs

    weights = np.asarray(weights)
    kf_values = np.asarray(kf_values)

    dist = {}
    for low, high in zip(bucket_cutoffs, bucket_cutoffs[1:]):
        bucket = f'{1e-3 * low}-{1e-3 * high}'
        in_bucket = kf_values[(weights >= low) & (weights < high)]
        # an empty bucket still yields NaN, but without numpy's empty-slice RuntimeWarning
        dist[bucket] = round(in_bucket.mean(), 2) if in_bucket.size else float('nan')

    return dist
        
    
        
    

In [None]:
# Mean K factor per 1000-wide weight bucket, using the two-argument definition from the previous cell.
get_kf_breakdown(weights, np.arange(0, 10000, 1000))

In [None]:
def get_adj_distribution(weights, loss_factor, bucket_cutoffs):
    """
    Percentage of loss-adjusted weights in each consecutive [low, high) bucket.

    Args:
        weights: array-like of weights, in the same unit as bucket_cutoffs.
        loss_factor: fraction removed from every weight; each weight is
            multiplied by (1.0 - loss_factor) before bucketing.
        bucket_cutoffs: sequence of bucket edges; every adjacent pair (low, high)
            defines one half-open bucket.

    Returns:
        dict mapping a '<low>-<high>' label to the (unrounded) percentage of
        bucketed weights in that bucket. Adjusted weights outside every bucket
        are not counted. If no weight falls in any bucket, every bucket is
        reported as 0.0.
    """
    # np.asarray lets plain lists be scaled element-wise as well
    adj_weights = np.asarray(weights) * (1.0 - loss_factor)

    counts = {}
    for low, high in zip(bucket_cutoffs, bucket_cutoffs[1:]):
        bucket = f'{low}-{high}'
        counts[bucket] = int(((adj_weights >= low) & (adj_weights < high)).sum())

    total = sum(counts.values())
    if total == 0:
        # nothing to normalise by; avoid a ZeroDivisionError
        return {bucket: 0.0 for bucket in counts}

    return {bucket: 100 * n / total for bucket, n in counts.items()}
        
    
        
    

In [None]:
# Adjusted weight distribution for a 16% loss factor over 1000-wide buckets (edges 0..9000).
loss_factor = 0.16
bucket_cutoffs = np.arange(0, 10000, 1000)
dist_16 = get_adj_distribution(weights, loss_factor, bucket_cutoffs)

In [None]:
# Display the 16% loss-factor distribution computed in the previous cell.
dist_16

In [None]:
# Same adjusted distribution with a loss factor of 0.17.
get_adj_distribution(weights, 0.17, bucket_cutoffs)

In [None]:
# Same adjusted distribution with a loss factor of 0.1752.
get_adj_distribution(weights, 0.1752, bucket_cutoffs)

In [None]:
# Hand-entered inputs for the weighted regression fitted in the next cell:
# X is used as the single feature, Y as the target and W as the per-sample weights.
# NOTE(review): the source and units of these values are not recorded here — document them.
X = [6, 7, 8, 11, 7, 6, 11]
Y = [0.0526624699, -0.009913795167, 0.01558849764, -0.02291304971, -0.01581060603, -0.001067805761, -0.01236907407]
W = [6440.00, 6589.00, 20874.00, 5178.00, 39081, 39081, 39081]

In [None]:
# NOTE(review): this import sits mid-notebook; consider moving it to the import cell at the top.
from sklearn.linear_model import LinearRegression

# Convert the raw lists to arrays; the feature is reshaped into a single column.
X = np.array(X).reshape(-1, 1)
Y = np.array(Y)
W = np.array(W)

# Weighted linear regression with no intercept term (W supplies the sample weights).
lr = LinearRegression(fit_intercept=False)
lr.fit(X, Y, sample_weight=W)

In [None]:
# Fitted coefficient(s) of the no-intercept regression.
lr.coef_

In [None]:
# NOTE(review): numpy is already imported as `np` in the first cell; this re-import is redundant.
import numpy as np
# One minus the exponential of the fitted coefficient.
1 - np.exp(lr.coef_[0])

In [None]:
# Exponential of twice the fitted coefficient.
np.exp(2*lr.coef_[0])

In [None]:
# Exponential of the model's linear term at a feature value of 7.
# `lr` was created with fit_intercept=False, so intercept_ should be 0.0 here (see scikit-learn docs).
np.exp(lr.intercept_ + 7*lr.coef_)

In [None]:
# Intercept of the fitted model (the model was created with fit_intercept=False).
lr.intercept_

In [None]:
# NOTE(review): hard-coded coefficient — presumably copied from an earlier lr.coef_ output; confirm its origin.
coef = -0.00277068

In [None]:
# Exponential of seven times the hard-coded coefficient.
np.exp(7*coef)