In [None]:
import argparse
from collections import defaultdict
import json
import os
from matplotlib import pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.dates as mdates
import numpy as np
import pandas as pd

from filter_optimization.filter_optimization_task import NoDataException, SamplingFilter, generate_filter_mask, \
     extract_biomass_data
from population_metrics.population_metrics_base import generate_pm_base, PopulationMetricsBase
from population_metrics.growth_rate import compute_local_growth_rate
from population_metrics.raw_metrics import get_raw_kf_values, generate_raw_average_weight, get_raw_sample_size
from population_metrics.smart_metrics import generate_smart_avg_weight, generate_smart_individual_values, \
     generate_smart_distribution, generate_smart_avg_kf, get_smart_sample_size, get_smart_growth_rate, \
     generate_smart_standard_deviation
from population_metrics.confidence_metrics import generate_trend_stability, generate_distribution_consistency, \
     compute_biomass_kpi, get_raw_and_historical_weights
from research.utils.datetime_utils import get_dates_in_range
from research.utils.data_access_utils import RDSAccessUtils

In [None]:
help(extract_biomass_data)

In [None]:
# Build the data-warehouse accessor from the JSON credentials file whose path is
# given by the DATA_WAREHOUSE_SQL_CREDENTIALS environment variable. The context
# manager closes the file as soon as it has been parsed instead of leaking the handle.
with open(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS']) as credentials_file:
    rds = RDSAccessUtils(json.load(credentials_file))


def gen_pm_base(df: pd.DataFrame, sampling_filter: SamplingFilter) -> PopulationMetricsBase:
    """
    Builds a PopulationMetricsBase from the rows of the biomass computations
    data-frame that pass the given SamplingFilter.

    Every retained row contributes one (date, estimated_weight_g, estimated_k_factor)
    tuple to the list handed to generate_pm_base.

    Raises:
        NoDataException: if the filter leaves no rows at all.
    """

    # apply the filter mask once and read the three columns from the filtered frame
    selected = df[generate_filter_mask(df, sampling_filter)]
    dates = selected['date'].values
    weights = selected['estimated_weight_g'].values
    k_factors = selected['estimated_k_factor'].values
    biomass_computations = [*zip(dates, weights, k_factors)]

    # an empty selection cannot seed the population metrics estimator
    if len(biomass_computations) == 0:
        raise NoDataException('No data found for given filter!')
    return generate_pm_base(biomass_computations)


def generate_ts_data(df: pd.DataFrame, sampling_filter: SamplingFilter) -> defaultdict:
    """
    Computes per-date time series of raw, growth-rate, confidence and smart metrics
    for the filtered biomass computations in df.

    The dates come from get_dates_in_range, called with the first and last entries of
    pm_base.unique_dates. The result maps 'date' to those dates and each metric name
    to a list holding one value per date, in the same order.
    """

    pm_base = gen_pm_base(df, sampling_filter)
    first_date = pm_base.unique_dates[0]
    last_date = pm_base.unique_dates[-1]
    dates = get_dates_in_range(first_date, last_date)

    # (output key, metric function) pairs; evaluated in this order for every date
    metrics = (
        # raw metrics
        ('raw_average_weight', generate_raw_average_weight),
        ('raw_sample_size', get_raw_sample_size),
        # growth rate metrics
        ('growth_rate', compute_local_growth_rate),
        # confidence metrics
        ('distribution_consistency', generate_distribution_consistency),
        ('kpi', compute_biomass_kpi),
        # smart metrics
        ('smart_average_weight', generate_smart_avg_weight),
        ('smart_average_kf', generate_smart_avg_kf),
        ('smart_sample_size', get_smart_sample_size),
        ('smart_growth_rate', get_smart_growth_rate),
    )

    ts_data = defaultdict(list)
    ts_data['date'].extend(dates)
    for date in dates:
        for key, metric in metrics:
            ts_data[key].append(metric(pm_base, date))

    return ts_data




In [None]:
import population_metrics
print(population_metrics.__path__)

In [None]:
# note: end_date is exclusive. 

pen_id, start_date, end_date = 208, '2020-11-23', '2020-12-03'
sampling_filter = SamplingFilter(start_hour=9, end_hour=13, kf_cutoff=0, akpd_score_cutoff=0.95)
df = extract_biomass_data(pen_id, start_date, end_date, sampling_filter.akpd_score_cutoff)

In [None]:
sampling_filter2 = SamplingFilter(start_hour=0, end_hour=23, kf_cutoff=0, akpd_score_cutoff=0.95)
df2 = extract_biomass_data(pen_id, start_date, end_date, sampling_filter2.akpd_score_cutoff)

In [None]:
df2

In [None]:
import matplotlib.pyplot as plt
df2.plot.hist(y='estimated_weight_g', bins=100, figsize=(20, 5))
plt.axvline(2000)

In [None]:
df2.plot.scatter(x='captured_at', y='estimated_weight_g', figsize=(20, 5))
plt.xlim((df2['captured_at'].min(), df2['captured_at'].max()))
plt.axhline(2000)

In [None]:
df2['captured_at'].min()

In [None]:
c, bins = pd.cut(df2['estimated_weight_g'], 100, retbins=True)
fig, ax = plt.subplots(figsize=(20, 5))
df2['bins'] = c
c.value_counts().sort_index().plot.bar(ax=ax)
df2.groupby('bins')['estimated_k_factor'].mean().plot(ax=ax, secondary_y=True)

In [None]:
pm_base = gen_pm_base(df, sampling_filter)

In [None]:
generate_smart_avg_weight(pm_base, '2020-12-02', 3, True, True, 0.9)

In [None]:
help(generate_smart_avg_weight)

In [None]:
date = '2020-12-02'

def get_distribution(weights, bucket_cutoffs):
    """
    Returns the percentage of weights that fall into each [low, high) bucket.

    Args:
        weights: array supporting element-wise comparison and boolean-mask indexing
            (e.g. a numpy array).
        bucket_cutoffs: sequence of bucket edges; each consecutive pair (low, high)
            defines one bucket.

    Returns:
        dict mapping a '<1e-3 * low>-<1e-3 * high>' label to the share (in percent,
        rounded to one decimal) of the counted weights in that bucket. Weights outside
        every bucket are ignored and do not enter the denominator. When no weight
        falls into any bucket, every share is 0.0 (previously a ZeroDivisionError).
    """
    counts = {}
    total = 0
    for low, high in zip(bucket_cutoffs, bucket_cutoffs[1:]):
        bucket = f'{1e-3 * low}-{1e-3 * high}'
        bucket_count = weights[(weights >= low) & (weights < high)].shape[0]
        counts[bucket] = bucket_count
        total += bucket_count

    # nothing was counted: report an all-zero distribution instead of dividing by zero
    if total == 0:
        return {bucket: 0.0 for bucket in counts}
    return {bucket: round(100 * n / total, 1) for bucket, n in counts.items()}


def get_kf_breakdown(weights, kfs, bucket_cutoffs):
    """
    Returns the mean k-factor of the fish whose weight falls into each [low, high) bucket.

    Args:
        weights: array of weights supporting element-wise comparison.
        kfs: array of k-factors, indexable by a boolean mask built from weights
            (so it is expected to line up element-for-element with weights).
        bucket_cutoffs: sequence of bucket edges; each consecutive pair (low, high)
            defines one bucket.

    Returns:
        dict mapping a '<1e-3 * low>-<1e-3 * high>' label to the mean k-factor in
        that bucket rounded to two decimals, or NaN for a bucket with no members.
    """
    dist = {}
    for low, high in zip(bucket_cutoffs, bucket_cutoffs[1:]):
        bucket = f'{1e-3 * low}-{1e-3 * high}'
        bucket_kfs = kfs[(weights >= low) & (weights < high)]
        # An empty bucket has no mean. Report NaN explicitly rather than taking the
        # mean of an empty selection, which numpy answers with a RuntimeWarning.
        dist[bucket] = round(bucket_kfs.mean(), 2) if len(bucket_kfs) else float('nan')

    return dist
        
def pretty(d, indent=0):
    """
    Prints a (possibly nested) dict as a tab-indented tree.

    Each key is printed at depth `indent`. A dict value is printed recursively one
    level deeper; any other value is printed on its own line one level deeper.
    """
    key_prefix = '\t' * indent
    value_prefix = '\t' * (indent + 1)
    for key, value in d.items():
        print(key_prefix + str(key))
        if not isinstance(value, dict):
            print(value_prefix + str(value))
            continue
        pretty(value, indent + 1)
    

def generate_info(pm_base, date, loss_factor):
    """
    Prints and returns summary statistics of the smart individual weights for `date`
    after scaling every weight by (1 - loss_factor).

    Args:
        pm_base: population metrics base passed through to the smart-metric helpers.
        date: date for which the smart individual values are generated.
        loss_factor: fraction removed from every weight (0.16 means 16%).

    Returns:
        dict with the loss factor (percent), rounded average weight, rounded mean
        k-factor, smart sample size, unrounded standard deviation, coefficient of
        variation (percent), the weight distribution and the per-bucket k-factor
        breakdown over 1000-wide buckets from 0 to 13000.
    """
    weights, kfs = generate_smart_individual_values(pm_base, date, 1, False, False, 0.9)
    retained_fraction = 1.0 - loss_factor
    vals = weights * 1.0 * retained_fraction

    smart_avg = np.mean(vals)
    smart_kf = np.mean(kfs)
    smart_sample_size = get_smart_sample_size(pm_base, date)
    smart_std = np.std(vals)
    cov = smart_std / smart_avg

    bucket_cutoffs = np.arange(0, 14000, 1000)
    weight_dist = get_distribution(vals, bucket_cutoffs)
    kf_breakdown = get_kf_breakdown(vals, kfs, bucket_cutoffs)

    # each rounded figure is computed once, printed, and reused in the returned dict
    loss_pct = round(100 * loss_factor)
    print('Loss Factor: {}%'.format(loss_pct))
    print('-----------')
    avg_rounded = round(smart_avg)
    print('Smart Avg Weight: {}g'.format(avg_rounded))
    kf_rounded = round(smart_kf, 2)
    print('Smart K Factor: {}'.format(kf_rounded))
    print('Smart Sample Size: {}'.format(smart_sample_size))
    print('Smart Standard Deviation: {}g'.format(round(smart_std)))
    cov_pct = round(100 * cov, 1)
    print('Coefficient of Variation: {}%'.format(cov_pct))
    print('Weight Distribution:')
    print(json.dumps(weight_dist, indent=4))
    print('KF Breakdown:')
    print(json.dumps(kf_breakdown, indent=4))

    return {
        'loss_factor': loss_pct,
        'smart_average_weight': avg_rounded,
        'smart_k_factor': kf_rounded,
        'smart_sample_size': smart_sample_size,
        'smart_standard_deviation': smart_std,
        'coefficient_of_variation': cov_pct,
        'weight_distribution': weight_dist,
        'kf_breakdown': kf_breakdown
    }

In [None]:
# Sweep the loss factor over 0% plus 13%..18% in 1% steps. The percentages are built
# as integers and then scaled, because np.arange with a fractional step can gain or
# lose its last element through floating-point round-off.
output = []
for loss_factor in [0] + list(np.arange(13, 19) / 100):
    output.append(generate_info(pm_base, date, loss_factor))
    print(' ')

In [None]:
print(json.dumps(output, indent=4).replace('NaN', 'null'))

In [None]:
"""
This module contains constants representing core & auxiliary fish body parts.
"""

UPPER_LIP = 'UPPER_LIP'
EYE = 'EYE'
PECTORAL_FIN = 'PECTORAL_FIN'
DORSAL_FIN = 'DORSAL_FIN'
PELVIC_FIN = 'PELVIC_FIN'
ADIPOSE_FIN = 'ADIPOSE_FIN'
ANAL_FIN = 'ANAL_FIN'
TAIL_NOTCH = 'TAIL_NOTCH'
UPPER_PRECAUDAL_PIT = 'UPPER_PRECAUDAL_PIT'
LOWER_PRECAUDAL_PIT = 'LOWER_PRECAUDAL_PIT'
HYPURAL_PLATE = 'HYPURAL_PLATE'

# Core body parts, sorted alphabetically. The keypoint arrays built from annotations
# in this file iterate over this list, so its order fixes the row order of those arrays.
core_body_parts = sorted([UPPER_LIP,
                          EYE,
                          PECTORAL_FIN,
                          DORSAL_FIN,
                          PELVIC_FIN,
                          ADIPOSE_FIN,
                          ANAL_FIN,
                          TAIL_NOTCH])

# Auxiliary body parts, also sorted alphabetically.
auxiliary_body_parts = sorted([UPPER_PRECAUDAL_PIT,
                               LOWER_PRECAUDAL_PIT,
                               HYPURAL_PLATE])

# Union of core and auxiliary body parts, sorted alphabetically.
all_body_parts = sorted(core_body_parts + auxiliary_body_parts)

"""This module contains utility helper functions for the WeightEstimator class."""

from collections import namedtuple
from typing import Dict, List, Tuple
import numpy as np
import torch


# Camera parameters used when converting pixel keypoints to world coordinates:
# focal_length_pixel and baseline_m turn horizontal disparity into depth, while
# image_sensor_width/height, pixel_count_width/height and focal_length scale pixel
# offsets into the other two world axes.
# NOTE(review): units are not stated in this file (baseline_m is presumably metres) --
# confirm against the camera metadata source.
CameraMetadata = namedtuple('CameraMetadata',
                            ['focal_length', 'focal_length_pixel', 'baseline_m',
                             'pixel_count_width', 'pixel_count_height', 'image_sensor_width',
                             'image_sensor_height'])


def get_left_right_keypoint_arrs(annotation: Dict[str, List[Dict]]) -> Tuple:
    """Builds numpy arrays of left and right full-frame keypoints from an annotation.
    Args:
        annotation: dict with keys 'leftCrop' and 'rightCrop'. Each value is a list of
        dicts, of which this function reads 'keypointType', 'xFrame' and 'yFrame'.
    Returns:
        X_left: numpy array with one (xFrame, yFrame) row per core body part of the
        left crop, in the (alphabetical) order of core_body_parts.
        X_right: same as above, but for the right crop.
    """

    def frame_coords_by_part(crop_items):
        """Maps each keypoint type in one crop to its (xFrame, yFrame) pair."""
        coords = {}
        for item in crop_items:
            part = item['keypointType']
            coords[part] = (item['xFrame'], item['yFrame'])
        return coords

    left_by_part = frame_coords_by_part(annotation['leftCrop'])
    right_by_part = frame_coords_by_part(annotation['rightCrop'])

    # look up left and right together so rows of both arrays follow core_body_parts
    pairs = [(left_by_part[part], right_by_part[part]) for part in core_body_parts]

    X_left = np.array([left for left, _ in pairs])
    X_right = np.array([right for _, right in pairs])
    return X_left, X_right


def normalize_left_right_keypoint_arrs(X_left: np.ndarray, X_right: np.ndarray) -> Tuple:
    """Normalizes input left and right key-point arrays. The normalization involves (1) 2D
    translation of all keypoints so that the bounding-box center of the reference view sits
    at the origin, (2) when UPPER_LIP is not to the right of TAIL_NOTCH in the left view,
    swapping the two views and mirroring the x-axis, and (3) rotation of the 2D coordinates
    about the origin such that the line passing through UPPER_LIP and the center is
    horizontal. The right array is rebuilt from the rotated left array minus the
    un-rotated left/right difference.
    """

    lip_idx = core_body_parts.index(UPPER_LIP)
    notch_idx = core_body_parts.index(TAIL_NOTCH)

    # pick the reference view; when the lip is not right of the tail notch the views swap
    lip_right_of_notch = X_left[lip_idx, 0] > X_left[notch_idx, 0]
    if lip_right_of_notch:
        reference, other = X_left, X_right
    else:
        reference, other = X_right, X_left

    # translate both views by the bounding-box center of the reference view
    X_center = 0.5 * (np.max(reference, axis=0) + np.min(reference, axis=0))
    X_left_centered = reference - X_center
    X_right_centered = other - X_center
    if not lip_right_of_notch:
        # mirror the x-axis of the swapped views
        X_left_centered[:, 0] = -X_left_centered[:, 0]
        X_right_centered[:, 0] = -X_right_centered[:, 0]

    # rotate key-points by the angle of the upper lip relative to the x-axis
    lip_x, lip_y = tuple(X_left_centered[lip_idx])
    theta = np.arctan(lip_y / lip_x)
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    R = np.array([
        [cos_t, -sin_t],
        [sin_t, cos_t]
    ])

    offset = X_left_centered - X_right_centered
    X_left_rot = np.dot(X_left_centered, R)
    X_right_rot = X_left_rot - offset
    return X_left_rot, X_right_rot


def convert_to_world_point_arr(X_left: np.ndarray, X_right: np.ndarray,
                               camera_metadata: CameraMetadata) -> np.ndarray:
    """Converts normalized left/right keypoint arrays into an array of world coordinates.

    Column 0 of the inputs is the horizontal pixel coordinate and column 1 the vertical
    one. The result has one (x_world, y_world, z_world) row per keypoint, where y_world
    is the depth obtained from the left/right horizontal disparity.
    """

    cm = camera_metadata

    # depth from disparity
    depth_scale = cm.focal_length_pixel * cm.baseline_m
    disparity = X_left[:, 0] - X_right[:, 0]
    y_world = depth_scale / disparity

    # Note: the lines commented out below are technically the correct formula for conversion
    # x_world = X_left[:, 0] * y_world / camera_metadata.focal_length_pixel
    # z_world = -X_left[:, 1] * y_world / camera_metadata.focal_length_pixel
    x_sensor = X_left[:, 0] * cm.image_sensor_width / cm.pixel_count_width
    z_sensor = X_left[:, 1] * cm.image_sensor_height / cm.pixel_count_height
    x_world = (x_sensor * y_world) / cm.focal_length
    z_world = (-z_sensor * y_world) / cm.focal_length

    return np.vstack([x_world, y_world, z_world]).T


def stabilize_keypoints(X: np.ndarray) -> np.ndarray:
    """Rescales a world coordinate array into the form fed to the neural network.

    With input columns (0, 1, 2), the output columns are 0.5 * col0 / col1,
    0.5 * col2 / col1 and 0.05 / col1. The output has the same shape as X.
    """
    col0, col1, col2 = X[:, 0], X[:, 1], X[:, 2]
    stabilized = np.zeros(X.shape)
    stabilized[:, 0] = 0.5 * col0 / col1
    stabilized[:, 1] = 0.5 * col2 / col1
    stabilized[:, 2] = 0.05 / col1
    return stabilized


def convert_to_nn_input(annotation: Dict[str, List[Dict]], camera_metadata: CameraMetadata) \
        -> torch.Tensor:
    """Converts input keypoint annotation and camera metadata into neural network tensor input.

    Pipeline: extract left/right keypoint arrays, normalize them, convert to world
    coordinates, stabilize, then wrap the result in a float tensor with a leading axis
    of size one.
    """
    left_px, right_px = get_left_right_keypoint_arrs(annotation)
    left_norm, right_norm = normalize_left_right_keypoint_arrs(left_px, right_px)
    world_points = convert_to_world_point_arr(left_norm, right_norm, camera_metadata)
    stabilized = stabilize_keypoints(world_points)

    # wrapping in a list adds the leading axis
    batch = np.array([stabilized])
    return torch.from_numpy(batch).float()

"""
This module contains the WeightEstimator class for estimating fish weight (g), length (mm), and
k-factor given input keypoint coordinates and camera metadata.
"""

from typing import Dict, Tuple
import torch
from torch import nn

class Network(nn.Module):
    """Fully-connected network used for both weight and k-factor estimation
    (the two estimators currently share this architecture): 24 inputs, hidden layers of
    256, 128 and 64 units with ReLU activations, and a single linear output."""

    def __init__(self):
        super().__init__()
        # attribute names and creation order define the state-dict keys
        self.fc1 = nn.Linear(24, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.output = nn.Linear(64, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Run inference on input keypoint tensor; everything after the first axis
        is flattened before the first layer."""
        hidden = x.view(x.shape[0], -1)
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = self.relu(layer(hidden))
        return self.output(hidden)


class WeightEstimator:
    """WeightEstimator class is used to predict fish weight, k-factor, and length
    given input keypoint annotations and camera metadata."""

    def __init__(self, weight_model_f: str, kf_model_f: str) -> None:
        """Initializes class with input weight and k-factor neural-networks.

        Args:
            weight_model_f: path to the saved state dict of the weight network.
            kf_model_f: path to the saved state dict of the k-factor network.
        """
        self.weight_model = self._load_network(weight_model_f)
        self.kf_model = self._load_network(kf_model_f)

    @staticmethod
    def _load_network(model_f: str) -> 'Network':
        """Loads the state dict stored at model_f into a fresh Network in eval mode."""
        network = Network()
        # map_location='cpu' lets a checkpoint that was saved from a GPU load on a
        # CPU-only host; the freshly constructed Network lives on the CPU either way.
        network.load_state_dict(torch.load(model_f, map_location='cpu'))
        network.eval()
        return network

    @staticmethod
    def _get_model_input(annotation: Dict, camera_metadata: CameraMetadata) -> torch.Tensor:
        """Generates neural-network input tensor given annotation and camera_metadata."""
        X = convert_to_nn_input(annotation, camera_metadata)
        return X

    def _weight_from_input(self, X: torch.Tensor) -> float:
        """Runs the weight network on a prepared input; the raw output is scaled by 1e4."""
        # inference only: no autograd graph is needed
        with torch.no_grad():
            return 1e4 * self.weight_model(X).item()

    def _kf_from_input(self, X: torch.Tensor) -> float:
        """Runs the k-factor network on a prepared input."""
        with torch.no_grad():
            return self.kf_model(X).item()

    def predict_weight(self, annotation: Dict, camera_metadata: CameraMetadata) -> float:
        """Generates weight prediction given input annotation and camera metadata."""
        X = self._get_model_input(annotation, camera_metadata)
        return self._weight_from_input(X)

    def predict_kf(self, annotation: Dict, camera_metadata: CameraMetadata) -> float:
        """Generates k-factor prediction given input annotation and camera metadata."""
        X = self._get_model_input(annotation, camera_metadata)
        return self._kf_from_input(X)

    def predict(self, annotation: Dict, camera_metadata: CameraMetadata) -> Tuple:
        """Generates weight, k-factor, and length predictions given input annotation and camera
        metadata.

        Returns:
            (weight, length, kf). length is (1e5 * weight / kf) ** (1/3) when
            weight * kf is positive and 0.0 otherwise.
        """
        # the network input is identical for both models, so build it once
        X = self._get_model_input(annotation, camera_metadata)
        weight = self._weight_from_input(X)
        kf = self._kf_from_input(X)

        # NOTE(review): the product is also positive when both predictions are negative;
        # confirm whether `weight > 0 and kf > 0` is what is actually intended.
        if weight * kf > 0:
            length = (1e5 * weight / kf) ** (1.0 / 3)
        else:
            length = 0.0
        return weight, length, kf

In [None]:
# Translate the snake_case CameraMetadata field names to camelCase
# (e.g. 'focal_length_pixel' -> 'focalLengthPixel'); these are used below as keys into
# the 'camera_metadata' dict of a row.
fields = [''.join([f.split('_')[0]] + [p[0].upper() + p[1:] for p in f.split('_')[1:]]) for f in CameraMetadata._fields]
fields

In [None]:
row = df.iloc[0]
row_meta = row['camera_metadata']
row_meta

In [None]:
# Build the CameraMetadata positionally from the row's metadata dict. fields[2] is
# 'baselineM'; the [:-1] drops the trailing 'M' so the lookup key is 'baseline'.
cm = CameraMetadata(row_meta[fields[0]], row_meta[fields[1]], 
               row_meta[fields[2][:-1]], row_meta[fields[3]], 
               row_meta[fields[4]], row_meta[fields[5]], 
               row_meta[fields[6]])

In [None]:
weight_estimator = WeightEstimator('models/nn_epoch_798_v2.pb', 'models/kf_predictor_v2.pb')
row = df.iloc[0]
weight, length, kf = weight_estimator.predict(row['annotation'], cm)
weight

In [None]:
def pred(row):
    """
    Re-predicts weight, length and k-factor for one data-frame row.

    Reads the row's 'camera_metadata' dict (camelCase keys) and 'annotation', and runs
    the module-level weight_estimator on them.

    Returns:
        dict with keys 'estimated_weight_g', 'estimated_length_mm' and
        'estimated_k_factor'.
    """
    def to_camel(snake_name):
        head, *tail = snake_name.split('_')
        return ''.join([head] + [part[0].upper() + part[1:] for part in tail])

    metadata = row['camera_metadata']
    keys = [to_camel(field) for field in CameraMetadata._fields]
    # the third field is 'baselineM'; the metadata dict is read without the trailing 'M'
    keys[2] = keys[2][:-1]
    camera = CameraMetadata(*[metadata[key] for key in keys[:7]])

    weight, length, kf = weight_estimator.predict(row['annotation'], camera)
    return {'estimated_weight_g': weight, 'estimated_length_mm': length, 'estimated_k_factor': kf}
pred(row)

In [None]:
new_preds = df.apply(pred, axis=1).apply(pd.Series)

In [None]:
df = pd.concat([df.drop(new_preds.columns, axis=1), new_preds], axis=1)

In [None]:
pm_base = gen_pm_base(df, sampling_filter)

In [None]:
generate_smart_avg_weight(pm_base, '2020-12-02')

In [None]:
df = pd.concat([df.drop(new_preds.columns, axis=1), new_preds], axis=1)

In [None]:
pm_base = gen_pm_base(df, sampling_filter)

In [None]:
generate_smart_avg_weight(pm_base, '2020-12-02')

In [None]:
df.groupby('date')['estimated_weight_g'].mean().plot()

In [None]:
weights, _ = generate_smart_individual_values(pm_base, '2020-12-02', 3, True, True, 0.9)

In [None]:
bias_mapping = {
    0: -1.5,
    1000: 1.7,
    2000: 1.0,
    3000: 1.7,
    4000: 0.9,
    5000: -0.8,
    6000: -1.0,
    7000: -1.4,
    8000: -1.4,
    9000: -5.8
}

In [None]:
adj_weights = []
for w in weights:
    bucket = int(1000 * int(w / 1000))
    adj_weight = (1.0 - 0.01 * bias_mapping[bucket]) * w
    adj_weights.append(adj_weight)

In [None]:
np.mean(adj_weights)

In [None]:
# NOTE(review): the value subtracted (6470) and the divisor (6570) differ. If this is
# meant to be a relative difference against a single reference weight, one of the two
# is probably a typo -- confirm which number is intended.
(np.mean(adj_weights) - 6470) / 6570

In [None]:
6409 * 1.008

In [None]:
date = '2020-12-02'

def get_distribution(weights, bucket_cutoffs):
    """
    Returns the percentage of weights that fall into each [low, high) bucket.

    Args:
        weights: array supporting element-wise comparison and boolean-mask indexing
            (e.g. a numpy array).
        bucket_cutoffs: sequence of bucket edges; each consecutive pair (low, high)
            defines one bucket.

    Returns:
        dict mapping a '<1e-3 * low>-<1e-3 * high>' label to the share (in percent,
        rounded to one decimal) of the counted weights in that bucket. Weights outside
        every bucket are ignored and do not enter the denominator. When no weight
        falls into any bucket, every share is 0.0 (previously a ZeroDivisionError).
    """
    counts = {}
    total = 0
    for low, high in zip(bucket_cutoffs, bucket_cutoffs[1:]):
        bucket = f'{1e-3 * low}-{1e-3 * high}'
        bucket_count = weights[(weights >= low) & (weights < high)].shape[0]
        counts[bucket] = bucket_count
        total += bucket_count

    # nothing was counted: report an all-zero distribution instead of dividing by zero
    if total == 0:
        return {bucket: 0.0 for bucket in counts}
    return {bucket: round(100 * n / total, 1) for bucket, n in counts.items()}


def get_kf_breakdown(weights, kfs, bucket_cutoffs):
    """
    Returns the mean k-factor of the fish whose weight falls into each [low, high) bucket.

    Args:
        weights: array of weights supporting element-wise comparison.
        kfs: array of k-factors, indexable by a boolean mask built from weights
            (so it is expected to line up element-for-element with weights).
        bucket_cutoffs: sequence of bucket edges; each consecutive pair (low, high)
            defines one bucket.

    Returns:
        dict mapping a '<1e-3 * low>-<1e-3 * high>' label to the mean k-factor in
        that bucket rounded to two decimals, or NaN for a bucket with no members.
    """
    dist = {}
    for low, high in zip(bucket_cutoffs, bucket_cutoffs[1:]):
        bucket = f'{1e-3 * low}-{1e-3 * high}'
        bucket_kfs = kfs[(weights >= low) & (weights < high)]
        # An empty bucket has no mean. Report NaN explicitly rather than taking the
        # mean of an empty selection, which numpy answers with a RuntimeWarning.
        dist[bucket] = round(bucket_kfs.mean(), 2) if len(bucket_kfs) else float('nan')

    return dist
        
def pretty(d, indent=0):
    """
    Prints a (possibly nested) dict as a tab-indented tree.

    Each key is printed at depth `indent`. A dict value is printed recursively one
    level deeper; any other value is printed on its own line one level deeper.
    """
    key_prefix = '\t' * indent
    value_prefix = '\t' * (indent + 1)
    for key, value in d.items():
        print(key_prefix + str(key))
        if not isinstance(value, dict):
            print(value_prefix + str(value))
            continue
        pretty(value, indent + 1)
    

def generate_info(pm_base, date, loss_factor):
    """
    Prints and returns summary statistics of the smart individual weights for `date`
    after scaling every weight by 1.01 and by (1 - loss_factor).

    Args:
        pm_base: population metrics base passed through to the smart-metric helpers.
        date: date for which the smart individual values are generated.
        loss_factor: fraction removed from every weight (0.16 means 16%).

    Returns:
        dict with the loss factor (percent), rounded average weight, rounded mean
        k-factor, smart sample size, unrounded standard deviation, coefficient of
        variation (percent), the weight distribution and the per-bucket k-factor
        breakdown over 1000-wide buckets from 0 to 9000.
    """
    weights, kfs = generate_smart_individual_values(pm_base, date, 3, True, True, 0.9)
    retained_fraction = 1.0 - loss_factor
    vals = 1.01 * weights * 1.0 * retained_fraction

    smart_avg = np.mean(vals)
    smart_kf = np.mean(kfs)
    smart_sample_size = get_smart_sample_size(pm_base, date)
    smart_std = np.std(vals)
    cov = smart_std / smart_avg

    bucket_cutoffs = np.arange(0, 10000, 1000)
    weight_dist = get_distribution(vals, bucket_cutoffs)
    kf_breakdown = get_kf_breakdown(vals, kfs, bucket_cutoffs)

    # each rounded figure is computed once, printed, and reused in the returned dict
    loss_pct = round(100 * loss_factor)
    print('Loss Factor: {}%'.format(loss_pct))
    print('-----------')
    avg_rounded = round(smart_avg)
    print('Smart Avg Weight: {}g'.format(avg_rounded))
    kf_rounded = round(smart_kf, 2)
    print('Smart K Factor: {}'.format(kf_rounded))
    print('Smart Sample Size: {}'.format(smart_sample_size))
    print('Smart Standard Deviation: {}g'.format(round(smart_std)))
    cov_pct = round(100 * cov, 1)
    print('Coefficient of Variation: {}%'.format(cov_pct))
    print('Weight Distribution:')
    print(json.dumps(weight_dist, indent=4))
    print('KF Breakdown:')
    print(json.dumps(kf_breakdown, indent=4))

    return {
        'loss_factor': loss_pct,
        'smart_average_weight': avg_rounded,
        'smart_k_factor': kf_rounded,
        'smart_sample_size': smart_sample_size,
        'smart_standard_deviation': smart_std,
        'coefficient_of_variation': cov_pct,
        'weight_distribution': weight_dist,
        'kf_breakdown': kf_breakdown
    }

In [None]:
import numpy as np

In [None]:
# Sweep the loss factor over 0% plus 6%..8% in 1% steps. The percentages are built
# as integers and then scaled, because np.arange with a fractional step can gain or
# lose its last element through floating-point round-off.
output = []
for loss_factor in [0] + list(np.arange(6, 9) / 100):
    output.append(generate_info(pm_base, date, loss_factor))
    print(' ')

In [None]:
print(json.dumps(output, indent=4).replace('NaN', 'null'))

In [None]:
pen_id, start_date, end_date = 208, '2020-11-23', '2020-12-03'
sampling_filter = SamplingFilter(start_hour=0, end_hour=24, kf_cutoff=1.09, akpd_score_cutoff=0.99)
df = extract_biomass_data(pen_id, start_date, end_date, sampling_filter.akpd_score_cutoff)

In [None]:
new_preds = df.apply(pred, axis=1).apply(pd.Series)

In [None]:
df = pd.concat([df.drop(new_preds.columns, axis=1), new_preds], axis=1)

In [None]:
pm_base = gen_pm_base(df, sampling_filter)

In [None]:
generate_smart_avg_weight(pm_base, '2020-12-02')

In [None]:
generate_smart_avg_weight(pm_base, '2020-12-02')

In [None]:
weights, _ = generate_smart_individual_values(pm_base, '2020-12-02', 3, True, True, 0.9)

In [None]:
bias_mapping = {
    0: -1.5,
    1000: 1.7,
    2000: 1.0,
    3000: 1.7,
    4000: 0.9,
    5000: -0.8,
    6000: -1.0,
    7000: -1.4,
    8000: -1.4,
    9000: -5.8
}

In [None]:
adj_weights = []
for w in weights:
    bucket = int(1000 * int(w / 1000))
    adj_weight = (1.0 - 0.01 * bias_mapping[bucket]) * w
    adj_weights.append(adj_weight)

In [None]:
np.mean(adj_weights)

In [None]:
# NOTE(review): the value subtracted (6470) and the divisor (6570) differ. If this is
# meant to be a relative difference against a single reference weight, one of the two
# is probably a typo -- confirm which number is intended.
(np.mean(adj_weights) - 6470) / 6570

In [None]:
6409 * 1.008

In [None]:
date = '2020-12-02'

def get_distribution(weights, bucket_cutoffs):
    """
    Returns the percentage of weights that fall into each [low, high) bucket.

    Args:
        weights: array supporting element-wise comparison and boolean-mask indexing
            (e.g. a numpy array).
        bucket_cutoffs: sequence of bucket edges; each consecutive pair (low, high)
            defines one bucket.

    Returns:
        dict mapping a '<1e-3 * low>-<1e-3 * high>' label to the share (in percent,
        rounded to one decimal) of the counted weights in that bucket. Weights outside
        every bucket are ignored and do not enter the denominator. When no weight
        falls into any bucket, every share is 0.0 (previously a ZeroDivisionError).
    """
    counts = {}
    total = 0
    for low, high in zip(bucket_cutoffs, bucket_cutoffs[1:]):
        bucket = f'{1e-3 * low}-{1e-3 * high}'
        bucket_count = weights[(weights >= low) & (weights < high)].shape[0]
        counts[bucket] = bucket_count
        total += bucket_count

    # nothing was counted: report an all-zero distribution instead of dividing by zero
    if total == 0:
        return {bucket: 0.0 for bucket in counts}
    return {bucket: round(100 * n / total, 1) for bucket, n in counts.items()}


def get_kf_breakdown(weights, kfs, bucket_cutoffs):
    """
    Returns the mean k-factor of the fish whose weight falls into each [low, high) bucket.

    Args:
        weights: array of weights supporting element-wise comparison.
        kfs: array of k-factors, indexable by a boolean mask built from weights
            (so it is expected to line up element-for-element with weights).
        bucket_cutoffs: sequence of bucket edges; each consecutive pair (low, high)
            defines one bucket.

    Returns:
        dict mapping a '<1e-3 * low>-<1e-3 * high>' label to the mean k-factor in
        that bucket rounded to two decimals, or NaN for a bucket with no members.
    """
    dist = {}
    for low, high in zip(bucket_cutoffs, bucket_cutoffs[1:]):
        bucket = f'{1e-3 * low}-{1e-3 * high}'
        bucket_kfs = kfs[(weights >= low) & (weights < high)]
        # An empty bucket has no mean. Report NaN explicitly rather than taking the
        # mean of an empty selection, which numpy answers with a RuntimeWarning.
        dist[bucket] = round(bucket_kfs.mean(), 2) if len(bucket_kfs) else float('nan')

    return dist
        
def pretty(d, indent=0):
    """
    Prints a (possibly nested) dict as a tab-indented tree.

    Each key is printed at depth `indent`. A dict value is printed recursively one
    level deeper; any other value is printed on its own line one level deeper.
    """
    key_prefix = '\t' * indent
    value_prefix = '\t' * (indent + 1)
    for key, value in d.items():
        print(key_prefix + str(key))
        if not isinstance(value, dict):
            print(value_prefix + str(value))
            continue
        pretty(value, indent + 1)
    

def generate_info(pm_base, date, loss_factor):
    """
    Prints and returns summary statistics of the smart individual weights for `date`
    after scaling every weight by (1 - loss_factor).

    Args:
        pm_base: population metrics base passed through to the smart-metric helpers.
        date: date for which the smart individual values are generated.
        loss_factor: fraction removed from every weight (0.16 means 16%).

    Returns:
        dict with the loss factor (percent), rounded average weight, rounded mean
        k-factor, smart sample size, unrounded standard deviation, coefficient of
        variation (percent), the weight distribution and the per-bucket k-factor
        breakdown over 1000-wide buckets from 0 to 9000.
    """
    weights, kfs = generate_smart_individual_values(pm_base, date, 3, True, True, 0.9)
    retained_fraction = 1.0 - loss_factor
    vals = weights * 1.0 * retained_fraction

    smart_avg = np.mean(vals)
    smart_kf = np.mean(kfs)
    smart_sample_size = get_smart_sample_size(pm_base, date)
    smart_std = np.std(vals)
    cov = smart_std / smart_avg

    bucket_cutoffs = np.arange(0, 10000, 1000)
    weight_dist = get_distribution(vals, bucket_cutoffs)
    kf_breakdown = get_kf_breakdown(vals, kfs, bucket_cutoffs)

    # each rounded figure is computed once, printed, and reused in the returned dict
    loss_pct = round(100 * loss_factor)
    print('Loss Factor: {}%'.format(loss_pct))
    print('-----------')
    avg_rounded = round(smart_avg)
    print('Smart Avg Weight: {}g'.format(avg_rounded))
    kf_rounded = round(smart_kf, 2)
    print('Smart K Factor: {}'.format(kf_rounded))
    print('Smart Sample Size: {}'.format(smart_sample_size))
    print('Smart Standard Deviation: {}g'.format(round(smart_std)))
    cov_pct = round(100 * cov, 1)
    print('Coefficient of Variation: {}%'.format(cov_pct))
    print('Weight Distribution:')
    print(json.dumps(weight_dist, indent=4))
    print('KF Breakdown:')
    print(json.dumps(kf_breakdown, indent=4))

    return {
        'loss_factor': loss_pct,
        'smart_average_weight': avg_rounded,
        'smart_k_factor': kf_rounded,
        'smart_sample_size': smart_sample_size,
        'smart_standard_deviation': smart_std,
        'coefficient_of_variation': cov_pct,
        'weight_distribution': weight_dist,
        'kf_breakdown': kf_breakdown
    }

In [None]:
import numpy as np

In [None]:
# Sweep the loss factor over 0% plus 13%..18% in 1% steps. The percentages are built
# as integers and then scaled, because np.arange with a fractional step can gain or
# lose its last element through floating-point round-off.
output = []
for loss_factor in [0] + list(np.arange(13, 19) / 100):
    output.append(generate_info(pm_base, date, loss_factor))
    print(' ')

In [None]:
print(json.dumps(output, indent=4).replace('NaN', 'null'))

In [None]:
smart_std = generate_smart_standard_deviation(pm_base, date)
print(smart_std)

In [None]:
# smart_avg only ever exists as a local variable inside generate_info, so on a fresh
# kernel this cell raised NameError. Compute it here with the same default arguments
# that were used for smart_std in the previous cell.
smart_avg = generate_smart_avg_weight(pm_base, date)
cov = smart_std / smart_avg
print(cov)

In [None]:
weights, kfs = generate_smart_individual_values(pm_base, date, 3, True, True, 0.9)
# weights = weights * 0.9985

In [None]:
generate_smart_avg_kf(pm_base, date)

In [None]:
pen_id, start_date, end_date = 208, '2020-11-23', '2020-12-03'
sampling_filter = SamplingFilter(start_hour=9, end_hour=13, kf_cutoff=0, akpd_score_cutoff=0.95)
df = extract_biomass_data(pen_id, start_date, end_date, sampling_filter.akpd_score_cutoff)

In [None]:
get_distribution(weights, np.arange(0, 10000, 1000))

In [None]:
def get_kf_breakdown(weights, bucket_cutoffs):
    """
    Returns the mean k-factor per [low, high) weight bucket, reading the k-factors
    from the notebook-level variable `kfs`.

    NOTE(review): this two-argument definition shadows the three-argument
    get_kf_breakdown(weights, kfs, bucket_cutoffs) defined in earlier cells. Once this
    cell has run, generate_info (which passes three arguments) fails until one of those
    earlier cells is re-run.

    Args:
        weights: array of weights supporting element-wise comparison; expected to
            line up element-for-element with the global `kfs`.
        bucket_cutoffs: sequence of bucket edges; each consecutive pair (low, high)
            defines one bucket.

    Returns:
        dict mapping a '<1e-3 * low>-<1e-3 * high>' label to the mean k-factor in
        that bucket rounded to two decimals, or NaN for a bucket with no members.
    """
    dist = {}
    for low, high in zip(bucket_cutoffs, bucket_cutoffs[1:]):
        bucket = f'{1e-3 * low}-{1e-3 * high}'
        bucket_kfs = kfs[(weights >= low) & (weights < high)]
        # An empty bucket has no mean. Report NaN explicitly rather than taking the
        # mean of an empty selection, which numpy answers with a RuntimeWarning.
        dist[bucket] = round(bucket_kfs.mean(), 2) if len(bucket_kfs) else float('nan')

    return dist
        
    
        
    

In [None]:
get_kf_breakdown(weights, np.arange(0, 10000, 1000))

In [None]:
def get_adj_distribution(weights, loss_factor, bucket_cutoffs):
    """
    Returns the percentage of loss-adjusted weights in each [low, high) bucket.

    Args:
        weights: array of weights; each is multiplied by (1 - loss_factor) before
            bucketing.
        loss_factor: fraction removed from every weight (0.16 means 16%).
        bucket_cutoffs: sequence of bucket edges; each consecutive pair (low, high)
            defines one bucket.

    Returns:
        dict mapping a '<low>-<high>' label to the (unrounded) share in percent of the
        counted adjusted weights in that bucket. Adjusted weights outside every bucket
        are ignored and do not enter the denominator. When nothing falls into any
        bucket, every share is 0.0 (previously a ZeroDivisionError).
    """
    adj_weights = weights * (1.0 - loss_factor)
    counts = {}
    total = 0
    for low, high in zip(bucket_cutoffs, bucket_cutoffs[1:]):
        bucket = f'{low}-{high}'
        bucket_count = adj_weights[(adj_weights >= low) & (adj_weights < high)].shape[0]
        counts[bucket] = bucket_count
        total += bucket_count

    # nothing was counted: report an all-zero distribution instead of dividing by zero
    if total == 0:
        return {bucket: 0.0 for bucket in counts}
    return {bucket: 100 * n / total for bucket, n in counts.items()}
        
    
        
    

In [None]:
bucket_cutoffs = np.arange(0, 10000, 1000)
loss_factor = 0.16
dist_16 = get_adj_distribution(weights, loss_factor, bucket_cutoffs)

In [None]:
dist_16

In [None]:
get_adj_distribution(weights, 0.17, bucket_cutoffs)

In [None]:
get_adj_distribution(weights, 0.1752, bucket_cutoffs)

In [None]:
X = [6, 7, 8, 11, 7, 6, 11]
Y = [0.0526624699, -0.009913795167, 0.01558849764, -0.02291304971, -0.01581060603, -0.001067805761, -0.01236907407]
W = [6440.00, 6589.00, 20874.00, 5178.00, 39081, 39081, 39081]

In [None]:
from sklearn.linear_model import LinearRegression

X =  np.array(X).reshape(-1, 1)
Y = np.array(Y)
W = np.array(W)

lr = LinearRegression(fit_intercept=False).fit(X, Y, W)

In [None]:
lr.coef_

In [None]:
import numpy as np
1 - np.exp(lr.coef_[0])

In [None]:
np.exp(7*lr.coef_[0])

In [None]:
np.exp(lr.intercept_ + 3*lr.coef_)

In [None]:
lr.intercept_

In [None]:
coef = -0.00277068

In [None]:
np.exp(7*coef)