In [None]:
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from research_lib.utils.data_access_utils import RDSAccessUtils

In [None]:
# Biomass computations for pen 159 captured between 2021-04-14 and
# 2021-04-22, restricted to rows whose akpd_score exceeds 0.9.
query = """
    select * from prod.biomass_computations
    where pen_id=159
    and captured_at between '2021-04-14' and '2021-04-22'
    and akpd_score > 0.9;
"""

rds = RDSAccessUtils()
df = rds.extract_from_database(query)

# Index the frame by capture timestamp, then derive the calendar date (as a
# 'YYYY-MM-DD' string) and the hour of day used by the filters further down.
# NOTE(review): the timezone of captured_at is not visible here -- confirm
# before interpreting `hour` as local time.
df.index = pd.to_datetime(df['captured_at'])
df['date'] = df.index.date.astype(str)
df['hour'] = df.index.hour


In [None]:
def generate_distribution(weights, min_weight=0, max_weight=12000, bin_width=1000):
    """Bucket weights into fixed-width bins and report each bin's share of the sample.

    Bins are half-open, ``[low, high)``, so a weight that sits exactly on an
    edge is counted once, in the bin that starts at that edge.

    Args:
        weights: 1-D array-like of numeric weights.
        min_weight: Lower edge of the first bin.
        max_weight: Upper limit of the binned range. The last bin ends at the
            largest edge ``min_weight + k * bin_width`` that does not exceed
            this value.
        bin_width: Width of every bin.

    Returns:
        A ``(buckets, bin_pcts)`` tuple of equal-length lists: ``buckets``
        holds ``'low-high'`` labels and ``bin_pcts`` the percentage (0-100,
        rounded to 2 decimals) of *all* supplied weights that fall in each
        bin. Percentages are relative to ``len(weights)``, so they sum to less
        than 100 when some weights lie outside the binned range. For empty
        input every percentage is ``0.0``.
    """
    weights = np.asarray(weights)
    total = len(weights)

    # np.arange(min, max, width) leaves out `max` itself, which silently
    # dropped the final bin (11000-12000 with the defaults). Derive the edges
    # from an explicit bin count so the top edge is included.
    n_bins = max(int((max_weight - min_weight) // bin_width), 0)
    bucket_edges = min_weight + bin_width * np.arange(n_bins + 1)

    buckets, bin_pcts = [], []
    for low_weight, high_weight in zip(bucket_edges, bucket_edges[1:]):
        buckets.append('{}-{}'.format(low_weight, high_weight))

        # Inclusive lower bound: with strict inequalities on both sides a
        # weight equal to an edge would belong to no bin at all.
        in_bin = (weights >= low_weight) & (weights < high_weight)
        bin_sample_size = int(np.count_nonzero(in_bin))

        # Guard against an empty sample instead of dividing by zero.
        bin_pct = round(100 * bin_sample_size / total, 2) if total else 0.0
        bin_pcts.append(bin_pct)

    return buckets, bin_pcts
        
        

In [None]:
# Overlay the weight distributions measured before and after the partial
# harvest on a single set of axes.
fig, ax = plt.subplots(figsize=(15, 8))

# Keep only samples captured during hours 8 through 14 inclusive.
# NOTE(review): which timezone `hour` is expressed in is not visible here.
hour_mask = (df.hour > 7) & (df.hour < 15)

# `date` is compared as a string; this orders chronologically as long as the
# column holds ISO 'YYYY-MM-DD' values.
date_mask_1 = (df.date >= '2021-04-14') & (df.date <= '2021-04-16')
buckets, bin_pcts = generate_distribution(df.loc[date_mask_1 & hour_mask, 'estimated_weight_g'].values)
ax.bar(buckets, bin_pcts, color='blue', label='Before partial harvest')
# Column name fixed from the misspelled 'percenatge'.
tdf1 = pd.DataFrame({'bucket': buckets, 'percentage': bin_pcts})

date_mask_2 = (df.date >= '2021-04-19') & (df.date <= '2021-04-21')
buckets, bin_pcts = generate_distribution(df.loc[date_mask_2 & hour_mask, 'estimated_weight_g'].values)
# Semi-transparent so the first series stays visible underneath.
ax.bar(buckets, bin_pcts, color='red', alpha=0.5, label='After partial harvest')
tdf2 = pd.DataFrame({'bucket': buckets, 'percentage': bin_pcts})

ax.set_xlabel('Weight bucket (g)')
ax.set_ylabel('Percentage (%)')
ax.set_title('Storelva pen 3: Partial Harvest Weight Distribution Comparison')
ax.legend()
ax.grid()
plt.show()



In [None]:
# Show the bucket/percentage table built for the second ("after partial harvest") date window.
tdf2