In [10]:
import numpy as np
import pandas as pd
from copy import deepcopy
from scipy.optimize import minimize_scalar
from scipy.integrate import quad

In [2]:
gsd = pd.read_csv("../data/test/gold_standard.csv")

df = gsd.iloc[[0]]


In [3]:
gsd

Unnamed: 0,UID,Cleared,Vertical Diameter,Vertical Hub Diameter,Horizontal Diameter,Horizontal Hub Diameter,Vertical Spiral Turns,Horizontal Spiral Turns,Mesh Width,Capture Area,CTL,Units
0,1,False,1107.645,304.972,893.597,262.968,25,22,15.876,796496,53125.81,Px


In [4]:
def sparseify_gsd(gsd):
    mask_list = [[True,True] + [bool(int(x)) for x in bin(y)[2:].zfill(9)] + [True] for y in range (512)]
    mask_array = np.array(mask_list)
    df = gsd.iloc[[0]]
    df = df.loc[df.index.repeat(512)].reset_index(drop=True)
    df = df.where(mask_array, np.NaN)
    return df
fucked = sparseify_gsd(gsd)
fucked.to_csv("../data/gsdfck.csv")


In [5]:
fucked

Unnamed: 0,UID,Cleared,Vertical Diameter,Vertical Hub Diameter,Horizontal Diameter,Horizontal Hub Diameter,Vertical Spiral Turns,Horizontal Spiral Turns,Mesh Width,Capture Area,CTL,Units
0,1,False,,,,,,,,,,Px
1,1,False,,,,,,,,,53125.81,Px
2,1,False,,,,,,,,796496.0,,Px
3,1,False,,,,,,,,796496.0,53125.81,Px
4,1,False,,,,,,,15.876,,,Px
...,...,...,...,...,...,...,...,...,...,...,...,...
507,1,False,1107.645,304.972,893.597,262.968,25.0,22.0,,796496.0,53125.81,Px
508,1,False,1107.645,304.972,893.597,262.968,25.0,22.0,15.876,,,Px
509,1,False,1107.645,304.972,893.597,262.968,25.0,22.0,15.876,,53125.81,Px
510,1,False,1107.645,304.972,893.597,262.968,25.0,22.0,15.876,796496.0,,Px


In [None]:
gsd.index[gsd["Capture Area"].isna()].tolist()

In [None]:
gsd[gsd["Capture Area"].isna()]["Vertical Diameter"]

In [None]:
gsd.iloc[[8,9]]

In [15]:
label_dict = {
    "dv": "Vertical Diameter",
    "hv": "Vertical Hub Diameter",
    "dh": "Horizontal Diameter",
    "hh": "Horizontal Hub Diameter",
    "nv": "Vertical Spiral Turns",
    "nh": "Horizontal Spiral Turns",
    "mw": "Mesh Width",
    "ca": "Capture Area",
    "ctl": "CTL",
}

$$CA=\left(\frac{d_v}{2}\right)\left(\frac{d_h}{2}\right)\pi - \left(\frac{H_v}{2}\right)\left(\frac{H_h}{2}\right)\pi$$

$$a = b - c$$
$$c = b - a$$

$$CA + \left(\frac{H_v}{2}\right)\left(\frac{H_h}{2}\right)\pi = \left(\frac{d_v}{2}\right)\left(\frac{d_h}{2}\right)\pi$$

$$d_v = 2\left(\frac{CA + \left(\frac{H_v}{2}\right)\left(\frac{H_h}{2}\right)\pi}{\left(\frac{d_h}{2}\right)\pi}\right)$$

---

$$\left(\frac{H_v}{2}\right)\left(\frac{H_h}{2}\right)\pi=\left(\frac{d_v}{2}\right)\left(\frac{d_h}{2}\right)\pi -CA$$
$$H_v=2\left(\frac{\left(\frac{d_v}{2}\right)\left(\frac{d_h}{2}\right)\pi -CA}{\left(\frac{H_h}{2}\right)\pi}\right)$$

maybe instead

$$d_v = \frac{4CA + \pi H_v H_h}{\pi d_h}$$

$$H_v = \frac{\pi d_v d_h - 4CA}{\pi H_h}$$

In [6]:
def CA_from_CA_eq(df, label_dict, verbose=False):
    """Calculate the CA from (Blackledge & Gillespie, 2002)"""
    # Extract Labels
    ca_lab = label_dict["ca"]
    dv_lab = label_dict["dv"]
    dh_lab = label_dict["dh"]
    hv_lab = label_dict["hv"]
    hh_lab = label_dict["hh"]
    
    idxs = df[ca_lab].isna() # Find indices to change
    pre = idxs.sum()
    
    df[ca_lab] = df[ca_lab].mask(pd.isnull, ((df[idxs][dv_lab]/2) * (df[idxs][dh_lab]/2) * np.pi) - ((df[idxs][hv_lab]/2) * (df[idxs][hh_lab]/2) * np.pi))
    df[ca_lab].mask(df[ca_lab] <=0, np.NaN, inplace=True) # Zap any cell with a 0 or negative as these can never be true
    diff = pre - df[ca_lab].isna().sum()
    
    if verbose and diff > 0:
        print(f"    CA from CA EQ: {diff} changes.")
    return df

def dv_from_CA_eq(df, label_dict, verbose=False):
    """Calculate the Vertical Diameter from CA (Blackledge & Gillespie, 2002)"""
    ca_lab = label_dict["ca"]
    dv_lab = label_dict["dv"]
    dh_lab = label_dict["dh"]
    hv_lab = label_dict["hv"]
    hh_lab = label_dict["hh"]
    
    idxs = df[dv_lab].isna()
    pre = idxs.sum()
    
    ca = df[idxs][ca_lab]  # Find all CA vals for any row missing dv values
    dh = df[idxs][dh_lab]
    hv = df[idxs][hv_lab]
    hh = df[idxs][hh_lab]
    
    df[dv_lab] = df[dv_lab].mask(pd.isnull, 2 * ((ca + (hv/2)*(hh/2)*np.pi)/((dh/2)*np.pi)))
    df[dv_lab].mask(df[dv_lab] <=0, np.NaN, inplace=True)
    
    diff = pre - df[dv_lab].isna().sum()
    if verbose and diff > 0:
        print(f"    dv from CA EQ: {diff} changes.")
    return df

def dh_from_CA_eq(df, label_dict, verbose=False):
    """Calculate the Horizontal Diameter from CA (Blackledge & Gillespie, 2002)"""
    # Flip values in label dict as dh and dv are derived in the same way from each other. Yay for the commutative property!
    tmp_ld = deepcopy(label_dict)
    tmp_ld["dv"], tmp_ld["dh"] = tmp_ld["dh"], tmp_ld["dv"]
    
    pre = df[label_dict["dh"]].isna().sum()
    df = dv_from_CA_eq(df, tmp_ld, verbose=False)
    diff = pre - df[label_dict["dh"]].isna().sum()
    
    if verbose and diff > 0:
        print(f"    dh from CA EQ: {diff} changes.")
    
    return df

def hv_from_CA_eq(df, label_dict, verbose=False):
    """Calculate the Vertical Hub Diameter from CA (Blackledge & Gillespie, 2002)"""
#     print("Trying hv from CA")
    
    ca_lab = label_dict["ca"]
    dv_lab = label_dict["dv"]
    dh_lab = label_dict["dh"]
    hv_lab = label_dict["hv"]
    hh_lab = label_dict["hh"]
    
    idxs = df[hv_lab].isna()
    pre = idxs.sum()
    
    ca = df[idxs][ca_lab]  # Find all CA vals for any row missing dv values
    dh = df[idxs][dh_lab]
    dv = df[idxs][dv_lab]
#     hv = df[idxs][hv_lab]
    hh = df[idxs][hh_lab]
    
#     df[hv_lab] = df[hv_lab].mask(pd.isnull, 2 * ((((dv/2)*(dh/2)*np.pi)-ca)/((hh/2)*np.pi)))
    df[hv_lab] = df[hv_lab].mask(pd.isnull, (np.pi * dv * dh - 4 * ca)/(np.pi * hh))
    df[hv_lab].mask(df[hv_lab] <=0, np.NaN, inplace=True)
    
    diff = pre - df[hv_lab].isna().sum()
    if verbose and diff > 0:
        print(f"    hv from CA EQ: {diff} changes.")
        
    return df

def hh_from_CA_eq(df, label_dict, verbose=False):
    """Calculate the Horizontal Hub Diameter from CA (Blackledge & Gillespie, 2002)"""
    # Flip values in label dict as dh and dv are derived in the same way from each other. Yay for the commutative property!
    tmp_ld = deepcopy(label_dict)
    tmp_ld["hv"], tmp_ld["hh"] = tmp_ld["hh"], tmp_ld["hv"]
    
    pre = df[label_dict["dh"]].isna().sum()
    df = hv_from_CA_eq(df, tmp_ld, False)
    diff = pre - df[label_dict["dh"]].isna().sum()
    
    if verbose and diff > 0:
        print(f"    hh from CA EQ: {diff} changes.")
    
    return df

$$H_v = \frac{\pi d_v d_h - 4CA}{\pi H_h}$$

$$MW = \frac{1}{2}\left(\frac{r_u - Hr_u}{S_u-1} + \frac{r_l - Hr_l}{S_l-1}\right)$$
Alternative formulation
$$MW = \frac{1}{2}\left(\frac{d_v - H_v}{N_v-2} + \frac{d_h - H_h}{N_h-2}\right)$$

For d_v
$$2MW = \frac{d_v - H_v}{N_v-2} + \frac{d_h - H_h}{N_h-2}$$
$$d_v = (N_v - 2)\left(2MW -  \frac{d_h - H_h}{N_h-2}\right) + H_v$$

For d_h
$$d_h = (N_h - 2)\left(2MW -  \frac{d_v - H_v}{N_v-2}\right) + H_h$$
*Therefore we can just flip labels*

For H_v
$$ H_v = 0 - \left((N_v - 2)\left(2MW -  \frac{d_h - H_h}{N_h-2}\right) - d_v\right)$$

For N_v
$$2MW = \frac{d_v - H_v}{N_v-2} + \frac{d_h - H_h}{N_h-2}$$
$$N_v = \left(\frac{d_v - H_v}{2MW - \frac{d_h - H_h}{N_h-2}}\right) + 2$$

In [7]:
def MW_from_MW_eq(df, label_dict, verbose=False):
    """Calculate the MW from (citation)"""
    
    dv_lab = label_dict["dv"]
    dh_lab = label_dict["dh"]
    hv_lab = label_dict["hv"]
    hh_lab = label_dict["hh"]
    nv_lab = label_dict["nv"]
    nh_lab = label_dict["nh"]
    mw_lab = label_dict["mw"]
    
    idxs = df[mw_lab].isna()
    pre = idxs.sum()
    
    dh = df[idxs][dh_lab]
    dv = df[idxs][dv_lab]
    hv = df[idxs][hv_lab]
    hh = df[idxs][hh_lab]
    nv = df[idxs][nv_lab]
    nh = df[idxs][nh_lab]
    
    df[mw_lab] = df[mw_lab].mask(pd.isnull, 0.5*(((dv - hv)/(nv*2 - 2)) + ((dh - hh)/(nh*2 - 2))))
    df[mw_lab].mask(df[mw_lab] <=0, np.NaN, inplace=True)

    diff = pre - df[mw_lab].isna().sum()
    
    
    if verbose and diff > 0:
        print(f"    MW from MW EQ: {diff} changes.")
        
    return df


def dv_from_MW_eq(df, label_dict, verbose=False):
    """Calculate the Vertical Diameter from MW (citation)"""
    
    # This function has been marked up as an example of how all derivation funcs are structured
    
    # Find dataframe labels from the supplied label mapping  
    dv_lab = label_dict["dv"]
    dh_lab = label_dict["dh"]
    hv_lab = label_dict["hv"]
    hh_lab = label_dict["hh"]
    nv_lab = label_dict["nv"]
    nh_lab = label_dict["nh"]
    mw_lab = label_dict["mw"]
    
    # Set the label we're trying to derive just once to reduce manual error when writing these funcs
    deriv_lab = dv_lab  # Only need to change this between funcs
    
    # Work out where the desired values are missing
    idxs = df[deriv_lab].isna()
    
    # Calculate how many are missing
    pre = idxs.sum()
    
    # Find the vectors of each element we need for the equation
    # Note that here we CAN use chained indexing as we are not assigning into the locations
    dh = df[idxs][dh_lab]
#     dv = df[idxs][dv_lab]
    hv = df[idxs][hv_lab]
    hh = df[idxs][hh_lab]
    nv = df[idxs][nv_lab]
    nh = df[idxs][nh_lab]
    mw = df[idxs][mw_lab]
    
    # Derive any place where there is a null in the desired column, based upon the formula below
    df[deriv_lab] = df[deriv_lab].mask(pd.isnull, (nv*2 - 2) * (2*mw - ((dh - hh) / (nh*2 - 2))) + hv)
    
    # If any values are 0 or less, these CANNOT be correct, and so should be replaced with NaN to make the operation transparent
    df[deriv_lab].mask(df[deriv_lab] <=0, np.NaN, inplace=True)
    
    # Calc how many have been changed
    diff = pre - df[deriv_lab].isna().sum()
    
    
    if verbose and diff > 0:
        print(f"    dv from MW EQ: {diff} changes.")
        
    return df


def dh_from_MW_eq(df, label_dict, verbose=False):
    """Calculate the Horizontal Diameter from MW (citation)"""
    # Flip appropriate values in label dict.
    tmp_ld = deepcopy(label_dict)
    tmp_ld["dv"], tmp_ld["dh"] = tmp_ld["dh"], tmp_ld["dv"]
    tmp_ld["hv"], tmp_ld["hh"] = tmp_ld["hh"], tmp_ld["hv"]
    tmp_ld["nv"], tmp_ld["nh"] = tmp_ld["nh"], tmp_ld["nv"]
    
    pre = df[label_dict["dh"]].isna().sum()
    df = dv_from_MW_eq(df, tmp_ld, verbose=False)
    diff = pre - df[label_dict["dh"]].isna().sum()
    
    if verbose and diff > 0:
        print(f"    dh from MW EQ: {diff} changes.")
    
    return df


def hv_from_MW_eq(df, label_dict, verbose=False):
    """Calculate the Vertical Hub Diameter from MW (citation)"""
    
#     print("Trying hv from MW")
    
    dv_lab = label_dict["dv"]
    dh_lab = label_dict["dh"]
    hv_lab = label_dict["hv"]
    hh_lab = label_dict["hh"]
    nv_lab = label_dict["nv"]
    nh_lab = label_dict["nh"]
    mw_lab = label_dict["mw"]
    
    deriv_lab = hv_lab  # Only need to change this between funcs
    
    idxs = df[deriv_lab].isna()
    pre = idxs.sum()
    
    dh = df[idxs][dh_lab]
    dv = df[idxs][dv_lab]
#     hv = df[idxs][hv_lab]
    hh = df[idxs][hh_lab]
    nv = df[idxs][nv_lab]
    nh = df[idxs][nh_lab]
    mw = df[idxs][mw_lab]
    
    df[deriv_lab] = df[deriv_lab].mask(pd.isnull, 0 - ((nv*2 - 2)*(2*mw - ((dh-hh)/(nh*2-2))) - dv))
    df[deriv_lab].mask(df[deriv_lab] <=0, np.NaN, inplace=True)

    diff = pre - df[deriv_lab].isna().sum()
    
    
    if verbose and diff > 0:
        print(f"    hv from MW EQ: {diff} changes.")
        
    return df


def hh_from_MW_eq(df, label_dict, verbose=False):
    """Calculate the Horizontal Hub Diameter from MW (citation)"""
    # Flip appropriate values in label dict.
    tmp_ld = deepcopy(label_dict)
    tmp_ld["dv"], tmp_ld["dh"] = tmp_ld["dh"], tmp_ld["dv"]
    tmp_ld["hv"], tmp_ld["hh"] = tmp_ld["hh"], tmp_ld["hv"]
    tmp_ld["nv"], tmp_ld["nh"] = tmp_ld["nh"], tmp_ld["nv"]
    
    pre = df[label_dict["hh"]].isna().sum()
    df = hv_from_MW_eq(df, tmp_ld, verbose=False)
    diff = pre - df[label_dict["hh"]].isna().sum()
    
    if verbose and diff > 0:
        print(f"    hh from MW EQ: {diff} changes.")
    
    return df


def nv_from_MW_eq(df, label_dict, verbose=False):
    """Calculate the Vertical Spiral Count from MW (citation)"""
    
#     print("Trying nv from MW")
    
    dv_lab = label_dict["dv"]
    dh_lab = label_dict["dh"]
    hv_lab = label_dict["hv"]
    hh_lab = label_dict["hh"]
    nv_lab = label_dict["nv"]
    nh_lab = label_dict["nh"]
    mw_lab = label_dict["mw"]
    
    deriv_lab = nv_lab  # Only need to change this between funcs
    
    idxs = df[deriv_lab].isna()
    pre = idxs.sum()
    
    dh = df[idxs][dh_lab]
    dv = df[idxs][dv_lab]
    hv = df[idxs][hv_lab]
    hh = df[idxs][hh_lab]
#     nv = df[idxs][nv_lab]
    nh = df[idxs][nh_lab]
    mw = df[idxs][mw_lab]
    
    df[deriv_lab] = df[deriv_lab].mask(pd.isnull, (((dv - hv)/(2*mw - ((dh-hh)/(nh*2-2))))+2)/2)
    df[deriv_lab].mask(df[deriv_lab] <=0, np.NaN, inplace=True)

    diff = pre - df[deriv_lab].isna().sum()
    
    
    if verbose and diff > 0:
        print(f"    nv from MW EQ: {diff} changes.")
        
    return df


def nh_from_MW_eq(df, label_dict, verbose=False):
    """Calculate the Horizontal Spiral Count from MW (citation)"""
    
    #TODO: SOMETHING IS FUCKY HERE
    # Flip appropriate values in label dict.
    tmp_ld = deepcopy(label_dict)
    tmp_ld["dv"], tmp_ld["dh"] = tmp_ld["dh"], tmp_ld["dv"]
    tmp_ld["hv"], tmp_ld["hh"] = tmp_ld["hh"], tmp_ld["hv"]
    tmp_ld["nv"], tmp_ld["nh"] = tmp_ld["nh"], tmp_ld["nv"]
    
    pre = df[label_dict["nh"]].isna().sum()
    df = nv_from_MW_eq(df, tmp_ld, verbose=False)
    diff = pre - df[label_dict["nh"]].isna().sum()
    
    if verbose and diff > 0:
        print(f"    nh from MW EQ: {diff} changes.")
    
    return df

In [8]:
def CTL_from_CTL_eq(df, label_dict, verbose=False):
    """Calculate the CTL from (Venner et al. 2001)"""
    dv_lab = label_dict["dv"]
    dh_lab = label_dict["dh"]
    hv_lab = label_dict["hv"]
    hh_lab = label_dict["hh"]
    nv_lab = label_dict["nv"]
    nh_lab = label_dict["nh"]
    ctl_lab = label_dict["ctl"]
    
    deriv_lab = ctl_lab
    
    idxs = df[deriv_lab].isna()
    pre = idxs.sum()
    
    dh = df[idxs][dh_lab]
    dv = df[idxs][dv_lab]
    hv = df[idxs][hv_lab]
    hh = df[idxs][hh_lab]
    nv = df[idxs][nv_lab]
    nh = df[idxs][nh_lab]
    
    df[deriv_lab] = df[deriv_lab].mask(pd.isnull, (np.pi/16) * (2*nv + 2*nh)*(dv + hv + dh + hh))
    df[deriv_lab].mask(df[deriv_lab] <=0, np.NaN, inplace=True)

    diff = pre - df[deriv_lab].isna().sum()
    
    
    if verbose and diff > 0:
        print(f"    CTL from CTL EQ: {diff} changes.")
        
    return df



In [11]:
def CTL_archemedian_step(x, hv, hh, mw):
    a = ((hv/2)+(hh/2))/2
    deriv = mw/(np.pi*2)
    r = a + deriv * x
    
    return np.sqrt((a + deriv * x)**2 + (deriv) ** 2)

quad(CTL_archemedian_step, 0, np.pi*(25 + 22), args = (304.972, 262.968, 15.876))

def CTL_from_archemedian(df, label_dict, verbose=False):
    """Derive CTL by calculating the length of an archemedian spiral
    
    This seems to encode a chunk of error so might amplify error from other derivations.
    
    Also prints a lot of warnings, which should be investigated before integrating fully into the medic"""
    
    hv_lab = label_dict["hv"]
    hh_lab = label_dict["hh"]
    nv_lab = label_dict["nv"]
    nh_lab = label_dict["nh"]
    mw_lab = label_dict["mw"]
    ctl_lab = label_dict["ctl"]
    
    deriv_lab = ctl_lab
    
    #nv, nh, hv, hh, mw
    
    idxs = df.index[df[deriv_lab].isna()].tolist()
    pre = len(idxs)
    
    for i in idxs:
        hv = df.loc[i, hv_lab]
        hh = df.loc[i, hh_lab]
        nv = df.loc[i, nv_lab]
        nh = df.loc[i, nh_lab]
        mw = df.loc[i, mw_lab]
        if pd.isnull(hv + hh + nv + nh + mw):
            next
        intout = quad(CTL_archemedian_step, 0, np.pi*(nv+nh), args = (hv, hh, mw))
        
        df.loc[i, ctl_lab] = intout[0]
    
    df[deriv_lab].mask(df[deriv_lab] <=0, np.NaN, inplace=True)
    
    diff = pre - len(df.index[df[deriv_lab].isna()].tolist())
    
    if verbose and diff > 0:
        print(f"    CTL from Archemedian Spiral EQ: {diff} changes.")
    return df
        

In [None]:
out = CTL_from_archemedian(fucked, label_dict, True)
fucked = sparseify_gsd(gsd)
out

In [12]:
def dv_from_dh(df, label_dict):
    dh = df[df["Vertical Diameter"].isna()]["Horizontal Diameter"]
    df["Vertical Diameter"] = df["Vertical Diameter"].mask(pd.isnull, dh)
    return df

def derive_assuming_symmetry(df,label_dict, verbose):
    """Derive values assuming web symmetry.
    
    NOTE: This can be quite a stretch, so is not enabled by default.
    
    There may be a lot more error in values at the end of chained derivations when these are used due to multiplicative error propagation.
    """
    
    dv_lab = label_dict["dv"]
    dh_lab = label_dict["dh"]
    hv_lab = label_dict["hv"]
    hh_lab = label_dict["hh"]
    nv_lab = label_dict["nv"]
    nh_lab = label_dict["nh"]
    
    pre_dv = df[dv_lab].isna().sum()  # Find number of NAs in dv
    dh = df[df[dv_lab].isna()][dh_lab]  # Find values from dh to be used to replace places where dv is NaN
    df[dv_lab] = df[dv_lab].mask(pd.isnull, dh)  # Perform replacement
    dvdiff = pre_dv - df[dv_lab].isna().sum()  # Calc number of changes
    
    
    pre_dh = df[dh_lab].isna().sum()
    dv = df[df[dh_lab].isna()][dv_lab]
    df[dh_lab] = df[dh_lab].mask(pd.isnull, dv)
    dhdiff = pre_dh - df[dh_lab].isna().sum()
    
    
    pre_hv = df[hv_lab].isna().sum()
    hh = df[df[hv_lab].isna()][hh_lab]
    df[hv_lab] = df[hv_lab].mask(pd.isnull, hh)
    hvdiff = pre_hv - df[hv_lab].isna().sum()
    
    
    pre_hh = df[hh_lab].isna().sum()
    hv = df[df[hh_lab].isna()][hv_lab]
    df[hh_lab] = df[hh_lab].mask(pd.isnull, hv)
    hhdiff = pre_hh - df[hh_lab].isna().sum()
    
    
    pre_nv = df[nv_lab].isna().sum()
    nh = df[df[nv_lab].isna()][nh_lab]
    df[nv_lab] = df[nv_lab].mask(pd.isnull, nh)
    nvdiff = pre_nv - df[nv_lab].isna().sum()
    
    
    pre_nh = df[nh_lab].isna().sum()
    nv = df[df[nh_lab].isna()][nv_lab]
    df[nh_lab] = df[nh_lab].mask(pd.isnull, nv)
    nhdiff = pre_nh - df[nh_lab].isna().sum()
    
    if verbose and sum([dvdiff, dhdiff, hvdiff, hhdiff, nvdiff, nhdiff])>0:
        print("    Naiive derivation assuming symmetry:")
        print(f"        dv from dh: {dvdiff} changes.")
        print(f"        dh from dv: {dhdiff} changes.")
        print(f"        hv from hh: {hvdiff} changes.")
        print(f"        hh from hv: {hhdiff} changes.")
        print(f"        nv from nh: {nvdiff} changes.")
        print(f"        nh from nv: {nhdiff} changes.")
    return df
    

In [None]:
out = hv_from_CA_eq(fucked, label_dict, verbose=True)
fucked = sparseify_gsd(gsd) # Just in case it is run accidentally

---
## Runner
---

In [25]:
def run_webmed(df, label_dict, assumption_level = 0, verbose=False):
    """Run web medic
    assumption levels:
        0: Only derive from direct rearrangements
        1: Make some biologically-relevant but literature-valid assumptions
        2: Assume web symmetry to increase coverage"""
    df = df.copy() # Deep copy the df before modification
    nulls = df.isnull().sum().sum()
    print(f"Initial nulls = {nulls}")
    run_counter = 0
    while True:
        run_counter += 1
        print(f"\nRun {run_counter} Start.")
        
        # Start processing
        df = CA_from_CA_eq(df, label_dict, verbose)
        df = MW_from_MW_eq(df, label_dict, verbose)
#         df = CTL_from_CTL_eq(df, label_dict, verbose)
        df = CTL_from_archemedian(df, label_dict, verbose)
        
        df = dv_from_MW_eq(df, label_dict, verbose)
        df = dh_from_MW_eq(df, label_dict, verbose)
        df = hv_from_MW_eq(df, label_dict, verbose)
        df = hh_from_MW_eq(df, label_dict, verbose)
        df = nv_from_MW_eq(df, label_dict, verbose)
        df = nh_from_MW_eq(df, label_dict, verbose)
        
        df = dv_from_CA_eq(df, label_dict, verbose)
        df = dh_from_CA_eq(df, label_dict, verbose)
        df = hv_from_CA_eq(df, label_dict, verbose)
        df = hh_from_CA_eq(df, label_dict, verbose)

        if assumption_level >=2:
            # These functions assume web symmetry
            df = derive_assuming_symmetry(df, label_dict, verbose)
        # End processing
        
        changes = abs(nulls-df.isnull().sum().sum())
        nulls = df.isnull().sum().sum()
        if changes <= 0:
            print(f"Run {run_counter} complete, no changes made.")
            break
        print(f"Run {run_counter} complete, made {changes} changes.")
    print(f"\nCompleted in {run_counter} runs, {nulls} NAs unresolved.")
    return df

In [26]:
out = run_webmed(fucked, label_dict, assumption_level=0, verbose=True)
out

Initial nulls = 2304

Run 1 Start.
    CA from CA EQ: 16 changes.
    MW from MW EQ: 4 changes.
    CTL from Archemedian Spiral EQ: 10 changes.
    dv from MW EQ: 4 changes.
    dh from MW EQ: 4 changes.
    hv from MW EQ: 4 changes.
    hh from MW EQ: 4 changes.


  integration interval.
  intout = quad(CTL_archemedian_step, 0, np.pi*(nv+nh), args = (hv, hh, mw))
  the requested tolerance from being achieved.  The error may be 
  underestimated.
  intout = quad(CTL_archemedian_step, 0, np.pi*(nv+nh), args = (hv, hh, mw))


    nv from MW EQ: 4 changes.
    nh from MW EQ: 4 changes.
    dv from CA EQ: 14 changes.
    dh from CA EQ: 14 changes.
Run 1 complete, made 82 changes.

Run 2 Start.
    CA from CA EQ: 8 changes.
    MW from MW EQ: 4 changes.
    CTL from Archemedian Spiral EQ: 10 changes.


  integration interval.
  intout = quad(CTL_archemedian_step, 0, np.pi*(nv+nh), args = (hv, hh, mw))
  the requested tolerance from being achieved.  The error may be 
  underestimated.
  intout = quad(CTL_archemedian_step, 0, np.pi*(nv+nh), args = (hv, hh, mw))
  integration interval.
  intout = quad(CTL_archemedian_step, 0, np.pi*(nv+nh), args = (hv, hh, mw))
  the requested tolerance from being achieved.  The error may be 
  underestimated.
  intout = quad(CTL_archemedian_step, 0, np.pi*(nv+nh), args = (hv, hh, mw))


    nv from MW EQ: 4 changes.
    nh from MW EQ: 4 changes.
Run 2 complete, made 30 changes.

Run 3 Start.
    CTL from Archemedian Spiral EQ: 4 changes.
Run 3 complete, made 4 changes.

Run 4 Start.
Run 4 complete, no changes made.

Completed in 4 runs, 2188 NAs unresolved.


  integration interval.
  intout = quad(CTL_archemedian_step, 0, np.pi*(nv+nh), args = (hv, hh, mw))
  the requested tolerance from being achieved.  The error may be 
  underestimated.
  intout = quad(CTL_archemedian_step, 0, np.pi*(nv+nh), args = (hv, hh, mw))


Unnamed: 0,UID,Cleared,Vertical Diameter,Vertical Hub Diameter,Horizontal Diameter,Horizontal Hub Diameter,Vertical Spiral Turns,Horizontal Spiral Turns,Mesh Width,Capture Area,CTL,Units
0,1,False,,,,,,,,,,Px
1,1,False,,,,,,,,,53125.810000,Px
2,1,False,,,,,,,,796496.000000,,Px
3,1,False,,,,,,,,796496.000000,53125.810000,Px
4,1,False,,,,,,,15.876000,,,Px
...,...,...,...,...,...,...,...,...,...,...,...,...
507,1,False,1107.645,304.972,893.597,262.968,25.0,22.0,15.868665,796496.000000,53125.810000,Px
508,1,False,1107.645,304.972,893.597,262.968,25.0,22.0,15.876000,714390.607746,48510.382025,Px
509,1,False,1107.645,304.972,893.597,262.968,25.0,22.0,15.876000,714390.607746,53125.810000,Px
510,1,False,1107.645,304.972,893.597,262.968,25.0,22.0,15.876000,796496.000000,48510.382025,Px


In [27]:
# With MW
print(f"Nulls: {out.isnull().sum().sum()}")
descriptives = out.describe(percentiles=[.5])
CV = descriptives.loc["std"]/descriptives.describe().loc["mean"] # Coeffiecient of variation
CV = CV.rename("cv")
descriptives = descriptives.append(CV)
descriptives = descriptives.rename({"50%": "median"}, axis='index')
descriptives

Nulls: 2188


Unnamed: 0,UID,Vertical Diameter,Vertical Hub Diameter,Horizontal Diameter,Horizontal Hub Diameter,Vertical Spiral Turns,Horizontal Spiral Turns,Mesh Width,Capture Area,CTL
count,512.0,274.0,260.0,274.0,260.0,264.0,264.0,264.0,280.0,280.0
mean,1.0,1113.632752,304.961167,898.428344,262.958521,25.054011,22.053927,15.893522,789467.846294,52828.833192
std,0.0,25.804551,0.086831,20.81778,0.075977,0.439119,0.441817,0.142567,22995.019824,1171.876648
min,1.0,1107.645,304.267857,893.597,262.351875,24.978965,21.979503,15.868665,714390.607746,48476.348703
median,1.0,1107.645,304.972,893.597,262.968,25.0,22.0,15.876,796496.0,53125.81
max,1.0,1224.632671,304.972,987.977268,262.968,28.697721,26.040059,17.087287,796496.0,55251.371463
cv,0.0,0.031901,0.000352,0.031475,0.000348,0.007156,0.007436,0.002601,0.044219,0.033302


In [28]:
out.loc[out["CTL"] > 5e06]

Unnamed: 0,UID,Cleared,Vertical Diameter,Vertical Hub Diameter,Horizontal Diameter,Horizontal Hub Diameter,Vertical Spiral Turns,Horizontal Spiral Turns,Mesh Width,Capture Area,CTL,Units


---
## Naiive run
---

In [29]:
out = run_webmed(fucked, label_dict, assumption_level=4, verbose=True)
out

Initial nulls = 2304

Run 1 Start.
    CA from CA EQ: 16 changes.
    MW from MW EQ: 4 changes.
    CTL from Archemedian Spiral EQ: 10 changes.
    dv from MW EQ: 4 changes.
    dh from MW EQ: 4 changes.
    hv from MW EQ: 4 changes.
    hh from MW EQ: 4 changes.
    nv from MW EQ: 4 changes.


  integration interval.
  intout = quad(CTL_archemedian_step, 0, np.pi*(nv+nh), args = (hv, hh, mw))
  the requested tolerance from being achieved.  The error may be 
  underestimated.
  intout = quad(CTL_archemedian_step, 0, np.pi*(nv+nh), args = (hv, hh, mw))


    nh from MW EQ: 4 changes.
    dv from CA EQ: 14 changes.
    dh from CA EQ: 14 changes.
    Naiive derivation assuming symmetry:
        dv from dh: 110 changes.
        dh from dv: 110 changes.
        hv from hh: 124 changes.
        hh from hv: 124 changes.
        nv from nh: 124 changes.
        nh from nv: 124 changes.
Run 1 complete, made 798 changes.

Run 2 Start.
    CA from CA EQ: 128 changes.
    MW from MW EQ: 104 changes.
    CTL from Archemedian Spiral EQ: 116 changes.


  integration interval.
  intout = quad(CTL_archemedian_step, 0, np.pi*(nv+nh), args = (hv, hh, mw))
  the requested tolerance from being achieved.  The error may be 
  underestimated.
  intout = quad(CTL_archemedian_step, 0, np.pi*(nv+nh), args = (hv, hh, mw))
  integration interval.
  intout = quad(CTL_archemedian_step, 0, np.pi*(nv+nh), args = (hv, hh, mw))
  the requested tolerance from being achieved.  The error may be 
  underestimated.
  intout = quad(CTL_archemedian_step, 0, np.pi*(nv+nh), args = (hv, hh, mw))


Run 2 complete, made 348 changes.

Run 3 Start.
Run 3 complete, no changes made.

Completed in 3 runs, 1158 NAs unresolved.


Unnamed: 0,UID,Cleared,Vertical Diameter,Vertical Hub Diameter,Horizontal Diameter,Horizontal Hub Diameter,Vertical Spiral Turns,Horizontal Spiral Turns,Mesh Width,Capture Area,CTL,Units
0,1,False,,,,,,,,,,Px
1,1,False,,,,,,,,,53125.810000,Px
2,1,False,,,,,,,,796496.000000,,Px
3,1,False,,,,,,,,796496.000000,53125.810000,Px
4,1,False,,,,,,,15.876000,,,Px
...,...,...,...,...,...,...,...,...,...,...,...,...
507,1,False,1107.645,304.972,893.597,262.968,25.0,22.0,15.868665,796496.000000,53125.810000,Px
508,1,False,1107.645,304.972,893.597,262.968,25.0,22.0,15.876000,714390.607746,48510.382025,Px
509,1,False,1107.645,304.972,893.597,262.968,25.0,22.0,15.876000,714390.607746,53125.810000,Px
510,1,False,1107.645,304.972,893.597,262.968,25.0,22.0,15.876000,796496.000000,48510.382025,Px


In [30]:
# naiive run
print(f"Nulls: {out.isnull().sum().sum()}")
descriptives = out.describe(percentiles=[.5])
CV = descriptives.loc["std"]/descriptives.describe().loc["mean"] # Coeffiecient of variation
CV = CV.rename("cv")
descriptives = descriptives.append(CV)
descriptives = descriptives.rename({"50%": "median"}, axis='index')
descriptives

Nulls: 1158


Unnamed: 0,UID,Vertical Diameter,Vertical Hub Diameter,Horizontal Diameter,Horizontal Hub Diameter,Vertical Spiral Turns,Horizontal Spiral Turns,Mesh Width,Capture Area,CTL
count,512.0,384.0,384.0,384.0,384.0,384.0,384.0,364.0,400.0,382.0
mean,1.0,1050.601677,291.400874,958.360199,276.525374,24.031031,22.968536,15.944627,770923.505111,51660.797608
std,0.0,101.963932,19.661381,96.32853,19.67086,1.404454,1.404753,1.127192,87959.599341,3115.529675
min,1.0,893.597,262.968,893.597,262.351875,22.0,21.979503,12.263021,554104.434269,41007.278858
median,1.0,1107.645,304.972,893.597,262.968,25.0,22.0,15.876,796496.0,53125.81
max,1.0,1224.632671,304.972,1107.645,304.972,25.0,25.0,20.111357,909275.306275,56788.466361
cv,0.0,0.12846,0.075236,0.133372,0.078137,0.017503,0.017657,0.015753,0.169199,0.090708


---
## Minimizer tests
---

In [None]:
from scipy.optimize import minimize_scalar

# DON'T NEED THIS AS WE CAN JUST REARRANGE

def measure_from_CTL_obj_func(x, ctl, measures, counts):
    return abs(ctl - ((np.pi/16) * (sum(counts))*(x + sum(measures))))

def count_from_CTL_obj_func(x, ctl, measures, known_count):
    return abs(ctl - ((np.pi/16) * (x + known_count)*(sum(measures))))

minimize_scalar(measure_from_CTL_obj_func, args=(53125.81, (893.597, 304.972, 262.968), (51, 43)))


In [None]:
def CTL_archemedian_step(x, hv, hh, mw):
    a = ((hv/2)+(hh/2))/2
    deriv = mw/(np.pi*2)
    r = a + deriv * x
    
#     return np.sqrt(((hv*hh)/2 + (mw/(np.pi*2)) * x)**2 + (mw/(np.pi*2)) ** 2)
    return np.sqrt((a + deriv * x)**2 + (deriv) ** 2)

# quad(CTL_archemedian_step, 0, np.pi*(nv + nh), args = ())
quad(CTL_archemedian_step, 0, np.pi*(25 + 22), args = (304.972, 262.968, 15.876))

def hx_from_CTL_archemedian_obj_func(hx, h_present, nv, nh, mw, ctl):
    """Return the difference between the given ctl and the CTL from archemedian given one hub measure missing"""
    return(abs(ctl - quad(CTL_archemedian_step, 0, np.pi*(nv + nh), args = (hx, h_present, mw))[0]))

def nx_from_CTL_archemedian_obj_func(nx, n_present, hv, hh, mw, ctl):
    """Return the difference between the given ctl and the CTL from archemedian given one count missing"""
    return(abs(ctl - quad(CTL_archemedian_step, 0, np.pi*(nx + n_present), args = (hv, hh, mw))[0]))

def mw_from_CTL_archemedian_obj_func(mw, hv, hh, nv, nh, ctl):
    """Return the difference between the given ctl and the CTL from archemedian given one count missing"""
    return(abs(ctl - quad(CTL_archemedian_step, 0, np.pi*(nv + nh), args = (hv, hh, mw))[0]))

In [None]:
minimize_scalar(hx_from_CTL_archemedian_obj_func, args=(262.968, 25, 22, 15.876, 53125.81))

In [None]:
minimize_scalar(nx_from_CTL_archemedian_obj_func, args=(22, 304.972, 262.968, 15.876, 53125.81))

In [None]:
minimize_scalar(mw_from_CTL_archemedian_obj_func, args=(304.972, 262.968, 25, 22, 53125.81))