In [1]:
from pydisagg.disaggregate import split_datapoint
import numpy as np
from pydisagg.models import RateMultiplicativeModel
from pydisagg.models import LMO_model
from pydisagg.models import LogOdds_model
import pandas as pd

### Basic example using RateMultiplicativeModel and OddsMultiplicativeModel

In [2]:
populations=np.array([20,10,5])
rate_pattern=np.array([0.1,0.5,0.7])
observed_total=31.
observed_total_SE=2.5

In [3]:
rmm=RateMultiplicativeModel()
oddm=LogOdds_model()

In [4]:
print(oddm.fit_beta(observed_total,rate_pattern,populations))
print(rmm.fit_beta(observed_total,rate_pattern,populations))

3.679899347162453
1.0826119473216687


In [5]:
print(rmm.split_to_rates(observed_total,rate_pattern,populations))
print(oddm.split_to_rates(observed_total,rate_pattern,populations))

[0.2952381  1.47619048 2.06666667]
[0.81497625 0.97539516 0.98930469]


In [6]:
b=rmm.fit_beta(observed_total,populations,rate_pattern)

bse=rmm.beta_standard_error(
    b,
    populations,
    rate_pattern,
    observed_total_SE
)
print(bse)
print(bse * rmm.count_diff_beta(b,rate_pattern,populations))
print(np.sum(bse * rmm.count_diff_beta(b,rate_pattern,populations)))

0.08064516129032258
[0.47619048 1.19047619 0.83333333]
2.5


In [7]:
rmm.count_split_standard_errors(b,rate_pattern,populations,observed_total_SE)

array([0.47619048, 1.19047619, 0.83333333])

In [9]:
b=oddm.fit_beta(observed_total,rate_pattern,populations)

In [10]:
1/oddm.H_diff_beta(b,rate_pattern,populations)

0.30223366091526715

In [12]:
oddm.fit_beta(observed_total,rate_pattern,populations)

3.679899347162453

In [15]:
oddm.beta_standard_error(
    b,
    rate_pattern,
    populations,
    observed_total_SE,
)

0.7555841522881679

In [16]:
estimate,SE,CI=split_datapoint(
    observed_total,
    populations,
    rate_pattern,
    observed_total_SE,
    model=RateMultiplicativeModel()
)
print("Estimated incidence in each group")
print(estimate)

print("Standard Error of Above Estimates")
print(SE)

print("CI Lower")
print(CI[0])
print("CI Upper")
print(CI[1])


AttributeError: 'RateMultiplicativeModel' object has no attribute 'split_groups'

In [4]:
estimate,SE,CI=split_datapoint(
    observed_total,
    populations,
    rate_pattern,
    observed_total_SE,
    model=LogOdds_model()
)
print("Estimated incidence in each group")
print(estimate)

print("Standard Error of Above Estimates")
print(SE)

print("CI Lower")
print(CI[0])

print("CI Upper")
print(CI[1])



Estimated incidence in each group
[16.29952498  9.75395156  4.94652346]
Standard Error of Above Estimates
[2.27869012 0.18133601 0.03997387]
CI Lower
[11.83337441  9.39853952  4.86817611]
CI Upper
[20.76567555 10.1093636   5.02487081]


### Example where we get unreasonable estimates
The rate-multiplicative model here estimates 14 events in a group of only 10 people, while the LogOdds model provides a more reasonable estimate, adjusting the larger group more, and increasing the estimated rate for them relatively more, as we incorporate the prior information that the estimated rate cannot go above 1. 

In [5]:
populations=np.array([20,10])
pattern=np.array([0.1,0.5])
observed_total=20
print(split_datapoint(
    observed_total,
    populations,
    pattern,
    model=RateMultiplicativeModel()
))
print(split_datapoint(
    observed_total,
    populations,
    pattern,
    model=LogOdds_model()
))

[ 5.71428571 14.28571429]
[10.85582305  9.14417695]


# Example below is somewhat out of date, see dataframe splitting example

### More complex setting of having population sex ratio differ from study sex ratio

In [6]:
pop_men=1000
pop_women=1100
pop_by_sex=np.array([pop_men,pop_women])

study_prop_men=0.6
study_prop_women=0.4
study_props=np.array([study_prop_men,study_prop_women])

observed_study_rate=0.7
study_se=0.1

#This hack of using the ratio of incidence rates works when we assume multiplicativity in rate rather than in odds
baseline_male_rate=1.2
baseline_female_rate=1
sex_splitting_model=RateMultiplicativeModel(np.array([baseline_female_rate,baseline_male_rate]))

#Fit with study_props, the study population proportions
sex_splitting_model.fit_beta(
    bucket_populations=study_props,
    observed_total=observed_study_rate,
    observed_total_se=study_se
)

#Predict with population proportions
estimated_by_sex=sex_splitting_model.predict_count(pop_by_sex)
SE_by_sex=sex_splitting_model.predict_count_SE(pop_by_sex)
print("Estimates:")
print(estimated_by_sex)

print("SE")
print(SE_by_sex)


Estimates:
[648.14814815 855.55555556]
SE
[ 92.59259259 122.22222222]


### We set up a toy problem with data from two larger age groups to split first in age and then in sex

In [7]:
age_splitting_model=LMO_model(1)
sex_splitting_model=RateMultiplicativeModel()

age_groups=np.array([0,1,2,3])
population_age_pattern=np.array([100,200,300,100])
global_age_pattern=np.array([0.5,0.3,0.5,0.6])

within_age_sex_rate_ratios=np.array([1.2,1.1,1,0.8])
within_age_sex_proportions=np.array([
    [0.4,0.6],
    [0.5,0.5],
    [0.6,0.4],
    [0.7,0.3]
])

datapoint_age_partition=np.array([0,2,4])
observed_incidences=np.array([250,300])


age_results=np.zeros(0)
for observation,lower,upper in zip(observed_incidences,datapoint_age_partition[:-1],datapoint_age_partition[1:]):
    counts_by_age=split_datapoint(
        observation,
        population_age_pattern[lower:upper],
        global_age_pattern[lower:upper],
        model=age_splitting_model
    )
    age_results=np.append(age_results,counts_by_age)


In [8]:
combined_split_results=np.zeros((4,3))
combined_split_results[:,0]=age_results

for age_incidence,sex_proportions,population_at_age,rate_ratio,age_id in zip(
    age_results,
    within_age_sex_proportions,
    population_age_pattern,
    within_age_sex_rate_ratios,
    range(len(age_results))
    ):
    combined_split_results[age_id,1:3]=split_datapoint(
        age_incidence,
        population_at_age*sex_proportions,
        np.array([rate_ratio,1]),
        model=sex_splitting_model
    )
    
full_results=pd.DataFrame(combined_split_results,columns=['Total in Age Group','Male','Female'])

In [9]:
full_results

Unnamed: 0,Total in Age Group,Male,Female
0,90.253327,40.11259,50.140737
1,159.746673,83.676829,76.069844
2,219.615242,131.769145,87.846097
3,80.384758,52.343563,28.041195
