# SampleSize Lib - Examples



Link: https://andriygav.github.io/SampleSizeLib/info.html

In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import numpy as np
#
from samplesizelib.linear.statistical import LagrangeEstimator
from samplesizelib.linear.statistical import LikelihoodRatioEstimator
from samplesizelib.linear.statistical import WaldEstimator
from samplesizelib.linear.models      import RegressionModel
from samplesizelib.linear.models      import LogisticModel
#
from samplesizelib.linear.heuristic import CrossValidationEstimator
from samplesizelib.linear.heuristic import BootstrapEstimator
from samplesizelib.linear.heuristic import LogisticRegressionEstimator
from samplesizelib.linear.bayesian  import APVCEstimator
from samplesizelib.linear.bayesian  import ACCEstimator
from samplesizelib.linear.bayesian  import ALCEstimator
from samplesizelib.linear.bayesian  import MaxUtilityEstimator
from samplesizelib.linear.bayesian  import KLEstimator

# Datasets

## Generate datasets for regression and classification tasks

In [3]:
# Initialization
#
# Seed NumPy's global random generator so the random draws made by the
# later cells are repeatable from a fresh kernel.
np.random.seed(0)

# Dataset dimensions: n is the number of features (columns) and m is the
# number of instances (rows) of the generated arrays.
n, m = 30, 100

In [4]:
# Draw both synthetic datasets from NumPy's global random generator.
# The draw order (regression features, regression targets, classification
# features, classification labels) is kept fixed so the sampled values do
# not change.

# Regression dataset: standard-normal features and a standard-normal target.
X_rg = np.random.randn(m, n)
y_rg = np.random.randn(m)

# Classification dataset: standard-normal features and integer labels
# drawn from {0, 1}.
X_cl = np.random.randn(m, n)
y_cl = np.random.randint(2, size=m)

# Report the dimensions of the regression dataset.
print('[INFO] Dataset: Regression')
print('[INFO] Number of features:  ', X_rg.shape[1])
print('[INFO] Number of instances: ', X_rg.shape[0])
print('\n')

# Report the dimensions of the classification dataset.
print('[INFO] Dataset: Classification')
print('[INFO] Number of features:  ', X_cl.shape[1])
print('[INFO] Number of instances: ', X_cl.shape[0])

[INFO] Dataset: Regression
[INFO] Number of features:   30
[INFO] Number of instances:  100


[INFO] Dataset: Classification
[INFO] Number of features:   30
[INFO] Number of instances:  100


## Preprocess

In [5]:
from sklearn.preprocessing import StandardScaler

# A single StandardScaler instance is used for both datasets. Each
# fit_transform call fits the scaler on the array it receives, so once this
# cell has run `scaler` holds the fit computed on the classification features.
scaler = StandardScaler()

# Standardize the regression features.
X_rg = scaler.fit_transform(X_rg)

# Standardize the classification features.
X_cl = scaler.fit_transform(X_cl)

# Regression

## Bayesian methods

### Example of KL-divergence method

In [None]:
# KL-divergence estimator on the regression dataset: build the estimator,
# call it on (X_rg, y_rg), then print ret['m*'] with the reduction relative
# to the number of rows in X_rg. Any exception is printed, not propagated.
try:
    model = KLEstimator(RegressionModel)
    ret = model(X_rg, y_rg)

    m_star = ret['m*']
    reduction = 100.*(1 - m_star / X_rg.shape[0])
    print('[INFO] Number of samples: {} - Reduction: {:.1f}%'.format(m_star, reduction))
except Exception as err:
    print('[ERROR] Method cannot be applied')
    print('Message: ', err)

### Example of Max Utility method

In [None]:
# Max Utility estimator on the regression dataset: build the estimator,
# call it on (X_rg, y_rg), then print ret['m*'] with the reduction relative
# to the number of rows in X_rg. Any exception is printed, not propagated.
try:
    model = MaxUtilityEstimator(RegressionModel)
    ret = model(X_rg, y_rg)

    m_star = ret['m*']
    reduction = 100.*(1 - m_star / X_rg.shape[0])
    print('[INFO] Number of samples: {} - Reduction: {:.1f}%'.format(m_star, reduction))
except Exception as err:
    print('[ERROR] Method cannot be applied')
    print('Message: ', err)

### Example of ALC method

In [None]:
# ALC estimator on the regression dataset: build the estimator, call it on
# (X_rg, y_rg), then print ret['m*'] with the reduction relative to the
# number of rows in X_rg. Any exception is printed, not propagated.
try:
    model = ALCEstimator(RegressionModel)
    ret = model(X_rg, y_rg)

    m_star = ret['m*']
    reduction = 100.*(1 - m_star / X_rg.shape[0])
    print('[INFO] Number of samples: {} - Reduction: {:.1f}%'.format(m_star, reduction))
except Exception as err:
    print('[ERROR] Method cannot be applied')
    print('Message: ', err)

### Example of ACC method

In [None]:
# ACC estimator on the regression dataset: build the estimator, call it on
# (X_rg, y_rg), then print ret['m*'] with the reduction relative to the
# number of rows in X_rg. Any exception is printed, not propagated.
try:
    model = ACCEstimator(RegressionModel)
    ret = model(X_rg, y_rg)

    m_star = ret['m*']
    reduction = 100.*(1 - m_star / X_rg.shape[0])
    print('[INFO] Number of samples: {} - Reduction: {:.1f}%'.format(m_star, reduction))
except Exception as err:
    print('[ERROR] Method cannot be applied')
    print('Message: ', err)

### Example of APVC method

In [None]:
# APVC estimator on the regression dataset: build the estimator, call it on
# (X_rg, y_rg), then print ret['m*'] with the reduction relative to the
# number of rows in X_rg. Any exception is printed, not propagated.
try:
    model = APVCEstimator(RegressionModel)
    ret = model(X_rg, y_rg)

    m_star = ret['m*']
    reduction = 100.*(1 - m_star / X_rg.shape[0])
    print('[INFO] Number of samples: {} - Reduction: {:.1f}%'.format(m_star, reduction))
except Exception as err:
    print('[ERROR] Method cannot be applied')
    print('Message: ', err)

## Statistical methods

### Example of Lagrange based method

In [None]:
# Lagrange-based estimator on the regression dataset: build the estimator,
# call it on (X_rg, y_rg), then print ret['m*'] with the reduction relative
# to the number of rows in X_rg. Any exception is printed, not propagated.
try:
    model = LagrangeEstimator(RegressionModel)
    ret = model(X_rg, y_rg)

    m_star = ret['m*']
    reduction = 100.*(1 - m_star / X_rg.shape[0])
    print('[INFO] Number of samples: {} - Reduction: {:.1f}%'.format(m_star, reduction))
except Exception as err:
    print('[ERROR] Method cannot be applied')
    print('Message: ', err)

### Example of Likelihood Ratio based method

In [None]:
# Likelihood-ratio estimator on the regression dataset: build the estimator,
# call it on (X_rg, y_rg), then print ret['m*'] with the reduction relative
# to the number of rows in X_rg. Any exception is printed, not propagated.
try:
    model = LikelihoodRatioEstimator(RegressionModel)
    ret = model(X_rg, y_rg)

    m_star = ret['m*']
    reduction = 100.*(1 - m_star / X_rg.shape[0])
    print('[INFO] Number of samples: {} - Reduction: {:.1f}%'.format(m_star, reduction))
except Exception as err:
    print('[ERROR] Method cannot be applied')
    print('Message: ', err)

### Example of Wald based method

In [None]:
# Wald-based estimator on the regression dataset: build the estimator, call
# it on (X_rg, y_rg), then print ret['m*'] with the reduction relative to
# the number of rows in X_rg. Any exception is printed, not propagated.
try:
    model = WaldEstimator(RegressionModel)
    ret = model(X_rg, y_rg)

    m_star = ret['m*']
    reduction = 100.*(1 - m_star / X_rg.shape[0])
    print('[INFO] Number of samples: {} - Reduction: {:.1f}%'.format(m_star, reduction))
except Exception as err:
    print('[ERROR] Method cannot be applied')
    print('Message: ', err)

## Heuristic methods

### Example of Bootstrap based method

In [None]:
# Bootstrap-based estimator on the regression dataset: build the estimator,
# call it on (X_rg, y_rg), then print ret['m*'] with the reduction relative
# to the number of rows in X_rg. Any exception is printed, not propagated.
try:
    model = BootstrapEstimator(RegressionModel)
    ret = model(X_rg, y_rg)

    m_star = ret['m*']
    reduction = 100.*(1 - m_star / X_rg.shape[0])
    print('[INFO] Number of samples: {} - Reduction: {:.1f}%'.format(m_star, reduction))
except Exception as err:
    print('[ERROR] Method cannot be applied')
    print('Message: ', err)

### Example of Cross Validation based method

In [None]:
# Cross-validation-based estimator on the regression dataset: build the
# estimator, call it on (X_rg, y_rg), then print ret['m*'] with the
# reduction relative to the number of rows in X_rg. Any exception is
# printed, not propagated.
try:
    model = CrossValidationEstimator(RegressionModel)
    ret = model(X_rg, y_rg)

    m_star = ret['m*']
    reduction = 100.*(1 - m_star / X_rg.shape[0])
    print('[INFO] Number of samples: {} - Reduction: {:.1f}%'.format(m_star, reduction))
except Exception as err:
    print('[ERROR] Method cannot be applied')
    print('Message: ', err)

# Classification

## Bayesian methods

### Example of KL-divergence method

In [None]:
# KL-divergence estimator on the classification dataset: build the
# estimator, call it on (X_cl, y_cl), then print ret['m*'] with the
# reduction relative to the number of rows in X_cl. Any exception is
# printed, not propagated.
try:
    model = KLEstimator(LogisticModel)
    ret = model(X_cl, y_cl)

    m_star = ret['m*']
    reduction = 100.*(1 - m_star / X_cl.shape[0])
    print('[INFO] Number of samples: {} - Reduction: {:.1f}%'.format(m_star, reduction))
except Exception as err:
    print('[ERROR] Method cannot be applied')
    print('Message: ', err)

### Example of Max Utility method

In [None]:
# Max Utility estimator on the classification dataset: build the estimator,
# call it on (X_cl, y_cl), then print ret['m*'] with the reduction relative
# to the number of rows in X_cl. Any exception is printed, not propagated.
try:
    model = MaxUtilityEstimator(LogisticModel)
    ret = model(X_cl, y_cl)

    m_star = ret['m*']
    reduction = 100.*(1 - m_star / X_cl.shape[0])
    print('[INFO] Number of samples: {} - Reduction: {:.1f}%'.format(m_star, reduction))
except Exception as err:
    print('[ERROR] Method cannot be applied')
    print('Message: ', err)

### Example of ALC method

In [None]:
# ALC estimator on the classification dataset: build the estimator, call it
# on (X_cl, y_cl), then print ret['m*'] with the reduction relative to the
# number of rows in X_cl. Any exception is printed, not propagated.
try:
    model = ALCEstimator(LogisticModel)
    ret = model(X_cl, y_cl)

    m_star = ret['m*']
    reduction = 100.*(1 - m_star / X_cl.shape[0])
    print('[INFO] Number of samples: {} - Reduction: {:.1f}%'.format(m_star, reduction))
except Exception as err:
    print('[ERROR] Method cannot be applied')
    print('Message: ', err)

### Example of ACC method

In [None]:
# ACC estimator on the classification dataset: build the estimator, call it
# on (X_cl, y_cl), then print ret['m*'] with the reduction relative to the
# number of rows in X_cl. Any exception is printed, not propagated.
try:
    model = ACCEstimator(LogisticModel)
    ret = model(X_cl, y_cl)

    m_star = ret['m*']
    reduction = 100.*(1 - m_star / X_cl.shape[0])
    print('[INFO] Number of samples: {} - Reduction: {:.1f}%'.format(m_star, reduction))
except Exception as err:
    print('[ERROR] Method cannot be applied')
    print('Message: ', err)

### Example of APVC method

In [11]:
# APVC estimator on the classification dataset: build the estimator, call it
# on (X_cl, y_cl), then print ret['m*'] with the reduction relative to the
# number of rows in X_cl. Any exception is printed, not propagated.
try:
    model = APVCEstimator(LogisticModel)
    ret = model(X_cl, y_cl)

    m_star = ret['m*']
    reduction = 100.*(1 - m_star / X_cl.shape[0])
    print('[INFO] Number of samples: {} - Reduction: {:.1f}%'.format(m_star, reduction))
except Exception as err:
    print('[ERROR] Method cannot be applied')
    print('Message: ', err)

[INFO] Number of samples: 99 - Reduction: 1.0%


## Statistical methods

### Example of Lagrange based method

In [10]:
# Lagrange-based estimator on the classification dataset: build the
# estimator, call it on (X_cl, y_cl), then print ret['m*'] with the
# reduction relative to the number of rows in X_cl. Any exception is
# printed, not propagated.
try:
    model = LagrangeEstimator(LogisticModel)
    ret = model(X_cl, y_cl)

    m_star = ret['m*']
    reduction = 100.*(1 - m_star / X_cl.shape[0])
    print('[INFO] Number of samples: {} - Reduction: {:.1f}%'.format(m_star, reduction))
except Exception as err:
    print('[ERROR] Method cannot be applied')
    print('Message: ', err)

[INFO] Number of samples: 82 - Reduction: 18.0%


### Example of Likelihood Ratio based method

In [9]:
# Likelihood-ratio estimator on the classification dataset: build the
# estimator, call it on (X_cl, y_cl), then print ret['m*'] with the
# reduction relative to the number of rows in X_cl. Any exception is
# printed, not propagated.
try:
    model = LikelihoodRatioEstimator(LogisticModel)
    ret = model(X_cl, y_cl)

    m_star = ret['m*']
    reduction = 100.*(1 - m_star / X_cl.shape[0])
    print('[INFO] Number of samples: {} - Reduction: {:.1f}%'.format(m_star, reduction))
except Exception as err:
    print('[ERROR] Method cannot be applied')
    print('Message: ', err)

[INFO] Number of samples: 40 - Reduction: 60.0%


### Example of Wald based method

In [8]:
# Wald-based estimator on the classification dataset: build the estimator,
# call it on (X_cl, y_cl), then print ret['m*'] with the reduction relative
# to the number of rows in X_cl. Any exception is printed, not propagated.
try:
    model = WaldEstimator(LogisticModel)
    ret = model(X_cl, y_cl)

    m_star = ret['m*']
    reduction = 100.*(1 - m_star / X_cl.shape[0])
    print('[INFO] Number of samples: {} - Reduction: {:.1f}%'.format(m_star, reduction))
except Exception as err:
    print('[ERROR] Method cannot be applied')
    print('Message: ', err)

[INFO] Number of samples: 88 - Reduction: 12.0%


## Heuristic methods

### Example of Logistic Regression method

In [6]:
# Logistic Regression heuristic on the classification dataset: build the
# estimator, call it on (X_cl, y_cl), then print ret['m*'] with the
# reduction relative to the number of rows in X_cl. Any exception is
# printed, not propagated; the output saved with this notebook shows the
# error branch being taken for this cell.
try:
    model = LogisticRegressionEstimator(LogisticModel)
    ret = model(X_cl, y_cl)

    m_star = ret['m*']
    reduction = 100.*(1 - m_star / X_cl.shape[0])
    print('[INFO] Number of samples: {} - Reduction: {:.1f}%'.format(m_star, reduction))
except Exception as err:
    print('[ERROR] Method cannot be applied')
    print('Message: ', err)

[ERROR] Method cannot be applied
Message:  cannot convert float infinity to integer


### Example of Bootstrap based method

In [7]:
# Bootstrap-based estimator on the classification dataset: build the
# estimator, call it on (X_cl, y_cl), then print ret['m*'] with the
# reduction relative to the number of rows in X_cl. Any exception is
# printed, not propagated.
try:
    model = BootstrapEstimator(LogisticModel)
    ret = model(X_cl, y_cl)

    m_star = ret['m*']
    reduction = 100.*(1 - m_star / X_cl.shape[0])
    print('[INFO] Number of samples: {} - Reduction: {:.1f}%'.format(m_star, reduction))
except Exception as err:
    print('[ERROR] Method cannot be applied')
    print('Message: ', err)

[INFO] Number of samples: 99 - Reduction: 1.0%


### Example of Cross Validation based method

In [None]:
# Cross-validation-based estimator on the classification dataset: build the
# estimator, call it on (X_cl, y_cl), then print ret['m*'] with the
# reduction relative to the number of rows in X_cl. Any exception is
# printed, not propagated.
try:
    model = CrossValidationEstimator(LogisticModel)
    ret = model(X_cl, y_cl)

    m_star = ret['m*']
    reduction = 100.*(1 - m_star / X_cl.shape[0])
    print('[INFO] Number of samples: {} - Reduction: {:.1f}%'.format(m_star, reduction))
except Exception as err:
    print('[ERROR] Method cannot be applied')
    print('Message: ', err)