# Example with Numeric Target Concepts

In [2]:
import numpy as np
import pandas as pd
import pysubgroup as ps

In [14]:
from pysubgroup.datasets import get_credit_data

# Load the credit example dataset bundled with pysubgroup.
data = get_credit_data()

# The numeric attribute whose distribution the subgroups should shift.
target = ps.NumericTarget("credit_amount")

# Candidate conditions are generated automatically from every attribute
# except the target column itself.
searchspace = ps.create_selectors(data, ignore=["credit_amount"])
print(searchspace)

# Search configuration: keep the 10 best subgroups and score them with the
# standard numeric quality function (a = 1.0). In the saved results below the
# quality equals size_sg * (mean_sg - mean_dataset).
task = ps.SubgroupDiscoveryTask(
    data,
    target,
    searchspace,
    result_set_size=10,
    depth=3,
    qf=ps.StandardQFNumeric(1.0),
)

# Execute a beam search over the search space.
result = ps.BeamSearch().execute(task)

# Last expression of the cell: show the results as a DataFrame.
result.to_dataframe()

[checking_status=='b'<0'', checking_status=='b'0<=X<200'', checking_status=='b'no checking'', checking_status=='b'>=200'', credit_history=='b'critical/other existing credit'', credit_history=='b'existing paid'', credit_history=='b'delayed previously'', credit_history=='b'no credits/all paid'', credit_history=='b'all paid'', purpose=='b'radio/tv'', purpose=='b'education'', purpose=='b'furniture/equipment'', purpose=='b'new car'', purpose=='b'used car'', purpose=='b'business'', purpose=='b'domestic appliance'', purpose=='b'repairs'', purpose=='b'other'', purpose=='b'retraining'', savings_status=='b'no known savings'', savings_status=='b'<100'', savings_status=='b'500<=X<1000'', savings_status=='b'>=1000'', savings_status=='b'100<=X<500'', employment=='b'>=7'', employment=='b'1<=X<4'', employment=='b'4<=X<7'', employment=='b'unemployed'', employment=='b'<1'', personal_status=='b'male single'', personal_status=='b'female div/dep/mar'', personal_status=='b'male div/sep'', personal_status=='

Unnamed: 0,quality,subgroup,size_sg,size_dataset,mean_sg,mean_dataset,std_sg,std_dataset,median_sg,median_dataset,max_sg,max_dataset,min_sg,min_dataset,mean_lift,median_lift
0,639577.046,duration>=30.0,213,1000,6273.967136,3271.258,3477.406285,2821.325155,5771.0,2319.5,18424.0,18424.0,909.0,250.0,1.917907,2.488036
1,624424.304,duration>=30.0 AND foreign_worker=='b'yes'',212,1000,6216.65566,3271.258,3383.751315,2821.325155,5756.5,2319.5,15945.0,18424.0,909.0,250.0,1.900387,2.481785
2,579219.206,duration>=30.0 AND other_parties=='b'none'',193,1000,6272.393782,3271.258,3474.426664,2821.325155,5771.0,2319.5,18424.0,18424.0,909.0,250.0,1.917426,2.488036
3,564066.464,duration>=30.0 AND foreign_worker=='b'yes'' AN...,192,1000,6209.104167,3271.258,3370.673183,2821.325155,5756.5,2319.5,15945.0,18424.0,909.0,250.0,1.898078,2.481785
4,547252.302,duration>=30.0 AND num_dependents==1.0,181,1000,6294.751381,3271.258,3570.855045,2821.325155,5771.0,2319.5,18424.0,18424.0,909.0,250.0,1.92426,2.488036
5,532099.56,duration>=30.0 AND foreign_worker=='b'yes'' AN...,180,1000,6227.366667,3271.258,3464.098443,2821.325155,5756.5,2319.5,15945.0,18424.0,909.0,250.0,1.903661,2.481785
6,491104.688,duration>=30.0 AND num_dependents==1.0 AND oth...,164,1000,6265.79878,3271.258,3566.324093,2821.325155,5756.5,2319.5,18424.0,18424.0,909.0,250.0,1.91541,2.481785
7,490633.14,duration>=30.0 AND other_payment_plans=='b'none'',170,1000,6157.335294,3271.258,3379.40037,2821.325155,5653.0,2319.5,15945.0,18424.0,909.0,250.0,1.882253,2.437163
8,490633.14,duration>=30.0 AND foreign_worker=='b'yes'' AN...,170,1000,6157.335294,3271.258,3379.40037,2821.325155,5653.0,2319.5,15945.0,18424.0,909.0,250.0,1.882253,2.437163
9,436843.784,duration>=30.0 AND other_parties=='b'none'' AN...,152,1000,6145.230263,3271.258,3377.930843,2821.325155,5653.0,2319.5,15945.0,18424.0,909.0,250.0,1.878553,2.437163


### Accessing statistics

In [11]:
# Generate some minimal example data: a numeric target and one categorical
# attribute. Building the frame from a dict of columns keeps "Target" numeric
# from the start; routing the mixed values through a single NumPy array would
# coerce every entry to a string and require casting the column back to float.
# Note: this rebinds `data`, replacing the credit dataset loaded earlier.
data = pd.DataFrame(
    {
        "Target": [1.0, 2.0, 3.0, 4.0, 5.0],
        "A": ["F", "F", "F", "Tr", "Tr"],
    }
)


# Define the target and an example selector (A == "Tr")
target = ps.NumericTarget("Target")
sgd = ps.EqualitySelector("A", "Tr")


# Compute statistics, the quality, and the optimistic estimate of the subgroup.
# NOTE(review): the return value of calculate_statistics is not captured here;
# presumably it holds the computed statistics -- assign it to a variable to
# inspect them (confirm against the pysubgroup API).
target.calculate_statistics(sgd, data)
qf = ps.StandardQFNumeric(0.5)
print(qf.evaluate(sgd, target, data))
print(qf.optimistic_estimate(sgd, target, data))

2.121320343559643
3.0


In [7]:
# Show the dtype of every column of the example frame.
# NOTE(review): the saved output below lists Target as object, and this cell's
# execution count is lower than that of the preceding cell, where Target is
# numeric -- the output looks stale; restart the kernel and run all cells.
data.dtypes

Target    object
A         object
dtype: object