In [29]:
import pandas as pd
import numpy as np

In [123]:
bike_data = pd.read_csv('../Datasets/bikes.csv')
# df_bike = bike_data[['season', 'mnth', 'holiday', 'weekday', 'workingday', 
#                      'weathersit', 'temp', 'atemp', 'hum', 'windspeed', 
#                      'registered', 'cnt']]

df_bike = bike_data[['season', 'mnth', 'holiday', 'weekday', 'workingday', 
                     'weathersit',  'registered', 'cnt']]
df_bike.head()

Unnamed: 0,season,mnth,holiday,weekday,workingday,weathersit,registered,cnt
0,1,1,0,6,0,1,13,16
1,1,1,0,6,0,1,32,40
2,1,1,0,6,0,1,27,32
3,1,1,0,6,0,1,10,13
4,1,1,0,6,0,1,1,1


In [124]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

train_X, test_X, train_y, test_y = train_test_split(df_bike.drop('cnt', axis=1), df_bike['cnt'])

flip_test = test_X.copy()
flip_test['holiday'] = flip_test['holiday'].apply(lambda x: x^1)

rf_model = RandomForestRegressor(random_state=0).fit(train_X, train_y)
pred_y = rf_model.predict(test_X)
flip_pred_y = rf_model.predict(flip_test)



In [126]:
test_X = test_X.reset_index(drop=True)
test_X.head()

Unnamed: 0,season,mnth,holiday,weekday,workingday,weathersit,registered
0,1,12,0,2,1,3,44
1,2,5,0,6,0,1,33
2,3,9,0,2,1,1,189
3,2,5,0,4,1,1,456
4,2,5,0,4,1,1,138


In [127]:
holiday_index = test_X.loc[test_X['holiday'] == 1].index
no_holiday_index = test_X.loc[test_X['holiday'] == 0].index

# difference in bike sharing number between holiday and non-holidays
diff = np.mean(pred_y[holiday_index]) - np.mean(pred_y[no_holiday_index])
# difference after fliping the value of "holiday"
flip_diff = np.mean(flip_pred_y[no_holiday_index]) - np.mean(flip_pred_y[holiday_index])
avg_effect = flip_diff - diff

print('average effect: ', avg_effect)

average effect:  104.22893523890633


In [128]:
flip_test['deviation'] = (flip_pred_y - pred_y) - avg_effect
flip_test.head()

Unnamed: 0,season,mnth,holiday,weekday,workingday,weathersit,registered,deviation
8542,1,12,1,2,1,3,44,-93.928935
3273,2,5,1,6,0,1,33,-102.928935
14753,3,9,1,2,1,1,189,-104.228935
2900,2,5,1,4,1,1,456,-104.228935
12101,2,5,1,4,1,1,138,-104.228935


In [129]:
import pysubgroup as ps

target = ps.NumericTarget('deviation')
searchSpace = ps.create_selectors(flip_test, ignore=['deviation'])
task = ps.SubgroupDiscoveryTask (flip_test, target, searchSpace, depth=4, qf=ps.StandardQF_numeric(1))
result = ps.BeamSearch().execute(task)
for (q, sg) in result:
    print (str(q) + ":\t" + str(sg.subgroup_description))

3726.769622075664:	holiday=1 AND registered: [23:82[
3524.0436695851213:	registered: [23:82[
2278.1733750088924:	holiday=1 AND registered: [23:82[ AND weathersit=1
2215.7353870246025:	holiday=1 AND workingday=1
2215.7353870246025:	workingday=1
2148.6575873543497:	holiday=1 AND registered: [82:150[
2098.4999838790764:	registered: [23:82[ AND weathersit=1
2066.2096231719916:	holiday=1 AND workingday=1 AND registered: [23:82[
2066.2096231719916:	workingday=1 AND registered: [23:82[
1912.3597445784148:	registered: [82:150[


In [130]:
df = ps.as_df(flip_test, result, statistics_to_show=ps.all_statistics_numeric)
df

Unnamed: 0,quality,subgroup,size_sg,size_dataset,mean_sg,mean_dataset,std_sg,std_dataset,median_sg,median_dataset,max_sg,max_dataset,min_sg,min_dataset,mean_lift,median_lift
0,3726.769622,holiday=1 AND registered: [23:82[,826.0,4345.0,-99.269351,-103.781179,6.90695,15.22033,-101.328935,-104.228935,-55.528935,14.171065,-116.578935,-292.628935,0.956526,0.972177
1,3524.04367,registered: [23:82[,859.0,4345.0,-99.678684,-103.781179,7.20613,15.22033,-101.628935,-104.228935,-55.528935,14.171065,-136.528935,-292.628935,0.96047,0.975055
2,2278.173375,holiday=1 AND registered: [23:82[ AND weathers...,486.0,4345.0,-99.09358,-103.781179,6.972106,15.22033,-100.953935,-104.228935,-55.528935,14.171065,-116.578935,-292.628935,0.954832,0.968579
3,2215.735387,holiday=1 AND workingday=1,2951.0,4345.0,-103.030337,-103.781179,3.593314,15.22033,-104.228935,-104.228935,-74.928935,14.171065,-110.028935,-292.628935,0.992765,1.0
4,2215.735387,workingday=1,2951.0,4345.0,-103.030337,-103.781179,3.593314,15.22033,-104.228935,-104.228935,-74.928935,14.171065,-110.028935,-292.628935,0.992765,1.0
5,2148.657587,holiday=1 AND registered: [82:150[,853.0,4345.0,-101.262237,-103.781179,8.821985,15.22033,-104.228935,-104.228935,-51.928935,14.171065,-129.228935,-292.628935,0.975728,1.0
6,2098.499984,registered: [23:82[ AND weathersit=1,509.0,4345.0,-99.658389,-103.781179,7.46034,15.22033,-101.328935,-104.228935,-55.528935,14.171065,-136.528935,-292.628935,0.960274,0.972177
7,2066.209623,holiday=1 AND workingday=1 AND registered: [23...,496.0,4345.0,-99.615434,-103.781179,5.747332,15.22033,-101.453935,-104.228935,-75.128935,14.171065,-110.028935,-292.628935,0.95986,0.973376
8,2066.209623,workingday=1 AND registered: [23:82[,496.0,4345.0,-99.615434,-103.781179,5.747332,15.22033,-101.453935,-104.228935,-75.128935,14.171065,-110.028935,-292.628935,0.95986,0.973376
9,1912.359745,registered: [82:150[,872.0,4345.0,-101.588106,-103.781179,9.45744,15.22033,-104.228935,-104.228935,-51.928935,14.171065,-162.328935,-292.628935,0.978868,1.0


In [138]:
l = [False, True, False]
y = lambda x : 1^x
[y(i) for i in l]

[1, 0, 1]

In [142]:
1 == 1

True