### Multi set: train, test

In [1]:
from functools import partial

from pandas import set_option, reset_option, option_context

from contest.data import load
from contest.exploration.descriptions import default_data_keys as data_keys
from contest.exploration.descriptions.types import (
    describe as describe_types,
    default_multi_params as types_describe_params,
)
from contest.exploration.descriptions.nulls import (
    describe as describe_nulls,
    default_multi_params as nulls_describe_params,
)
from contest.exploration.descriptions.uniques import (
    describe as describe_uniques,
    default_multi_params as uniques_describe_params,
)

# Pre-bind the imported defaults so the cells below only pass what varies:
# the loader gets the default data keys, and each describe helper gets its
# matching "multi" parameter set.
load = partial(load, data_keys=data_keys)
describe_types, describe_nulls, describe_uniques = (
    partial(describe, **defaults)
    for describe, defaults in (
        (describe_types, types_describe_params),
        (describe_nulls, nulls_describe_params),
        (describe_uniques, uniques_describe_params),
    )
)

#### Load

In [2]:
# Load the data sets through the pre-configured loader; print_info=True makes
# it print the per-set summary shown in this cell's output.
data_sets = load(print_info=True)

train: train.csv
<class 'pandas.core.frame.DataFrame'>
Int64Index: 1460 entries, 1 to 1460
Data columns (total 80 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   MSSubClass     1460 non-null   int64  
 1   MSZoning       1460 non-null   object 
 2   LotFrontage    1201 non-null   float64
 3   LotArea        1460 non-null   int64  
 4   Street         1460 non-null   object 
 5   Alley          91 non-null     object 
 6   LotShape       1460 non-null   object 
 7   LandContour    1460 non-null   object 
 8   Utilities      1460 non-null   object 
 9   LotConfig      1460 non-null   object 
 10  LandSlope      1460 non-null   object 
 11  Neighborhood   1460 non-null   object 
 12  Condition1     1460 non-null   object 
 13  Condition2     1460 non-null   object 
 14  BldgType       1460 non-null   object 
 15  HouseStyle     1460 non-null   object 
 16  OverallQual    1460 non-null   int64  
 17  OverallCond    1460 non-null   int6

#### Types

In [3]:
# Build the types description for the loaded data sets; it is displayed in
# the following cell.
types_description = describe_types(data_sets)

In [4]:
# Disabled override (kept commented out): if enabled, it would raise pandas'
# row and column display limits to the full size of the table before it is
# rendered.
# set_option(
#     'display.max_rows', types_description.shape[0],
#     'display.max_columns', types_description.shape[1],
# )

# Show the types description as the cell's rich output.
types_description

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,dataset,train,test
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,detect nulls,Unnamed: 4_level_1,Unnamed: 5_level_1
shape,rows,,,1460,1459
shape,columns,,,80,79
shape,columns,,False,61,46
shape,columns,,True,19,33
type,<class 'object'>,,,43,43
type,<class 'object'>,,False,27,21
type,<class 'object'>,,True,16,22
type,<class 'numpy.number'>,,,37,36
type,<class 'numpy.number'>,,False,34,25
type,<class 'numpy.number'>,,True,3,11


In [5]:
# reset_option('display.*')

#### Nulls 

In [6]:
# Build the nulls description for the loaded data sets; it is displayed in
# the following cell.
nulls_description = describe_nulls(data_sets)

In [7]:
# Disabled override (kept commented out): if enabled, it would raise pandas'
# row and column display limits to the full size of the table before it is
# rendered.
# set_option(
#     'display.max_rows', nulls_description.shape[0],
#     'display.max_columns', nulls_description.shape[1],
# )

# Show the nulls description as the cell's rich output.
nulls_description

Unnamed: 0_level_0,Unnamed: 1_level_0,type,type,counts,counts,counts,counts,counts,counts,portion,portion,portion,portion
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,null,null,notnull,notnull,null,null,notnull,notnull
Unnamed: 0_level_2,dataset,train,test,train,test,train,test,train,test,train,test,train,test
Unnamed: 0_level_3,variable,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3
1,BsmtFinSF1,int64,float64,1460,1459,0,1,1460,1458,0.0,0.000685,1.0,0.999315
2,BsmtFinSF2,int64,float64,1460,1459,0,1,1460,1458,0.0,0.000685,1.0,0.999315
3,BsmtUnfSF,int64,float64,1460,1459,0,1,1460,1458,0.0,0.000685,1.0,0.999315
4,TotalBsmtSF,int64,float64,1460,1459,0,1,1460,1458,0.0,0.000685,1.0,0.999315
5,GarageCars,int64,float64,1460,1459,0,1,1460,1458,0.0,0.000685,1.0,0.999315
6,GarageArea,int64,float64,1460,1459,0,1,1460,1458,0.0,0.000685,1.0,0.999315
7,Exterior1st,object,object,1460,1459,0,1,1460,1458,0.0,0.000685,1.0,0.999315
8,Exterior2nd,object,object,1460,1459,0,1,1460,1458,0.0,0.000685,1.0,0.999315
9,KitchenQual,object,object,1460,1459,0,1,1460,1458,0.0,0.000685,1.0,0.999315
10,SaleType,object,object,1460,1459,0,1,1460,1458,0.0,0.000685,1.0,0.999315


In [8]:
# reset_option('display.*')

#### Uniques

In [9]:
# Build the uniques description for the loaded data sets; it is displayed in
# the following cell.
uniques_description = describe_uniques(data_sets)

In [10]:
# Render every row of the uniques description. The row limit is raised only
# for the duration of the `with` block, so pandas' global display options are
# not modified by this cell and its output does not depend on cell run order.
with option_context('display.max_rows', uniques_description.shape[0]):
    # An explicit display() call is required: a bare last expression would be
    # rendered only after the context manager has restored the previous limit.
    # `display` is the built-in provided by the IPython/Jupyter kernel.
    display(uniques_description)

Unnamed: 0_level_0,Unnamed: 1_level_0,type,type,counts,counts,counts,counts,counts,counts,unique,unique,unique,unique,unique,unique,unique,unique
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,non-null,non-null,null,null,counts,counts,counts,counts,ratio,ratio,ratio,ratio
Unnamed: 0_level_2,include nulls,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,True,True,False,False,True,True,False,False
Unnamed: 0_level_3,dataset,train,test,train,test,train,test,train,test,train,test,train,test,train,test,train,test
Unnamed: 0_level_4,variable,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4
1,Utilities,object,object,1460,1459,1460,1457,0,2,2,2,2,1,0.00137,0.001371,0.00137,0.000686
2,Street,object,object,1460,1459,1460,1459,0,0,2,2,2,2,0.00137,0.001371,0.00137,0.001371
3,CentralAir,object,object,1460,1459,1460,1459,0,0,2,2,2,2,0.00137,0.001371,0.00137,0.001371
4,Alley,object,object,1460,1459,91,107,1369,1352,3,3,2,2,0.002055,0.002056,0.021978,0.018692
5,HalfBath,int64,int64,1460,1459,1460,1459,0,0,3,3,3,3,0.002055,0.002056,0.002055,0.002056
6,LandSlope,object,object,1460,1459,1460,1459,0,0,3,3,3,3,0.002055,0.002056,0.002055,0.002056
7,PavedDrive,object,object,1460,1459,1460,1459,0,0,3,3,3,3,0.002055,0.002056,0.002055,0.002056
8,BsmtHalfBath,int64,float64,1460,1459,1460,1457,0,2,3,4,3,3,0.002055,0.002742,0.002055,0.002059
9,PoolQC,object,object,1460,1459,7,3,1453,1456,4,3,3,2,0.00274,0.002056,0.428571,0.666667
10,KitchenAbvGr,int64,int64,1460,1459,1460,1459,0,0,4,3,4,3,0.00274,0.002056,0.00274,0.002056


In [11]:
# Restore all pandas options matching 'display.*' to their default values.
reset_option('display.*')