In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import logging
import sys

import pandas as pd

# Root logger: emit DEBUG and above to stdout as "<time> | <level> : <message>".
logging.basicConfig(
    stream=sys.stdout,
    level=logging.DEBUG,
    format='%(asctime)s | %(levelname)s : %(message)s',
)

# Raise the threshold for these two library loggers to WARNING so their
# DEBUG/INFO records are not emitted alongside the notebook's own messages.
logging.getLogger('urllib3').setLevel(logging.WARNING)
logging.getLogger('matplotlib').setLevel(logging.WARNING)

In [53]:
# Raw load of the Ames CSV; only the column names are normalised
# (dots replaced by underscores, then lower-cased).
df = pd.read_csv('data/ames.csv')
df = df.rename(columns=lambda name: name.replace('.', '_').lower())

In [82]:
import pandas as pd


def map_ratings(df, cols):
    """Encode ordinal quality labels in ``cols`` as numbers.

    The labels 'Ex', 'Gd', 'TA', 'Fa' and 'Po' become 5, 4, 3, 2 and 1, and
    missing values become 0. Any other value is left unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the rating columns. It is not modified.
    cols : list of str
        Names of the columns to encode.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` in which ``cols`` have been re-encoded.
    """
    ratings_map = {'Ex': 5, 'Gd': 4, 'TA': 3, 'Fa': 2, 'Po': 1}
    # Work on a copy so that piping a frame through this function never
    # alters the frame the caller still holds a reference to.
    out = df.copy()
    # infer_objects() makes the conversion to a numeric dtype explicit rather
    # than depending on replace()/fillna() silently downcasting object
    # columns, a behaviour recent pandas releases deprecate.
    out[cols] = out[cols].replace(ratings_map).fillna(0).infer_objects()
    return out

def select_dtypes_cols(df, include=None, exclude=None):
    """Return the names of the columns whose dtype matches the selection.

    ``include`` and ``exclude`` are forwarded to ``DataFrame.select_dtypes``.
    The selection runs on a zero-row slice of ``df``, so only column
    metadata is inspected.

    Returns
    -------
    list
        Matching column labels, in their original order.
    """
    header_only = df.iloc[:0]
    matching = header_only.select_dtypes(include, exclude)
    return matching.columns.tolist()

def convert_dtypes(df, select_dtype, fill_value, out_dtype):
    """Fill missing values in, and cast, every column of a given dtype.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is not modified.
    select_dtype
        Dtype selector used as the ``include`` argument when picking columns
        (see ``select_dtypes_cols``).
    fill_value
        Value that replaces missing entries in the selected columns.
    out_dtype
        Dtype the selected columns are cast to after filling.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with the selected columns filled and cast.
    """
    # Work on a copy so the caller's frame is left untouched.
    out = df.copy()
    cols = select_dtypes_cols(out, select_dtype)
    if cols:  # nothing to assign when no column matches the selector
        out[cols] = out[cols].fillna(fill_value).astype(out_dtype)
    return out

def downcast_numeric(df, select_dtype, downcast):
    """Downcast every column of a given dtype with ``pd.to_numeric``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is not modified.
    select_dtype
        Dtype selector used as the ``include`` argument when picking columns
        (see ``select_dtypes_cols``).
    downcast : str
        Forwarded to ``pd.to_numeric`` as its ``downcast`` argument
        (for example ``'unsigned'`` or ``'float'``).

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with the selected columns downcast.
    """
    # Work on a copy so the caller's frame is left untouched.
    out = df.copy()
    cols = select_dtypes_cols(out, select_dtype)
    if cols:  # nothing to assign when no column matches the selector
        out[cols] = out[cols].apply(pd.to_numeric, downcast=downcast)
    return out


# Build the cleaned frame: load, normalise headers, derive features, encode
# the quality ratings, then fill/cast/downcast by dtype.
ames = (
    pd.read_csv('data/ames.csv')
    # Header normalisation: dots -> underscores, then lower-case.
    .rename(columns=lambda col: col.replace('.', '_').lower())
    .assign(
        central_air=lambda df_: df_['central_air'].map({'Y': 1, 'N': 0}),
        # First number found in house_style. expand=False keeps the single
        # capture group as a Series instead of a one-column DataFrame.
        # Styles without a digit give NaN, which the float fill below
        # turns into 0.
        stories=lambda df_: (
            df_['house_style']
            .str.extract(r'(\d\.?\d?)', expand=False)
            .astype('float')
        ),
        # These flags must be derived here, before map_ratings() replaces
        # the missing ratings with 0.
        has_bsmt=lambda df_: df_['bsmt_qual'].notna(),
        has_garage=lambda df_: df_['garage_qual'].notna(),
        # Where garage_yr_blt is missing, fall back to year_built.
        garage_yr_blt=lambda df_:
            df_['garage_yr_blt'].fillna(df_['year_built']),
        total_sf_=lambda df_:
            df_[['total_bsmt_sf', 'x1st_flr_sf', 'x2nd_flr_sf']].sum(axis=1),
    )
    .pipe(map_ratings, [
        'exter_qual', 'exter_cond', 'bsmt_qual', 'bsmt_cond', 'kitchen_qual',
        'garage_qual', 'garage_cond', 'heating_qc', 'fireplace_qu', 'pool_qc'
    ])
    # Remaining object columns: missing -> 'Others', then categorical.
    .pipe(convert_dtypes, 'object', 'Others', 'category')
    # Float columns: missing -> 0.
    .pipe(convert_dtypes, 'float', 0, 'float')
    # Shrink numeric storage.
    .pipe(downcast_numeric, 'int', 'unsigned')
    .pipe(downcast_numeric, 'float', 'float')
    .drop('pid', axis=1)
)
ames

Unnamed: 0,order,ms_subclass,ms_zoning,lot_frontage,lot_area,street,alley,lot_shape,land_contour,utilities,lot_config,land_slope,neighborhood,condition_1,condition_2,bldg_type,house_style,overall_qual,overall_cond,year_built,year_remod_add,roof_style,roof_matl,exterior_1st,exterior_2nd,mas_vnr_type,mas_vnr_area,exter_qual,exter_cond,foundation,bsmt_qual,bsmt_cond,bsmt_exposure,bsmtfin_type_1,bsmtfin_sf_1,bsmtfin_type_2,bsmtfin_sf_2,bsmt_unf_sf,total_bsmt_sf,heating,heating_qc,central_air,electrical,x1st_flr_sf,x2nd_flr_sf,low_qual_fin_sf,gr_liv_area,bsmt_full_bath,bsmt_half_bath,full_bath,half_bath,bedroom_abvgr,kitchen_abvgr,kitchen_qual,totrms_abvgrd,functional,fireplaces,fireplace_qu,garage_type,garage_yr_blt,garage_finish,garage_cars,garage_area,garage_qual,garage_cond,paved_drive,wood_deck_sf,open_porch_sf,enclosed_porch,x3ssn_porch,screen_porch,pool_area,pool_qc,fence,misc_feature,misc_val,mo_sold,yr_sold,sale_type,sale_condition,saleprice,stories,has_bsmt,has_garage,total_sf_
0,1,20,RL,141.0,31770,Pave,Others,IR1,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,5,1960,1960,Hip,CompShg,BrkFace,Plywood,Stone,112.0,3,3,CBlock,3.0,4.0,Gd,BLQ,639.0,Unf,0.0,441.0,1080.0,GasA,2,1,SBrkr,1656,0,0,1656,1.0,0.0,1,0,3,1,3,7,Typ,2,4.0,Attchd,1960.0,Fin,2.0,528.0,3.0,3.0,P,210,62,0,0,0,0,0.0,Others,Others,0,5,2010,WD,Normal,215000,1.0,True,True,2736.0
1,2,20,RH,80.0,11622,Pave,Others,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Feedr,Norm,1Fam,1Story,5,6,1961,1961,Gable,CompShg,VinylSd,VinylSd,,0.0,3,3,CBlock,3.0,3.0,No,Rec,468.0,LwQ,144.0,270.0,882.0,GasA,3,1,SBrkr,896,0,0,896,0.0,0.0,1,0,2,1,3,5,Typ,0,0.0,Attchd,1961.0,Unf,1.0,730.0,3.0,3.0,Y,140,0,0,0,120,0,0.0,MnPrv,Others,0,6,2010,WD,Normal,105000,1.0,True,True,1778.0
2,3,20,RL,81.0,14267,Pave,Others,IR1,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,6,1958,1958,Hip,CompShg,Wd Sdng,Wd Sdng,BrkFace,108.0,3,3,CBlock,3.0,3.0,No,ALQ,923.0,Unf,0.0,406.0,1329.0,GasA,3,1,SBrkr,1329,0,0,1329,0.0,0.0,1,1,3,1,4,6,Typ,0,0.0,Attchd,1958.0,Unf,1.0,312.0,3.0,3.0,Y,393,36,0,0,0,0,0.0,Others,Gar2,12500,6,2010,WD,Normal,172000,1.0,True,True,2658.0
3,4,20,RL,93.0,11160,Pave,Others,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,7,5,1968,1968,Hip,CompShg,BrkFace,BrkFace,,0.0,4,3,CBlock,3.0,3.0,No,ALQ,1065.0,Unf,0.0,1045.0,2110.0,GasA,5,1,SBrkr,2110,0,0,2110,1.0,0.0,2,1,3,1,5,8,Typ,2,3.0,Attchd,1968.0,Fin,2.0,522.0,3.0,3.0,Y,0,0,0,0,0,0,0.0,Others,Others,0,4,2010,WD,Normal,244000,1.0,True,True,4220.0
4,5,60,RL,74.0,13830,Pave,Others,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,5,5,1997,1998,Gable,CompShg,VinylSd,VinylSd,,0.0,3,3,PConc,4.0,3.0,No,GLQ,791.0,Unf,0.0,137.0,928.0,GasA,4,1,SBrkr,928,701,0,1629,0.0,0.0,2,1,3,1,3,6,Typ,1,3.0,Attchd,1997.0,Fin,2.0,482.0,3.0,3.0,Y,212,34,0,0,0,0,0.0,MnPrv,Others,0,3,2010,WD,Normal,189900,2.0,True,True,2557.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2925,2926,80,RL,37.0,7937,Pave,Others,IR1,Lvl,AllPub,CulDSac,Gtl,Mitchel,Norm,Norm,1Fam,SLvl,6,6,1984,1984,Gable,CompShg,HdBoard,HdBoard,,0.0,3,3,CBlock,3.0,3.0,Av,GLQ,819.0,Unf,0.0,184.0,1003.0,GasA,3,1,SBrkr,1003,0,0,1003,1.0,0.0,1,0,3,1,3,6,Typ,0,0.0,Detchd,1984.0,Unf,2.0,588.0,3.0,3.0,Y,120,0,0,0,0,0,0.0,GdPrv,Others,0,3,2006,WD,Normal,142500,0.0,True,True,2006.0
2926,2927,20,RL,0.0,8885,Pave,Others,IR1,Low,AllPub,Inside,Mod,Mitchel,Norm,Norm,1Fam,1Story,5,5,1983,1983,Gable,CompShg,HdBoard,HdBoard,,0.0,3,3,CBlock,4.0,3.0,Av,BLQ,301.0,ALQ,324.0,239.0,864.0,GasA,3,1,SBrkr,902,0,0,902,1.0,0.0,1,0,2,1,3,5,Typ,0,0.0,Attchd,1983.0,Unf,2.0,484.0,3.0,3.0,Y,164,0,0,0,0,0,0.0,MnPrv,Others,0,6,2006,WD,Normal,131000,1.0,True,True,1766.0
2927,2928,85,RL,62.0,10441,Pave,Others,Reg,Lvl,AllPub,Inside,Gtl,Mitchel,Norm,Norm,1Fam,SFoyer,5,5,1992,1992,Gable,CompShg,HdBoard,Wd Shng,,0.0,3,3,PConc,4.0,3.0,Av,GLQ,337.0,Unf,0.0,575.0,912.0,GasA,3,1,SBrkr,970,0,0,970,0.0,1.0,1,0,3,1,3,6,Typ,0,0.0,Others,1992.0,Others,0.0,0.0,0.0,0.0,Y,80,32,0,0,0,0,0.0,MnPrv,Shed,700,7,2006,WD,Normal,132000,0.0,True,False,1882.0
2928,2929,20,RL,77.0,10010,Pave,Others,Reg,Lvl,AllPub,Inside,Mod,Mitchel,Norm,Norm,1Fam,1Story,5,5,1974,1975,Gable,CompShg,HdBoard,HdBoard,,0.0,3,3,CBlock,4.0,3.0,Av,ALQ,1071.0,LwQ,123.0,195.0,1389.0,GasA,4,1,SBrkr,1389,0,0,1389,1.0,0.0,1,0,2,1,3,6,Typ,1,3.0,Attchd,1975.0,RFn,2.0,418.0,3.0,3.0,Y,240,38,0,0,0,0,0.0,Others,Others,0,4,2006,WD,Normal,170000,1.0,True,True,2778.0


In [83]:
# Display the raw frame loaded earlier (still contains `pid` and the
# original string rating labels).
df

Unnamed: 0,order,pid,ms_subclass,ms_zoning,lot_frontage,lot_area,street,alley,lot_shape,land_contour,utilities,lot_config,land_slope,neighborhood,condition_1,condition_2,bldg_type,house_style,overall_qual,overall_cond,year_built,year_remod_add,roof_style,roof_matl,exterior_1st,exterior_2nd,mas_vnr_type,mas_vnr_area,exter_qual,exter_cond,foundation,bsmt_qual,bsmt_cond,bsmt_exposure,bsmtfin_type_1,bsmtfin_sf_1,bsmtfin_type_2,bsmtfin_sf_2,bsmt_unf_sf,total_bsmt_sf,heating,heating_qc,central_air,electrical,x1st_flr_sf,x2nd_flr_sf,low_qual_fin_sf,gr_liv_area,bsmt_full_bath,bsmt_half_bath,full_bath,half_bath,bedroom_abvgr,kitchen_abvgr,kitchen_qual,totrms_abvgrd,functional,fireplaces,fireplace_qu,garage_type,garage_yr_blt,garage_finish,garage_cars,garage_area,garage_qual,garage_cond,paved_drive,wood_deck_sf,open_porch_sf,enclosed_porch,x3ssn_porch,screen_porch,pool_area,pool_qc,fence,misc_feature,misc_val,mo_sold,yr_sold,sale_type,sale_condition,saleprice
0,1,526301100,20,RL,141.0,31770,Pave,,IR1,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,5,1960,1960,Hip,CompShg,BrkFace,Plywood,Stone,112.0,TA,TA,CBlock,TA,Gd,Gd,BLQ,639.0,Unf,0.0,441.0,1080.0,GasA,Fa,Y,SBrkr,1656,0,0,1656,1.0,0.0,1,0,3,1,TA,7,Typ,2,Gd,Attchd,1960.0,Fin,2.0,528.0,TA,TA,P,210,62,0,0,0,0,,,,0,5,2010,WD,Normal,215000
1,2,526350040,20,RH,80.0,11622,Pave,,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Feedr,Norm,1Fam,1Story,5,6,1961,1961,Gable,CompShg,VinylSd,VinylSd,,0.0,TA,TA,CBlock,TA,TA,No,Rec,468.0,LwQ,144.0,270.0,882.0,GasA,TA,Y,SBrkr,896,0,0,896,0.0,0.0,1,0,2,1,TA,5,Typ,0,,Attchd,1961.0,Unf,1.0,730.0,TA,TA,Y,140,0,0,0,120,0,,MnPrv,,0,6,2010,WD,Normal,105000
2,3,526351010,20,RL,81.0,14267,Pave,,IR1,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,6,1958,1958,Hip,CompShg,Wd Sdng,Wd Sdng,BrkFace,108.0,TA,TA,CBlock,TA,TA,No,ALQ,923.0,Unf,0.0,406.0,1329.0,GasA,TA,Y,SBrkr,1329,0,0,1329,0.0,0.0,1,1,3,1,Gd,6,Typ,0,,Attchd,1958.0,Unf,1.0,312.0,TA,TA,Y,393,36,0,0,0,0,,,Gar2,12500,6,2010,WD,Normal,172000
3,4,526353030,20,RL,93.0,11160,Pave,,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,7,5,1968,1968,Hip,CompShg,BrkFace,BrkFace,,0.0,Gd,TA,CBlock,TA,TA,No,ALQ,1065.0,Unf,0.0,1045.0,2110.0,GasA,Ex,Y,SBrkr,2110,0,0,2110,1.0,0.0,2,1,3,1,Ex,8,Typ,2,TA,Attchd,1968.0,Fin,2.0,522.0,TA,TA,Y,0,0,0,0,0,0,,,,0,4,2010,WD,Normal,244000
4,5,527105010,60,RL,74.0,13830,Pave,,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,5,5,1997,1998,Gable,CompShg,VinylSd,VinylSd,,0.0,TA,TA,PConc,Gd,TA,No,GLQ,791.0,Unf,0.0,137.0,928.0,GasA,Gd,Y,SBrkr,928,701,0,1629,0.0,0.0,2,1,3,1,TA,6,Typ,1,TA,Attchd,1997.0,Fin,2.0,482.0,TA,TA,Y,212,34,0,0,0,0,,MnPrv,,0,3,2010,WD,Normal,189900
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2925,2926,923275080,80,RL,37.0,7937,Pave,,IR1,Lvl,AllPub,CulDSac,Gtl,Mitchel,Norm,Norm,1Fam,SLvl,6,6,1984,1984,Gable,CompShg,HdBoard,HdBoard,,0.0,TA,TA,CBlock,TA,TA,Av,GLQ,819.0,Unf,0.0,184.0,1003.0,GasA,TA,Y,SBrkr,1003,0,0,1003,1.0,0.0,1,0,3,1,TA,6,Typ,0,,Detchd,1984.0,Unf,2.0,588.0,TA,TA,Y,120,0,0,0,0,0,,GdPrv,,0,3,2006,WD,Normal,142500
2926,2927,923276100,20,RL,,8885,Pave,,IR1,Low,AllPub,Inside,Mod,Mitchel,Norm,Norm,1Fam,1Story,5,5,1983,1983,Gable,CompShg,HdBoard,HdBoard,,0.0,TA,TA,CBlock,Gd,TA,Av,BLQ,301.0,ALQ,324.0,239.0,864.0,GasA,TA,Y,SBrkr,902,0,0,902,1.0,0.0,1,0,2,1,TA,5,Typ,0,,Attchd,1983.0,Unf,2.0,484.0,TA,TA,Y,164,0,0,0,0,0,,MnPrv,,0,6,2006,WD,Normal,131000
2927,2928,923400125,85,RL,62.0,10441,Pave,,Reg,Lvl,AllPub,Inside,Gtl,Mitchel,Norm,Norm,1Fam,SFoyer,5,5,1992,1992,Gable,CompShg,HdBoard,Wd Shng,,0.0,TA,TA,PConc,Gd,TA,Av,GLQ,337.0,Unf,0.0,575.0,912.0,GasA,TA,Y,SBrkr,970,0,0,970,0.0,1.0,1,0,3,1,TA,6,Typ,0,,,,,0.0,0.0,,,Y,80,32,0,0,0,0,,MnPrv,Shed,700,7,2006,WD,Normal,132000
2928,2929,924100070,20,RL,77.0,10010,Pave,,Reg,Lvl,AllPub,Inside,Mod,Mitchel,Norm,Norm,1Fam,1Story,5,5,1974,1975,Gable,CompShg,HdBoard,HdBoard,,0.0,TA,TA,CBlock,Gd,TA,Av,ALQ,1071.0,LwQ,123.0,195.0,1389.0,GasA,Gd,Y,SBrkr,1389,0,0,1389,1.0,0.0,1,0,2,1,TA,6,Typ,1,TA,Attchd,1975.0,RFn,2.0,418.0,TA,TA,Y,240,38,0,0,0,0,,,,0,4,2006,WD,Normal,170000
