#### ***Eksik degerleri donusturme***
- One hot encoding: nominal degiskenlere uygulanabilir
- Integer encoding: ordinal (siralama) degiskenlere uygulanabilir

In [2]:
import pandas as pd
import numpy as np
import warnings
import matplotlib.pyplot as plt
import seaborn as sns
import missingno
import json
# Suppress every warning for the remainder of the session.
warnings.filterwarnings(action='ignore')

# By default pandas truncates wide or long frames when displaying them;
# raise both limits so up to 100 rows and 100 columns are shown.
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)

# Render floats with a thousands separator and three decimal places.
pd.set_option('display.float_format', '{:,.3f}'.format)

# Font definitions for plot titles and axis labels.
# Both share the same settings but are kept as two independent dicts.
title_font = dict(family='times new roman', color='darkred', weight='bold', size=14)
axis_font = dict(title_font)

# Read the data set from disk into a DataFrame.
house_prices = pd.read_csv(filepath_or_buffer='train.csv')


- PoolQC ve FireplaceQu degiskenleri, ordinal (siralama) oldugundan integer encoding uygulanabilir.
- Alley, Fence, MiscFeature degiskenleri ise nominal degiskenler oldugundan one hot encoding uygulanabilir.
- Bunlari modelleme asamasinda gerekirse uygulayabiliriz.

In [3]:
# Keep the encoded data in its own frame so the raw `house_prices` stays untouched.
house_prices_encoded = house_prices.copy()

# Columns with many missing values, grouped by measurement scale.
ordinals = ['PoolQC', 'FireplaceQu']
nominals = ['Alley', 'Fence', 'MiscFeature']

# One-hot encode every nominal column in a single call and append the indicator
# columns to the frame. Missing values get no indicator column of their own
# (dummy_na is left at its default), so such rows are all-zero within that
# column group. `dtype` is pinned so the indicators are 0/1 integers on every
# pandas version (pandas >= 2.0 would otherwise produce booleans).
nominal_dummies = pd.get_dummies(house_prices_encoded[nominals],
                                 columns=nominals,
                                 prefix=nominals,
                                 dtype='uint8')
house_prices_encoded = pd.concat([house_prices_encoded, nominal_dummies], axis=1)

# The original nominal columns are kept next to their indicator columns; they
# can be removed later with `house_prices_encoded.drop(columns=nominals)`.

# Integer codes for the ordinal columns (a larger code is a better rating).
# The `None` entry is the code assigned to missing values.
encode_nums = {"PoolQC":      {"Ex": 3, "Gd": 2, "Fa": 1, None: 0},
               "FireplaceQu": {"Ex": 5, "Gd": 4, "TA": 3, "Fa": 2, "Po": 1, None: 0}}

# Apply the integer encoding column by column. Compared with a frame-wide
# `replace(..., inplace=True)` this handles missing values explicitly, always
# yields an int64 column, and fails loudly instead of leaving stray strings
# behind when a label has no code.
for column in ordinals:
    mapping = encode_nums[column]
    labels_to_codes = {label: code for label, code in mapping.items()
                       if label is not None}
    codes = house_prices_encoded[column].map(labels_to_codes)

    # A non-missing label that produced no code means the mapping is incomplete.
    unmapped = house_prices_encoded[column].notna() & codes.isna()
    if unmapped.any():
        unknown = sorted(house_prices_encoded.loc[unmapped, column].astype(str).unique())
        raise ValueError("{}: no integer code defined for {}".format(column, unknown))

    house_prices_encoded[column] = codes.fillna(mapping[None]).astype('int64')

house_prices_encoded

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice,Alley_Grvl,Alley_Pave,Fence_GdPrv,Fence_GdWo,Fence_MnPrv,Fence_MnWw,MiscFeature_Gar2,MiscFeature_Othr,MiscFeature_Shed,MiscFeature_TenC
0,1,60,RL,65.000,8450,Pave,,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2003,2003,Gable,CompShg,VinylSd,VinylSd,BrkFace,196.000,Gd,TA,PConc,Gd,TA,No,GLQ,706,Unf,0,150,856,GasA,Ex,Y,SBrkr,856,854,0,1710,1,0,2,1,3,1,Gd,8,Typ,0,0,Attchd,2003.000,RFn,2,548,TA,TA,Y,0,61,0,0,0,0,0,,,0,2,2008,WD,Normal,208500,0,0,0,0,0,0,0,0,0,0
1,2,20,RL,80.000,9600,Pave,,Reg,Lvl,AllPub,FR2,Gtl,Veenker,Feedr,Norm,1Fam,1Story,6,8,1976,1976,Gable,CompShg,MetalSd,MetalSd,,0.000,TA,TA,CBlock,Gd,TA,Gd,ALQ,978,Unf,0,284,1262,GasA,Ex,Y,SBrkr,1262,0,0,1262,0,1,2,0,3,1,TA,6,Typ,1,3,Attchd,1976.000,RFn,2,460,TA,TA,Y,298,0,0,0,0,0,0,,,0,5,2007,WD,Normal,181500,0,0,0,0,0,0,0,0,0,0
2,3,60,RL,68.000,11250,Pave,,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2001,2002,Gable,CompShg,VinylSd,VinylSd,BrkFace,162.000,Gd,TA,PConc,Gd,TA,Mn,GLQ,486,Unf,0,434,920,GasA,Ex,Y,SBrkr,920,866,0,1786,1,0,2,1,3,1,Gd,6,Typ,1,3,Attchd,2001.000,RFn,2,608,TA,TA,Y,0,42,0,0,0,0,0,,,0,9,2008,WD,Normal,223500,0,0,0,0,0,0,0,0,0,0
3,4,70,RL,60.000,9550,Pave,,IR1,Lvl,AllPub,Corner,Gtl,Crawfor,Norm,Norm,1Fam,2Story,7,5,1915,1970,Gable,CompShg,Wd Sdng,Wd Shng,,0.000,TA,TA,BrkTil,TA,Gd,No,ALQ,216,Unf,0,540,756,GasA,Gd,Y,SBrkr,961,756,0,1717,1,0,1,0,3,1,Gd,7,Typ,1,4,Detchd,1998.000,Unf,3,642,TA,TA,Y,0,35,272,0,0,0,0,,,0,2,2006,WD,Abnorml,140000,0,0,0,0,0,0,0,0,0,0
4,5,60,RL,84.000,14260,Pave,,IR1,Lvl,AllPub,FR2,Gtl,NoRidge,Norm,Norm,1Fam,2Story,8,5,2000,2000,Gable,CompShg,VinylSd,VinylSd,BrkFace,350.000,Gd,TA,PConc,Gd,TA,Av,GLQ,655,Unf,0,490,1145,GasA,Ex,Y,SBrkr,1145,1053,0,2198,1,0,2,1,4,1,Gd,9,Typ,1,3,Attchd,2000.000,RFn,3,836,TA,TA,Y,192,84,0,0,0,0,0,,,0,12,2008,WD,Normal,250000,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,1456,60,RL,62.000,7917,Pave,,Reg,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,5,1999,2000,Gable,CompShg,VinylSd,VinylSd,,0.000,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,953,953,GasA,Ex,Y,SBrkr,953,694,0,1647,0,0,2,1,3,1,TA,7,Typ,1,3,Attchd,1999.000,RFn,2,460,TA,TA,Y,0,40,0,0,0,0,0,,,0,8,2007,WD,Normal,175000,0,0,0,0,0,0,0,0,0,0
1456,1457,20,RL,85.000,13175,Pave,,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,1Story,6,6,1978,1988,Gable,CompShg,Plywood,Plywood,Stone,119.000,TA,TA,CBlock,Gd,TA,No,ALQ,790,Rec,163,589,1542,GasA,TA,Y,SBrkr,2073,0,0,2073,1,0,2,0,3,1,TA,7,Min1,2,3,Attchd,1978.000,Unf,2,500,TA,TA,Y,349,0,0,0,0,0,0,MnPrv,,0,2,2010,WD,Normal,210000,0,0,0,0,1,0,0,0,0,0
1457,1458,70,RL,66.000,9042,Pave,,Reg,Lvl,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,2Story,7,9,1941,2006,Gable,CompShg,CemntBd,CmentBd,,0.000,Ex,Gd,Stone,TA,Gd,No,GLQ,275,Unf,0,877,1152,GasA,Ex,Y,SBrkr,1188,1152,0,2340,0,0,2,0,4,1,Gd,9,Typ,2,4,Attchd,1941.000,RFn,1,252,TA,TA,Y,0,60,0,0,0,0,0,GdPrv,Shed,2500,5,2010,WD,Normal,266500,0,0,1,0,0,0,0,0,1,0
1458,1459,20,RL,68.000,9717,Pave,,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,6,1950,1996,Hip,CompShg,MetalSd,MetalSd,,0.000,TA,TA,CBlock,TA,TA,Mn,GLQ,49,Rec,1029,0,1078,GasA,Gd,Y,FuseA,1078,0,0,1078,1,0,1,0,2,1,Gd,5,Typ,0,0,Attchd,1950.000,Unf,1,240,TA,TA,Y,366,0,112,0,0,0,0,,,0,4,2010,WD,Normal,142125,0,0,0,0,0,0,0,0,0,0
