## Değer Atama Yöntemleri

In [None]:
import numpy as np
import pandas as pd
# Three toy numeric columns with missing entries. `np.nan` is the canonical
# spelling; the `np.NaN` alias was removed in NumPy 2.0.
V1 = np.array([1, 3, 6, np.nan, 7, 1, np.nan, 9, 15])
V2 = np.array([7, np.nan, 5, 8, 12, np.nan, np.nan, 2, 3])
V3 = np.array([np.nan, 12, 5, 6, 14, 7, np.nan, 2, 31])

# Assemble the arrays into a DataFrame, one column per array.
df = pd.DataFrame({"V1": V1, "V2": V2, "V3": V3})

df

In [None]:
# Imputation for numeric variables

In [None]:
# Replace the missing V1 entries with 0; the result is displayed, not stored.
df.loc[:, "V1"].fillna(value=0)

In [None]:
# Display df again; the fillna result in the previous cell was not assigned.
df

In [None]:
# Fill the gaps in V1 with the mean of V1 (result is displayed, not stored).
df["V1"].pipe(lambda col: col.fillna(col.mean()))

In [None]:
# First approach for all variables: fill every column with its own mean.
df.apply(lambda col: col.fillna(col.mean()), axis="index")

In [None]:
# Second approach: DataFrame.fillna with the column means

In [None]:
# A Series of column means is matched to the frame's columns by label.
df.fillna(value=df.mean())

In [None]:
# Only the means labelled V1 through V2 are supplied as fill values.
df.fillna(df.mean().loc["V1":"V2"])

In [None]:
# Fill the gaps in V3 with the median of V3 (result is displayed, not stored).
df["V3"].pipe(lambda col: col.fillna(col.median()))

In [None]:
# Third approach: DataFrame.where with the column means

In [None]:
# Keep the cells that are present; elsewhere take the column mean,
# aligned along the column labels.
df.where(df.notna(), other=df.mean(), axis=1)

## Kategorik Değişken Kırılımında Değer Atama

In [None]:
# Same toy numeric columns as before plus a fully populated department label.
# `np.nan` replaces the `np.NaN` alias, which was removed in NumPy 2.0.
V1 = np.array([1, 3, 6, np.nan, 7, 1, np.nan, 9, 15])
V2 = np.array([7, np.nan, 5, 8, 12, np.nan, np.nan, 2, 3])
V3 = np.array([np.nan, 12, 5, 6, 14, 7, np.nan, 2, 31])
V4 = np.array(["IT", "IT", "IK", "IK", "IK", "IK", "IK", "IT", "IT"])

# V1 is exposed under the column name "maas"; V4 becomes "departman".
df = pd.DataFrame({"maas": V1, "V2": V2, "V3": V3, "departman": V4})

df

In [None]:
# Mean of "maas" within each department.
df.groupby(by="departman")["maas"].agg("mean")

In [None]:
# transform("mean") yields one group mean per row (same index as df), so each
# missing "maas" is filled with the mean of that row's department.
df["maas"].fillna(
    value=df.groupby(by="departman")["maas"].transform("mean")
)

## Kategorik Değişkenler için Eksik Değer Atama

In [1]:
import numpy as np
import pandas as pd
# Numeric column with gaps plus a department column with one missing label.
# `np.nan` replaces the `np.NaN` alias, which was removed in NumPy 2.0.
V1 = np.array([1, 3, 6, np.nan, 7, 1, np.nan, 9, 15])
# dtype=object keeps the NaN as a real missing value next to the strings.
V4 = np.array(["IT", np.nan, "IK", "IK", "IK", "IK", "IK", "IT", "IT"], dtype=object)

df = pd.DataFrame({"maas": V1, "departman": V4})

df

Unnamed: 0,maas,departman
0,1.0,IT
1,3.0,
2,6.0,IK
3,,IK
4,7.0,IK
5,1.0,IK
6,,IK
7,9.0,IT
8,15.0,IT


In [2]:
# Most frequent department label; mode() returns a Series, so take its first entry.
df["departman"].mode().iloc[0]

'IK'

In [3]:
# Fill the missing department with the most frequent label (displayed, not stored).
df["departman"].pipe(lambda col: col.fillna(col.mode().iloc[0]))

0    IT
1    IK
2    IK
3    IK
4    IK
5    IK
6    IK
7    IT
8    IT
Name: departman, dtype: object

In [5]:
# Display df again; the fillna result above was not assigned back.
df

Unnamed: 0,maas,departman
0,1.0,IT
1,3.0,
2,6.0,IK
3,,IK
4,7.0,IK
5,1.0,IK
6,,IK
7,9.0,IT
8,15.0,IT


In [6]:
# Backward fill: each missing label takes the next valid value below it.
# Series.bfill() replaces the deprecated fillna(method="bfill") spelling.
df["departman"].bfill()

0    IT
1    IK
2    IK
3    IK
4    IK
5    IK
6    IK
7    IT
8    IT
Name: departman, dtype: object

In [7]:
# Forward fill: each missing label takes the last valid value above it.
# Series.ffill() replaces the deprecated fillna(method="ffill") spelling.
df["departman"].ffill()

0    IT
1    IT
2    IK
3    IK
4    IK
5    IK
6    IK
7    IT
8    IT
Name: departman, dtype: object

## Tahmine Dayalı Değer Atama Yöntemleri

In [1]:
import seaborn as sns
import missingno as msno
# Load seaborn's titanic dataset and keep only the float64/int64 columns.
df = (
    sns.load_dataset('titanic')
    .select_dtypes(include=['float64', 'int64'])
)
# Print the first rows as text, then show the missing-value count per column.
print(df.head())
df.isna().sum()

   survived  pclass   age  sibsp  parch     fare
0         0       3  22.0      1      0   7.2500
1         1       1  38.0      1      0  71.2833
2         1       3  26.0      0      0   7.9250
3         1       1  35.0      1      0  53.1000
4         0       3  35.0      0      0   8.0500


survived      0
pclass        0
age         177
sibsp         0
parch         0
fare          0
dtype: int64

In [2]:
!pip install ycimpute



In [3]:
from ycimpute.imputer import knnimput

In [4]:
# Remember the column labels; they are reattached after imputation.
var_names = df.columns.tolist()

In [6]:
import numpy as np
# The imputer is given a plain NumPy array, so copy the frame's values out.
n_df = df.to_numpy(copy=True)

In [8]:
# Peek at the first ten rows of the array.
n_df[:10]

array([[ 0.    ,  3.    , 22.    ,  1.    ,  0.    ,  7.25  ],
       [ 1.    ,  1.    , 38.    ,  1.    ,  0.    , 71.2833],
       [ 1.    ,  3.    , 26.    ,  0.    ,  0.    ,  7.925 ],
       [ 1.    ,  1.    , 35.    ,  1.    ,  0.    , 53.1   ],
       [ 0.    ,  3.    , 35.    ,  0.    ,  0.    ,  8.05  ],
       [ 0.    ,  3.    ,     nan,  0.    ,  0.    ,  8.4583],
       [ 0.    ,  1.    , 54.    ,  0.    ,  0.    , 51.8625],
       [ 0.    ,  3.    ,  2.    ,  3.    ,  1.    , 21.075 ],
       [ 1.    ,  3.    , 27.    ,  0.    ,  2.    , 11.1333],
       [ 1.    ,  2.    , 14.    ,  1.    ,  0.    , 30.0708]])

In [9]:
# (rows, columns) of the array that is passed to the imputer below.
n_df.shape

(891, 6)

In [10]:
# Build a KNN imputer with k = 4 and run it on the array.
dff = (
    knnimput.KNN(k=4)
    .complete(n_df)
)

Imputing row 1/891 with 0 missing, elapsed time: 0.127
Imputing row 101/891 with 0 missing, elapsed time: 0.128
Imputing row 201/891 with 0 missing, elapsed time: 0.129
Imputing row 301/891 with 1 missing, elapsed time: 0.130
Imputing row 401/891 with 0 missing, elapsed time: 0.131
Imputing row 501/891 with 0 missing, elapsed time: 0.132
Imputing row 601/891 with 0 missing, elapsed time: 0.133
Imputing row 701/891 with 0 missing, elapsed time: 0.134
Imputing row 801/891 with 0 missing, elapsed time: 0.135


In [11]:
# Check what kind of object the imputer returned.
type(dff)

numpy.ndarray

In [13]:
import pandas as pd
# Wrap the imputed values in a DataFrame and reattach the saved column names.
dff = pd.DataFrame(data=dff, columns=var_names)

In [14]:
# Confirm that dff is now a DataFrame.
type(dff)

pandas.core.frame.DataFrame

In [15]:
# Missing-value count per column after imputation.
dff.isna().sum()

survived    0
pclass      0
age         0
sibsp       0
parch       0
fare        0
dtype: int64

In [None]:
# Imputation with random forests (ycimpute iterforest)

In [16]:
import seaborn as sns
import missingno as msno
# Reload the titanic dataset and keep only the float64/int64 columns.
df = (
    sns.load_dataset('titanic')
    .select_dtypes(include=['float64', 'int64'])
)

In [17]:
# Missing-value count per column before imputation.
df.isna().sum()

survived      0
pclass        0
age         177
sibsp         0
parch         0
fare          0
dtype: int64

In [18]:
# Remember the column labels; they are reattached after imputation.
var_names = df.columns.tolist()

In [19]:
import numpy as np
# The imputer is given a plain NumPy array, so copy the frame's values out.
n_df = df.to_numpy(copy=True)

In [20]:
from ycimpute.imputer import iterforest
# Run ycimpute's IterImput with its default settings on the array.
# NOTE(review): no seed is set here; confirm whether results are reproducible.
dff = (
    iterforest.IterImput()
    .complete(n_df)
)



In [21]:
# Wrap the imputed values in a DataFrame and reattach the saved column names.
dff = pd.DataFrame(data=dff, columns=var_names)

In [22]:
# Missing-value count per column after imputation.
dff.isna().sum()

survived    0
pclass      0
age         0
sibsp       0
parch       0
fare        0
dtype: int64

In [None]:
# Imputation with EM (expectation-maximization, via ycimpute)

In [23]:
import seaborn as sns
import missingno as msno
# Reload the titanic dataset and keep only the float64/int64 columns.
df = (
    sns.load_dataset('titanic')
    .select_dtypes(include=['float64', 'int64'])
)

In [24]:
from ycimpute.imputer import EM

In [25]:
# Remember the column labels; they are reattached after imputation.
var_names = df.columns.tolist()

In [26]:
import numpy as np
# The imputer is given a plain NumPy array, so copy the frame's values out.
n_df = df.to_numpy(copy=True)

In [27]:
# Run ycimpute's EM imputer with its default settings on the array.
dff = (
    EM()
    .complete(n_df)
)

In [28]:
# Wrap the imputed values in a DataFrame and reattach the saved column names.
dff = pd.DataFrame(data=dff, columns=var_names)

In [29]:
# Missing-value count per column after imputation.
dff.isna().sum()

survived    0
pclass      0
age         0
sibsp       0
parch       0
fare        0
dtype: int64