# <center>Data engineering</center>

# 1. Installing `ucimlrepo` and importing necessary libraries
Since the competition's data was synthesized from the [Secondary Mushroom](https://archive.ics.uci.edu/dataset/848/secondary+mushroom+dataset) dataset, we need to install the `ucimlrepo` library to fetch the original data.

In [1]:
!pip install -q ucimlrepo

In [2]:
import warnings

warnings.filterwarnings("ignore", category=FutureWarning, module="seaborn")

import pandas as pd
pd.options.display.float_format = '{:.2f}'.format

import numpy as np

from sklearn.model_selection import train_test_split

from ucimlrepo import fetch_ucirepo

In [3]:
# Random seed passed as `random_state` to the train/test split so the split is reproducible.
RAND = 42

# 2. Importing data from the competition dataset

In [4]:
# X     : competition training data (still contains the 'class' target at this point).
# X_val : competition test data (test.csv), used later as the unlabeled "validation" set.
# Both frames are indexed by the competition's 'id' column.
X = pd.read_csv('/kaggle/input/playground-series-s4e8/train.csv', index_col='id')
X_val = pd.read_csv('/kaggle/input/playground-series-s4e8/test.csv', index_col='id')
X.head()

Unnamed: 0_level_0,class,cap-diameter,cap-shape,cap-surface,cap-color,does-bruise-or-bleed,gill-attachment,gill-spacing,gill-color,stem-height,...,stem-root,stem-surface,stem-color,veil-type,veil-color,has-ring,ring-type,spore-print-color,habitat,season
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,e,8.8,f,s,u,f,a,c,w,4.51,...,,,w,,,f,f,,d,a
1,p,4.51,x,h,o,f,a,c,n,4.79,...,,y,o,,,t,z,,d,w
2,e,6.94,f,s,b,f,x,c,w,6.85,...,,s,n,,,f,f,,l,w
3,e,3.88,f,y,g,f,s,,g,4.16,...,,,w,,,f,f,,d,u
4,e,5.85,x,l,w,f,d,,w,3.37,...,,,w,,,f,f,,g,a


In [5]:
# Snapshot of the training columns (target included) taken before any column is removed.
init_fts = X.columns.tolist()

In [6]:
# Download UCI dataset 848 (Secondary Mushroom) and keep its 'original' table;
# it is the reference for which category values are legitimate.
secondary_mushroom = fetch_ucirepo(id=848)
original = secondary_mushroom['data']['original']
original.head()

Unnamed: 0,class,cap-diameter,cap-shape,cap-surface,cap-color,does-bruise-or-bleed,gill-attachment,gill-spacing,gill-color,stem-height,...,stem-root,stem-surface,stem-color,veil-type,veil-color,has-ring,ring-type,spore-print-color,habitat,season
0,p,15.26,x,g,o,f,e,,w,16.95,...,s,y,w,u,w,t,g,,d,w
1,p,16.6,x,g,o,f,e,,w,17.99,...,s,y,w,u,w,t,g,,d,u
2,p,14.07,x,g,o,f,e,,w,17.8,...,s,y,w,u,w,t,g,,d,w
3,p,14.17,f,h,e,f,e,,w,15.77,...,s,y,w,u,w,t,p,,d,w
4,p,14.64,x,h,o,f,e,,w,16.53,...,s,y,w,u,w,t,p,,d,w


In [7]:
X.shape

(3116945, 21)

In [8]:
X.isna().sum()

class                         0
cap-diameter                  4
cap-shape                    40
cap-surface              671023
cap-color                    12
does-bruise-or-bleed          8
gill-attachment          523936
gill-spacing            1258435
gill-color                   57
stem-height                   0
stem-width                    0
stem-root               2757023
stem-surface            1980861
stem-color                   38
veil-type               2957493
veil-color              2740947
has-ring                     24
ring-type                128880
spore-print-color       2849682
habitat                      45
season                        0
dtype: int64

In [9]:
# Fraction of missing values per training column.
# The NaN counts are computed once, outside the loop, so the 3M-row frame is
# scanned a single time instead of once per column.
train_na_counts = X.isna().sum()
train_rows = X.shape[0]
for col in X.columns:
    print(f"X_train - {col}: {train_na_counts[col]/train_rows}")

X_train - class: 0.0
X_train - cap-diameter: 1.283307854325309e-06
X_train - cap-shape: 1.2833078543253089e-05
X_train - cap-surface: 0.21528227158323293
X_train - cap-color: 3.849923562975927e-06
X_train - does-bruise-or-bleed: 2.566615708650618e-06
X_train - gill-attachment: 0.16809279599094626
X_train - gill-spacing: 0.4037398799144675
X_train - gill-color: 1.828713692413565e-05
X_train - stem-height: 0.0
X_train - stem-width: 0.0
X_train - stem-root: 0.8845273176138816
X_train - stem-surface: 0.6355136199066714
X_train - stem-color: 1.2191424616090435e-05
X_train - veil-type: 0.9488434990030302
X_train - veil-color: 0.8793697033473481
X_train - has-ring: 7.699847125951853e-06
X_train - ring-type: 0.04134817906636145
X_train - spore-print-color: 0.9142548232323637
X_train - habitat: 1.4437213361159726e-05
X_train - season: 0.0


In [10]:
X_val.isna().sum()

cap-diameter                  7
cap-shape                    31
cap-surface              446904
cap-color                    13
does-bruise-or-bleed         10
gill-attachment          349821
gill-spacing             839595
gill-color                   49
stem-height                   1
stem-width                    0
stem-root               1838012
stem-surface            1321488
stem-color                   21
veil-type               1971545
veil-color              1826124
has-ring                     19
ring-type                 86195
spore-print-color       1899617
habitat                      25
season                        0
dtype: int64

In [11]:
# Fraction of missing values per column of the competition test frame.
# The NaN counts are computed once, outside the loop, so the frame is scanned
# a single time instead of once per column.
val_na_counts = X_val.isna().sum()
val_rows = X_val.shape[0]
for col in X_val.columns:
    print(f"X_val - {col}: {val_na_counts[col]/val_rows}")

X_val - cap-diameter: 3.368682036839907e-06
X_val - cap-shape: 1.4918449020291015e-05
X_val - cap-surface: 0.2150682109988431
X_val - cap-color: 6.256123782702684e-06
X_val - does-bruise-or-bleed: 4.812402909771296e-06
X_val - gill-attachment: 0.16834795982991044
X_val - gill-spacing: 0.40404694210294306
X_val - gill-color: 2.358077425787935e-05
X_val - stem-height: 4.812402909771296e-07
X_val - stem-width: 0.0
X_val - stem-root: 0.8845254296994558
X_val - stem-surface: 0.635953269642785
X_val - stem-color: 1.0106046110519721e-05
X_val - veil-type: 0.9487868894745048
X_val - veil-color: 0.8788044451203197
X_val - has-ring: 9.143565528565462e-06
X_val - ring-type: 0.041480506880773683
X_val - spore-print-color: 0.9141722378251019
X_val - habitat: 1.2031007274428239e-05
X_val - season: 0.0


In [12]:
X.dtypes

class                    object
cap-diameter            float64
cap-shape                object
cap-surface              object
cap-color                object
does-bruise-or-bleed     object
gill-attachment          object
gill-spacing             object
gill-color               object
stem-height             float64
stem-width              float64
stem-root                object
stem-surface             object
stem-color               object
veil-type                object
veil-color               object
has-ring                 object
ring-type                object
spore-print-color        object
habitat                  object
season                   object
dtype: object

In [13]:
X.columns

Index(['class', 'cap-diameter', 'cap-shape', 'cap-surface', 'cap-color',
       'does-bruise-or-bleed', 'gill-attachment', 'gill-spacing', 'gill-color',
       'stem-height', 'stem-width', 'stem-root', 'stem-surface', 'stem-color',
       'veil-type', 'veil-color', 'has-ring', 'ring-type', 'spore-print-color',
       'habitat', 'season'],
      dtype='object')

# 3. Changing dtypes of columns
To speed up model training and data manipulation, we need to change some columns' data types. As a first step, we separate the float features from the categorical ones.

In [14]:
# Continuous measurements; every other feature (apart from the target) is categorical.
float_fts = ['cap-diameter', 'stem-height', 'stem-width']
non_categorical = {'class', *float_fts}
cat_fts = [ft for ft in init_fts if ft not in non_categorical]
cat_fts

['cap-shape',
 'cap-surface',
 'cap-color',
 'does-bruise-or-bleed',
 'gill-attachment',
 'gill-spacing',
 'gill-color',
 'stem-root',
 'stem-surface',
 'stem-color',
 'veil-type',
 'veil-color',
 'has-ring',
 'ring-type',
 'spore-print-color',
 'habitat',
 'season']

# 4. Clean data
According to the contest description, the data has some additional noise added, so we need to get rid of useless data (category values which don't appear in the original dataset). We replace such values with `NaN`s and then factorize the categorical columns into integer codes (missing values become `-1`) for easier training.

In [15]:
def clean_cat(X):
    """Integer-encode every text (categorical) column of ``X`` in place.

    Each column with object dtype or a pandas string dtype is replaced by its
    ``pd.factorize`` codes: distinct values are numbered 0, 1, 2, ... in order
    of first appearance and missing values become ``-1``. Columns of any other
    dtype (e.g. the float features) are left untouched.

    Parameters
    ----------
    X : pd.DataFrame
        Frame to encode. It is modified in place.

    Returns
    -------
    pd.DataFrame
        The same ``X`` object, returned so calls can be chained.
    """
    is_object = pd.api.types.is_object_dtype
    is_string = pd.api.types.is_string_dtype
    for col in X.columns:
        dtype = X[col].dtype
        # Text columns may be stored as plain object or as a dedicated pandas
        # string dtype; checking only for 'object' would silently leave the
        # latter unencoded.
        if not (is_object(dtype) or is_string(dtype)):
            continue
        codes, _ = pd.factorize(X[col])
        X[col] = codes.astype(int)
    return X

In [16]:
def rem_out(X):
    """Blank out category values that never occur in the original UCI data.

    For every feature listed in the module-level ``cat_fts``, the set of
    non-missing values found in the module-level ``original`` frame is printed,
    and every entry of ``X[ft]`` outside that set (entries that are already
    missing included) is set to ``NaN``.

    Parameters
    ----------
    X : pd.DataFrame
        Frame containing all ``cat_fts`` columns. It is modified in place.

    Returns
    -------
    pd.DataFrame
        The same ``X`` object, returned so calls can be chained.
    """
    for ft in cat_fts:
        reference_values = original[ft].dropna()
        cats = sorted(set(reference_values))
        print(f"For {ft}: {cats}")
        unknown = ~X[ft].isin(cats)
        X.loc[unknown, ft] = np.nan
    return X

In [17]:
# Separate the target from the features: pop() removes 'class' from X in place,
# and the labels are encoded as 'p' -> 1, 'e' -> 0.
# NOTE(review): because pop() mutates X, re-running this cell without reloading X raises KeyError.
y = X.pop('class').map({'p': 1, 'e': 0})
y

id
0          0
1          1
2          0
3          0
4          0
          ..
3116940    0
3116941    0
3116942    1
3116943    0
3116944    1
Name: class, Length: 3116945, dtype: int64

In [18]:
# Clean the training rows and the competition-test rows as one frame so that
# every category receives the same integer code in both.
combined = pd.concat([X, X_val])
combined = rem_out(combined)
df = clean_cat(combined)
df

For cap-shape: ['b', 'c', 'f', 'o', 'p', 's', 'x']
For cap-surface: ['d', 'e', 'g', 'h', 'i', 'k', 'l', 's', 't', 'w', 'y']
For cap-color: ['b', 'e', 'g', 'k', 'l', 'n', 'o', 'p', 'r', 'u', 'w', 'y']
For does-bruise-or-bleed: ['f', 't']
For gill-attachment: ['a', 'd', 'e', 'f', 'p', 's', 'x']
For gill-spacing: ['c', 'd', 'f']
For gill-color: ['b', 'e', 'f', 'g', 'k', 'n', 'o', 'p', 'r', 'u', 'w', 'y']
For stem-root: ['b', 'c', 'f', 'r', 's']
For stem-surface: ['f', 'g', 'h', 'i', 'k', 's', 't', 'y']
For stem-color: ['b', 'e', 'f', 'g', 'k', 'l', 'n', 'o', 'p', 'r', 'u', 'w', 'y']
For veil-type: ['u']
For veil-color: ['e', 'k', 'n', 'u', 'w', 'y']
For has-ring: ['f', 't']
For ring-type: ['e', 'f', 'g', 'l', 'm', 'p', 'r', 'z']
For spore-print-color: ['g', 'k', 'n', 'p', 'r', 'u', 'w']
For habitat: ['d', 'g', 'h', 'l', 'm', 'p', 'u', 'w']
For season: ['a', 's', 'u', 'w']


Unnamed: 0_level_0,cap-diameter,cap-shape,cap-surface,cap-color,does-bruise-or-bleed,gill-attachment,gill-spacing,gill-color,stem-height,stem-width,stem-root,stem-surface,stem-color,veil-type,veil-color,has-ring,ring-type,spore-print-color,habitat,season
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
0,8.80,0,0,0,0,0,0,0,4.51,15.39,-1,-1,0,-1,-1,0,0,-1,0,0
1,4.51,1,1,1,0,0,0,1,4.79,6.48,-1,0,1,-1,-1,1,1,-1,0,1
2,6.94,0,0,2,0,1,0,0,6.85,9.93,-1,1,2,-1,-1,0,0,-1,1,1
3,3.88,0,2,3,0,2,-1,2,4.16,6.53,-1,-1,0,-1,-1,0,0,-1,0,2
4,5.85,1,3,4,0,3,-1,0,3.37,8.36,-1,-1,0,-1,-1,0,0,-1,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5194904,0.88,1,6,4,0,0,1,0,2.67,1.35,-1,-1,4,-1,-1,0,0,-1,0,2
5194905,3.12,1,0,4,0,3,0,0,2.69,7.38,-1,-1,0,-1,-1,0,0,-1,2,0
5194906,5.73,1,5,6,0,0,-1,0,6.16,9.74,-1,-1,3,-1,1,1,1,-1,0,0
5194907,5.03,3,6,5,0,0,1,2,6.00,3.46,-1,1,8,-1,-1,0,0,-1,0,0


In [19]:
# Sanity check: list the integer codes present in each encoded categorical column
# (-1 is the code pd.factorize assigns to missing values).
for ft in cat_fts:
    print(f"Uniqe {ft}: {list(df[ft].unique())}")

Uniqe cap-shape: [0, 1, 2, 3, 4, 5, 6, -1]
Uniqe cap-surface: [0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10]
Uniqe cap-color: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, -1]
Uniqe does-bruise-or-bleed: [0, 1, -1]
Uniqe gill-attachment: [0, 1, 2, 3, 4, -1, 5, 6]
Uniqe gill-spacing: [0, -1, 1, 2]
Uniqe gill-color: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, -1]
Uniqe stem-root: [-1, 0, 1, 2, 3, 4]
Uniqe stem-surface: [-1, 0, 1, 2, 3, 4, 5, 6, 7]
Uniqe stem-color: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1]
Uniqe veil-type: [-1, 0]
Uniqe veil-color: [-1, 0, 1, 2, 3, 4, 5]
Uniqe has-ring: [0, 1, -1]
Uniqe ring-type: [0, 1, 2, -1, 3, 4, 5, 6, 7]
Uniqe spore-print-color: [-1, 0, 1, 2, 3, 4, 5, 6]
Uniqe habitat: [0, 1, 2, 3, 4, 5, 6, 7, -1]
Uniqe season: [0, 1, 2, 3]


In [20]:
# Undo the concatenation positionally: the first `xsz` rows of `df` came from
# the training frame and the remaining rows from the competition test frame.
# Positional slicing does not depend on the `id` labels being exactly 0..n-1.
xsz = X.shape[0]
X = df.iloc[:xsz]
assert X.shape[0] == xsz

X_val = df.iloc[xsz:]
assert X_val.shape[0]+X.shape[0] == df.shape[0]

# The target was taken from the training frame before the concat, so the
# recovered features must still line up with it row for row.
assert X.index.equals(y.index)

X_val

Unnamed: 0_level_0,cap-diameter,cap-shape,cap-surface,cap-color,does-bruise-or-bleed,gill-attachment,gill-spacing,gill-color,stem-height,stem-width,stem-root,stem-surface,stem-color,veil-type,veil-color,has-ring,ring-type,spore-print-color,habitat,season
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
3116945,8.64,1,-1,5,1,-1,-1,0,11.13,17.12,0,-1,0,0,1,1,5,-1,0,0
3116946,6.90,4,4,1,0,-1,0,4,1.27,10.75,-1,-1,2,-1,-1,0,0,-1,0,0
3116947,2.00,3,6,5,0,-1,0,1,6.18,3.14,-1,-1,2,-1,-1,0,0,-1,0,3
3116948,3.47,1,4,5,0,2,0,1,4.98,8.51,-1,-1,0,-1,0,1,1,-1,0,2
3116949,6.17,1,1,7,0,6,-1,4,6.73,13.70,-1,-1,3,-1,3,1,-1,-1,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5194904,0.88,1,6,4,0,0,1,0,2.67,1.35,-1,-1,4,-1,-1,0,0,-1,0,2
5194905,3.12,1,0,4,0,3,0,0,2.69,7.38,-1,-1,0,-1,-1,0,0,-1,2,0
5194906,5.73,1,5,6,0,0,-1,0,6.16,9.74,-1,-1,3,-1,1,1,1,-1,0,0
5194907,5.03,3,6,5,0,0,1,2,6.00,3.46,-1,1,8,-1,-1,0,0,-1,0,0


In [21]:
X.dtypes

cap-diameter            float64
cap-shape                 int64
cap-surface               int64
cap-color                 int64
does-bruise-or-bleed      int64
gill-attachment           int64
gill-spacing              int64
gill-color                int64
stem-height             float64
stem-width              float64
stem-root                 int64
stem-surface              int64
stem-color                int64
veil-type                 int64
veil-color                int64
has-ring                  int64
ring-type                 int64
spore-print-color         int64
habitat                   int64
season                    int64
dtype: object

In [22]:
X_val.dtypes

cap-diameter            float64
cap-shape                 int64
cap-surface               int64
cap-color                 int64
does-bruise-or-bleed      int64
gill-attachment           int64
gill-spacing              int64
gill-color                int64
stem-height             float64
stem-width              float64
stem-root                 int64
stem-surface              int64
stem-color                int64
veil-type                 int64
veil-color                int64
has-ring                  int64
ring-type                 int64
spore-print-color         int64
habitat                   int64
season                    int64
dtype: object

# 5. Splitting data into train and test datasets
To get a good understanding of our models' scores, we split our dataset into train and test datasets. This way we can train a model on the train data and monitor its performance on the test data (an unseen dataset).

Another very good (and often better) idea is to use cross-validation and split the dataset into `k=5` or `k=10` folds, but in this case it wasn't necessary, as the training score turned out to be very similar to the public score.

In [23]:
# Stratified 80/20 hold-out split of the labeled data, seeded with RAND.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    stratify=y,
    random_state=RAND,
)

In [24]:
# Re-attach the target as the last column of each split.
df_train, df_test = (
    pd.concat([features, target], axis=1)
    for features, target in ((X_train, y_train), (X_test, y_test))
)
df_train

Unnamed: 0_level_0,cap-diameter,cap-shape,cap-surface,cap-color,does-bruise-or-bleed,gill-attachment,gill-spacing,gill-color,stem-height,stem-width,...,stem-surface,stem-color,veil-type,veil-color,has-ring,ring-type,spore-print-color,habitat,season,class
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2413129,16.16,0,6,1,0,4,-1,0,16.89,17.66,...,0,0,0,1,1,5,-1,0,2,1
1910266,3.42,4,-1,10,0,5,2,5,3.62,15.31,...,3,8,-1,-1,0,0,-1,0,2,1
1213509,8.11,0,1,8,0,-1,0,0,11.25,15.03,...,-1,0,0,1,1,5,-1,0,2,1
387249,6.67,5,10,7,1,1,0,6,7.32,16.53,...,-1,0,-1,-1,0,0,-1,0,0,1
2498558,12.23,0,2,5,1,4,-1,0,10.18,16.80,...,-1,0,-1,-1,1,-1,-1,0,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2258055,3.84,1,1,5,0,0,0,1,4.82,6.10,...,0,2,-1,-1,1,1,-1,0,2,1
1115921,6.29,1,2,5,0,4,0,0,5.64,9.88,...,0,2,-1,-1,0,0,-1,0,0,0
2414138,8.62,0,0,4,0,4,0,6,5.36,12.68,...,-1,0,-1,1,1,2,-1,5,0,0
1624246,6.36,1,0,1,0,3,1,7,6.24,13.02,...,-1,0,-1,-1,0,0,-1,2,1,0


In [25]:
df_train.dtypes

cap-diameter            float64
cap-shape                 int64
cap-surface               int64
cap-color                 int64
does-bruise-or-bleed      int64
gill-attachment           int64
gill-spacing              int64
gill-color                int64
stem-height             float64
stem-width              float64
stem-root                 int64
stem-surface              int64
stem-color                int64
veil-type                 int64
veil-color                int64
has-ring                  int64
ring-type                 int64
spore-print-color         int64
habitat                   int64
season                    int64
class                     int64
dtype: object

In [26]:
df_train.isna().sum()

cap-diameter            4
cap-shape               0
cap-surface             0
cap-color               0
does-bruise-or-bleed    0
gill-attachment         0
gill-spacing            0
gill-color              0
stem-height             0
stem-width              0
stem-root               0
stem-surface            0
stem-color              0
veil-type               0
veil-color              0
has-ring                0
ring-type               0
spore-print-color       0
habitat                 0
season                  0
class                   0
dtype: int64

In [27]:
df_test.isna().sum()

cap-diameter            0
cap-shape               0
cap-surface             0
cap-color               0
does-bruise-or-bleed    0
gill-attachment         0
gill-spacing            0
gill-color              0
stem-height             0
stem-width              0
stem-root               0
stem-surface            0
stem-color              0
veil-type               0
veil-color              0
has-ring                0
ring-type               0
spore-print-color       0
habitat                 0
season                  0
class                   0
dtype: int64

In [28]:
X_val

Unnamed: 0_level_0,cap-diameter,cap-shape,cap-surface,cap-color,does-bruise-or-bleed,gill-attachment,gill-spacing,gill-color,stem-height,stem-width,stem-root,stem-surface,stem-color,veil-type,veil-color,has-ring,ring-type,spore-print-color,habitat,season
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
3116945,8.64,1,-1,5,1,-1,-1,0,11.13,17.12,0,-1,0,0,1,1,5,-1,0,0
3116946,6.90,4,4,1,0,-1,0,4,1.27,10.75,-1,-1,2,-1,-1,0,0,-1,0,0
3116947,2.00,3,6,5,0,-1,0,1,6.18,3.14,-1,-1,2,-1,-1,0,0,-1,0,3
3116948,3.47,1,4,5,0,2,0,1,4.98,8.51,-1,-1,0,-1,0,1,1,-1,0,2
3116949,6.17,1,1,7,0,6,-1,4,6.73,13.70,-1,-1,3,-1,3,1,-1,-1,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5194904,0.88,1,6,4,0,0,1,0,2.67,1.35,-1,-1,4,-1,-1,0,0,-1,0,2
5194905,3.12,1,0,4,0,3,0,0,2.69,7.38,-1,-1,0,-1,-1,0,0,-1,2,0
5194906,5.73,1,5,6,0,0,-1,0,6.16,9.74,-1,-1,3,-1,1,1,1,-1,0,0
5194907,5.03,3,6,5,0,0,1,2,6.00,3.46,-1,1,8,-1,-1,0,0,-1,0,0


# 6. Generating data files
After cleaning and splitting the data, we can generate train, test and validation data files to use in models. We will use `parquet` files because they greatly improve pandas' reading/writing time compared to plain `csv` files.

In [29]:
# Write each prepared frame to its parquet file, keeping the `id` index.
outputs = {
    'train.parquet': df_train,
    'test.parquet': df_test,
    'val.parquet': X_val,
}
for path, frame in outputs.items():
    frame.to_parquet(path, index=True)