# Install python dependencies

In [None]:
pip install --upgrade pip

In [None]:
pip install pandas scikit-learn Keras tensorflow

In [None]:
pip install tensorflow==2.2.0

# Imports dependencies.

In [None]:
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import cross_val_score

# Load dataset and preprocessing of inconclusive values

In [None]:
data = pd.read_csv("dataset/automobile_data.csv", encoding = 'ISO-8859-1')
data.head()

In [None]:
data = data.drop('dateCrawled', axis = 1)
data = data.drop('dateCreated', axis = 1)
data = data.drop('nrOfPictures', axis = 1)
data = data.drop('postalCode', axis = 1)
data = data.drop('lastSeen', axis = 1)
data = data.drop('name', axis = 1)
data = data.drop('seller', axis = 1)
data = data.drop('offerType', axis = 1)

data = data[data.price > 10]
data = data.loc[data.price < 350000]

valores = {'vehicleType': 'limousine', 'gearbox': 'manuell',
           'model': 'golf', 'fuelType': 'benzin',
           'notRepairedDamage': 'nein'}
data = data.fillna(value = valores)
data.head()

In [None]:
data_entry = data.iloc[:, 1:13].values
data_class = data.iloc[:, 0].values  # Real price

# Transforming labels into numerical values (Standardization of values)

In [None]:
label_encoder = LabelEncoder()

data_entry[:, 0] = label_encoder.fit_transform(data_entry[:, 0])
data_entry[:, 1] = label_encoder.fit_transform(data_entry[:, 1])
data_entry[:, 2] = label_encoder.fit_transform(data_entry[:, 2])
data_entry[:, 3] = label_encoder.fit_transform(data_entry[:, 3])
data_entry[:, 4] = label_encoder.fit_transform(data_entry[:, 4])
data_entry[:, 5] = label_encoder.fit_transform(data_entry[:, 5])
data_entry[:, 6] = label_encoder.fit_transform(data_entry[:, 6])
data_entry[:, 7] = label_encoder.fit_transform(data_entry[:, 7])
data_entry[:, 8] = label_encoder.fit_transform(data_entry[:, 8])
data_entry[:, 9] = label_encoder.fit_transform(data_entry[:, 9])
data_entry[:, 10] = label_encoder.fit_transform(data_entry[:, 10])

data_entry[0:5]

In [None]:
len(data_entry[0])

In [None]:
one_hot_encoder = ColumnTransformer(transformers=[("OneHot", OneHotEncoder(), [0, 1, 3, 5, 8, 9, 10])], remainder='passthrough')
data_entry = one_hot_encoder.fit_transform(data_entry).toarray()

In [None]:
len(data_entry[0])

# Model configuration for training

In [None]:
def create_rede():
    # Sequential model.
    _classifier = Sequential()
    # Connected NN layer (units=158 -> (316 +1)/2 )
    _classifier.add(Dense(units=158, activation='relu', input_dim=316))
    _classifier.add(Dense(units=158, activation='relu'))
    _classifier.add(Dense(units=1, activation='linear'))
    # Configures the model for training
    # mean_absolute_error = 'Computes the mean of absolute difference between labels and predictions'
    _classifier.compile(optimizer='adam', loss='mean_absolute_error', metrics=['mean_absolute_error'])
    return _classifier

# Implementation of the scikit-learn classifier API for Keras (cross_val_score)

In [None]:
classifier = KerasClassifier(build_fn=create_rede, epochs=100, batch_size=300)

In [None]:
# cv -> cross-validation
result = cross_val_score(estimator=classifier, X=data_entry, y=data_class, cv=10, scoring='neg_mean_absolute_error')

In [43]:
media = result.mean()
standard_deviation = result.std()

In [44]:
print(media)
print(standard_deviation)

-5904.834892506982
44.18171054958715


In [45]:
print(result)

[-5896.47116616 -5947.49149712 -5957.66984887 -5951.3539759
 -5905.29146372 -5932.56054997 -5900.03913273 -5901.90959949
 -5827.85298784 -5827.70870328]
