In [None]:
from datetime import datetime
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# --- Configuration ---------------------------------------------------------
SEED = 5
URL = "https://gist.githubusercontent.com/guilhermesilveira/4d1d4a16ccbf6ea4e0a64a38a24ec884/raw/afd05cb0c796d18f3f5a6537053ded308ba94bf7/car-prices.csv"

# --- Load the raw CSV and seed NumPy's global RNG ----------------------------
data = pd.read_csv(URL)
np.random.seed(SEED)

# --- Clean-up: rename columns, encode the target, derive features ------------
# Original column name -> name used throughout the rest of the notebook.
column_names_to_rename = dict(
    mileage_per_year='milhas_por_ano',
    model_year='ano_do_modelo',
    price='preco',
    sold='vendido',
)

# Textual target label -> numeric class; any other label maps to NaN.
column_values_to_rename = dict(yes=1, no=0)

# The model age is measured against the year in which the notebook is run,
# so this feature shifts whenever the notebook is executed in a later year.
actual_year = datetime.today().year

data = (
    data
    .rename(columns=column_names_to_rename)
    .assign(
        # Encode the target as 0/1 in place (column position is preserved).
        vendido=lambda frame: frame['vendido'].map(column_values_to_rename),
        # Age of the model in years.
        idade_do_modelo=lambda frame: actual_year - frame['ano_do_modelo'],
        # Yearly mileage converted with 1.60934 km per mile.
        km_por_ano=lambda frame: frame['milhas_por_ano'] * 1.60934,
    )
    # Remove the CSV's index column and the columns replaced by derived ones.
    .drop(columns=['Unnamed: 0', 'milhas_por_ano', 'ano_do_modelo'])
)

### Selecting features/target and splitting into train and test sets
x = data[['preco', 'idade_do_modelo', 'km_por_ano']]
y = data['vendido']

# Hold out 25% of the rows for testing; `stratify=y` keeps the class ratio of
# the target in both partitions. `random_state` is passed explicitly (as the
# later scaled-SVC split in this notebook does) so the partition does not
# depend on how much of NumPy's global RNG stream has been consumed so far.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    stratify=y,
    random_state=SEED,
)

print('Training with %d items & testing with %d items' % (len(x_train), len(x_test)))

In [None]:
from sklearn.svm import LinearSVC

# Baseline 1: linear SVM trained on the features exactly as split above.
# `fit` returns the estimator itself, so construction and training are chained.
linear = LinearSVC().fit(x_train, y_train)

# Predict the held-out rows and express the hit rate as a percentage.
predictions = linear.predict(x_test)
hit_rate = accuracy_score(y_test, predictions)
accuracy = hit_rate * 100

print('Accuracy is %.2f%%' % accuracy)

In [None]:
from sklearn.dummy import DummyClassifier

# Baseline 2: a classifier that ignores the features and draws predictions
# using the 'stratified' strategy. It serves as a floor for the real models.
dummy_stratified = DummyClassifier(strategy='stratified').fit(x_train, y_train)

# `score` returns a fraction; scale it to a percentage for display.
stratified_score = dummy_stratified.score(x_test, y_test)
accuracy = stratified_score * 100

print('Accuracy is %.2f%%' % accuracy)

In [None]:
from sklearn.dummy import DummyClassifier

# Baseline 3: a classifier that ignores the features and uses the
# 'most_frequent' strategy. It serves as a floor for the real models.
dummy_most_frequent = DummyClassifier(strategy='most_frequent')
dummy_most_frequent.fit(x_train, y_train)

# Score the model fitted in THIS cell. The previous version scored
# `dummy_stratified` here, so it reported the wrong baseline's accuracy.
accuracy = dummy_most_frequent.score(x_test, y_test) * 100

print('Accuracy is %.2f%%' % accuracy)

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Fresh, explicitly seeded split; the unscaled partitions keep a `raw_` prefix
# so they stay distinguishable from the standardized arrays built below.
raw_x_train, raw_x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    stratify=y,
    random_state=SEED,
)

# Report the sizes of the partitions created in this cell. The previous
# version printed the lengths of the stale `x_train`/`x_test` from an
# earlier cell instead.
print('Training with %d items & testing with %d items' % (len(raw_x_train), len(raw_x_test)))

# Fit the scaler on the training partition only, then apply that same
# transformation to the test partition, so no test-set statistics reach
# the model.
# NOTE: this rebinds `x_train`/`x_test` to the scaled arrays; any earlier cell
# re-run after this one will see scaled data instead of the raw DataFrames.
scaler = StandardScaler()
x_train = scaler.fit_transform(raw_x_train)
x_test = scaler.transform(raw_x_test)

# SVC with default settings, trained on the standardized features.
# `np.ravel` hands the target to `fit` as a flat 1-D array.
model = SVC()
model.fit(x_train, np.ravel(y_train))
predictions = model.predict(x_test)

accuracy = accuracy_score(y_test, predictions) * 100

print('Accuracy is %.2f%%' % accuracy)