In [None]:
from datetime import datetime
import numpy as np
import pandas as pd
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Location of the car-prices CSV; the path embeds a fixed revision hash.
URL = "https://gist.githubusercontent.com/guilhermesilveira/4d1d4a16ccbf6ea4e0a64a38a24ec884/raw/afd05cb0c796d18f3f5a6537053ded308ba94bf7/car-prices.csv"
# Seed value used later in the script to make the random steps repeatable.
SEED = 5

# Download the CSV and parse it into a DataFrame.
data = pd.read_csv(filepath_or_buffer=URL)

### Preparing data
# Translate the English CSV headers to the Portuguese column names used from
# here on (miles per year, model year, price, sold).
column_names_to_rename = {
    'mileage_per_year': 'milhas_por_ano',
    'model_year': 'ano_do_modelo',
    'price': 'preco',
    'sold': 'vendido',
}

# Encode the yes/no target labels as integers.
column_values_to_rename = {
    'yes': 1,
    'no': 0,
}

data = data.rename(columns=column_names_to_rename)

# Series.map turns every label missing from the mapping into NaN. Fail here
# with a clear message instead of letting NaN reach the train/test split.
sold_labels = data['vendido']
data['vendido'] = sold_labels.map(column_values_to_rename)
unmapped = data['vendido'].isna()
if unmapped.any():
    raise ValueError(
        'Unexpected values in the "sold" column: %r'
        % (sold_labels[unmapped].unique().tolist(),)
    )

# Model age in years, measured against the year in which the script runs, so
# this feature shifts when the script is re-run in a later calendar year.
actual_year = datetime.today().year
data['idade_do_modelo'] = actual_year - data['ano_do_modelo']

# Convert the yearly mileage from miles to kilometres.
KM_PER_MILE = 1.60934
data['km_por_ano'] = data['milhas_por_ano'] * KM_PER_MILE

# `columns=` already selects the column axis, so no `axis` argument is needed.
# 'Unnamed: 0' is presumably an index column saved into the CSV -- TODO confirm.
data = data.drop(columns=['Unnamed: 0', 'milhas_por_ano', 'ano_do_modelo'])

### Training and testing data
# Features: price, model age and km per year. Target: the encoded sold flag.
x = data[['preco', 'idade_do_modelo', 'km_por_ano']]
y = data['vendido']

# Seed NumPy's global generator for any code that still relies on it. The
# scikit-learn calls below also receive the seed explicitly, so their results
# do not depend on how much global random state was consumed beforehand.
np.random.seed(SEED)

# Hold out 25% of the rows for testing; `stratify=y` keeps the class ratio of
# `y` in both partitions.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, stratify=y, random_state=SEED,
)

print('Training with %d items & testing with %d items' % (len(x_train), len(x_test)))

# NOTE(review): the features are passed unscaled; if their magnitudes differ
# a lot, LinearSVC may fail to converge -- consider scaling them. TODO confirm.
model = LinearSVC(random_state=SEED)
model.fit(x_train, y_train)
predictions = model.predict(x_test)

# Accuracy on the held-out rows, expressed as a percentage.
accuracy = accuracy_score(y_test, predictions) * 100

print('Accuracy is %.2f%%' % accuracy)


