In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier

## Load data

In [2]:
data = pd.read_csv('abalone_dataset.csv')
sample = pd.read_csv('abalone_app.csv')
data.head()

Unnamed: 0,sex,length,diameter,height,whole_weight,shucked_weight,viscera_weight,shell_weight,type
0,M,0.535,0.42,0.15,0.6995,0.2575,0.153,0.24,3
1,I,0.51,0.38,0.115,0.5155,0.215,0.1135,0.166,1
2,I,0.185,0.13,0.045,0.029,0.012,0.0075,0.0095,1
3,M,0.55,0.45,0.17,0.81,0.317,0.157,0.22,3
4,I,0.535,0.415,0.15,0.5765,0.3595,0.135,0.225,1


## Transform

In [3]:
def conv(sex):
    if sex == 'M':
        sex = 0
    elif sex == 'F':
        sex = 1
    else:
        sex = 2
        
    return sex

data['sex'] = data['sex'].apply(conv)
sample['sex'] = sample['sex'].apply(conv)
data.head()

Unnamed: 0,sex,length,diameter,height,whole_weight,shucked_weight,viscera_weight,shell_weight,type
0,0,0.535,0.42,0.15,0.6995,0.2575,0.153,0.24,3
1,2,0.51,0.38,0.115,0.5155,0.215,0.1135,0.166,1
2,2,0.185,0.13,0.045,0.029,0.012,0.0075,0.0095,1
3,0,0.55,0.45,0.17,0.81,0.317,0.157,0.22,3
4,2,0.535,0.415,0.15,0.5765,0.3595,0.135,0.225,1


## Separating

In [4]:
features = data.columns.difference(['type'])

X = data[features]
y = data['type']
samp = sample

## Creating and training a classifier

In [13]:
knn = KNeighborsClassifier(n_neighbors = 1,
                           weights = 'distance',
                           algorithm = 'auto',
                           p = 2)
knn.fit(X, y)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=1, p=2,
                     weights='distance')

In [14]:
X_new = np.array([[0, 0.535, 0.420, 0.150, 0.6995, 0.2575, 0.1530, 0.2400]])
X_new.shape
knn.predict(X_new)

array([2])

In [25]:
from sklearn.model_selection import GridSearchCV

param_grid = {
            "algorithm": ['auto', 'ball_tree', 'kd_tree', 'brute'],
            "n_neighbors": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
            "p": [1, 2],
            "weights": ['distance', 'uniform']
}
grid_search = GridSearchCV(knn, param_grid, scoring="accuracy")
grid_search.fit(X, y)

knn = grid_search.best_estimator_ 
grid_search.best_params_, grid_search.best_score_



({'algorithm': 'auto', 'n_neighbors': 10, 'p': 2, 'weights': 'distance'},
 0.6344189016602809)

In [26]:
from sklearn.model_selection import cross_val_score

scores = cross_val_score(knn, X, y, scoring='accuracy', cv=5)
print(scores.mean())

0.6388894133422729


In [None]:
import requests

#realizando previsões com o arquivo de
print(' - Aplicando modelo e enviando para o servidor')
y_pred = knn.predict(sample)

# Enviando previsões realizadas com o modelo para o servidor
URL = "https://aydanomachado.com/mlclass/03_Validation.php"

#TODO Substituir pela sua chave aqui
DEV_KEY = "VovóLearn"

# json para ser enviado para o servidor
data = {'dev_key':DEV_KEY,
        'predictions':pd.Series(y_pred).to_json(orient='values')}

# Enviando requisição e salvando o objeto resposta
#r = requests.post(url = URL, data = data)

# Extraindo e imprimindo o texto da resposta
pastebin_url = r.text
print(" - Resposta do servidor:\n", r.text, "\n")