## Importing Packages

In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
import warnings
warnings.filterwarnings("ignore")

## Loading Data

In [2]:
# Read the labelled training set and the unlabelled application set in one
# statement; the first path becomes `data`, the second becomes `sample`.
data, sample = (
    pd.read_csv(csv_path)
    for csv_path in ('abalone_dataset.csv', 'abalone_app.csv')
)
sample.head()

Unnamed: 0,sex,length,diameter,height,whole_weight,shucked_weight,viscera_weight,shell_weight
0,M,0.6,0.48,0.175,1.229,0.4125,0.2735,0.415
1,F,0.545,0.385,0.15,1.1185,0.5425,0.2445,0.2845
2,F,0.645,0.52,0.18,1.285,0.5775,0.352,0.317
3,M,0.64,0.51,0.17,1.3715,0.567,0.307,0.409
4,F,0.655,0.54,0.215,1.5555,0.695,0.296,0.444


## Converting the string feature `sex` to an integer code

In [5]:
def conv(sex):
    """Map a sex label to an integer code.

    Returns 0 for 'M', 1 for 'F', and 2 for any other value.
    """
    if sex == 'M':
        return 0
    if sex == 'F':
        return 1
    # Every label other than 'M' and 'F' falls into the same bucket.
    return 2

# Encode the `sex` column of both frames with conv.
# The numeric-dtype guard keeps this cell safe to re-run: once the column holds
# the integer codes, passing them through conv again would match neither 'M'
# nor 'F' and silently turn every row into 2.
for frame in (data, sample):
    if not pd.api.types.is_numeric_dtype(frame['sex']):
        frame['sex'] = frame['sex'].apply(conv)
sample.head()

Unnamed: 0,sex,length,diameter,height,whole_weight,shucked_weight,viscera_weight,shell_weight
0,0,0.6,0.48,0.175,1.229,0.4125,0.2735,0.415
1,1,0.545,0.385,0.15,1.1185,0.5425,0.2445,0.2845
2,1,0.645,0.52,0.18,1.285,0.5775,0.352,0.317
3,0,0.64,0.51,0.17,1.3715,0.567,0.307,0.409
4,1,0.655,0.54,0.215,1.5555,0.695,0.296,0.444


## Separating features from target and splitting into train/test sets

In [6]:
from sklearn.model_selection import train_test_split

# Every column except the target `type` is used as a predictor.
# Index.difference sorts the resulting labels by default, so `features` (and
# therefore X) is in sorted column order rather than the file's column order.
features = data.columns.difference(['type'])
X, y = data[features], data['type']

# Shuffled 80/20 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    test_size=0.2,
    shuffle=True,
    random_state=2019,
)
print(X_train.shape, X_test.shape, sep='\n')

(2505, 8)
(627, 8)


## Creating the Classifier

In [7]:
# Build the random forest and fit it on the training split in one expression;
# fit() returns the estimator itself, so the name is bound to the fitted model.
classifier_random_forest = RandomForestClassifier(
    n_estimators=150,
    criterion='entropy',
    max_depth=6,
    max_features=0.1,
    bootstrap=True,
    random_state=2019,
).fit(X_train, y_train)

# Mean accuracy on the held-out split (displayed as the cell output).
classifier_random_forest.score(X_test, y_test)

0.696969696969697

In [8]:
from sklearn.model_selection import cross_val_score

# 5-fold cross-validated accuracy over the full dataset (X, y), reported as the
# mean of the per-fold scores.
scores = cross_val_score(
    estimator=classifier_random_forest,
    X=X,
    y=y,
    cv=5,
    scoring='accuracy',
)
print(scores.mean())

0.64653931388982


In [None]:
import requests

# Run the trained model on the application file and send the predictions to
# the validation server.
print(' - Aplicando modelo e enviando para o servidor')

# The model was fit on data[features], whose columns are in the sorted order
# produced by Index.difference, while `sample` keeps the CSV's column order.
# Selecting by `features` lines the columns up with what the model was trained
# on (and drops any column that was not a training feature).
y_pred = classifier_random_forest.predict(sample[features])

# Endpoint that receives the predictions.
URL = "https://aydanomachado.com/mlclass/03_Validation.php"

# TODO: replace with your own key here
DEV_KEY = "VovóLearn"

# Form fields to be sent to the server. Named `payload` rather than `data` so
# the training DataFrame bound to `data` is not overwritten, which would break
# the earlier cells if they are run again.
payload = {'dev_key': DEV_KEY,
           'predictions': pd.Series(y_pred).to_json(orient='values')}

# Send the request and keep the response object.
r = requests.post(url=URL, data=payload)

# Extract and print the response text.
pastebin_url = r.text
print(" - Resposta do servidor:\n", r.text, "\n")