In [1]:
# importar pacotes necessários
import numpy as np
import pandas as pd

In [2]:
# set extra display options
# (float-precision tweak left disabled, as in the original)
#pd.set_option('precision', 2)
# show up to 100 columns when a DataFrame is rendered
pd.options.display.max_columns = 100

In [3]:
# load the training data file, using the 'Id' column as the row index
data = pd.read_csv(
    'iris-train.csv',
    index_col='Id',
)

# show a few example records
data.head()

Unnamed: 0_level_0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
47,5.1,3.8,1.6,0.2,Iris-setosa
37,5.5,3.5,1.3,0.2,Iris-setosa
50,5.0,3.3,1.4,0.2,Iris-setosa
79,6.0,2.9,4.5,1.5,Iris-versicolor
44,5.0,3.5,1.6,0.6,Iris-setosa


In [4]:
# importar pacotes usados na seleção do modelo e na medição da precisão
from sklearn.model_selection import train_test_split

# importar os pacotes necessários para os algoritmos de classificação
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier

In [5]:
# define the model inputs

# features: every column except the target column
X = data.drop(columns=['Species'])
# target: only the 'Species' column
y = data['Species']

print('Forma dos dados originais:', X.shape, y.shape)

Forma dos dados originais: (100, 4) (100,)


In [6]:
# split the data into a training part (70%) and a test part (30%);
# random_state is fixed so the same split is produced on every run

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# report the shape of each of the four resulting pieces
print(
    'Forma dos dados separados:',
    *(part.shape for part in (X_train, X_test, y_train, y_test)),
)

Forma dos dados separados: (70, 4) (30, 4) (70,) (30,)


In [7]:
# preview the first five rows of the training features
X_train.head(n=5)

Unnamed: 0_level_0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
67,5.6,3.0,4.5,1.5
130,7.2,3.0,5.8,1.6
99,5.1,2.5,3.0,1.1
4,4.6,3.1,1.5,0.2
46,4.8,3.0,1.4,0.3


In [8]:
# preview the first five training labels
y_train.head(n=5)

Id
67     Iris-versicolor
130     Iris-virginica
99     Iris-versicolor
4          Iris-setosa
46         Iris-setosa
Name: Species, dtype: object

In [9]:
# A) Support Vector Machine (SVM)

# build an SVC with default settings and fit it on the training split
# (fit() returns the estimator itself, so the call can be chained)
model = SVC().fit(X_train, y_train)

# classifier score on the held-out test split, scaled to a percentage
score = 100 * model.score(X_test, y_test)

print(model, '\nScore:', score, '%')

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False) 
Score: 100.0 %


In [10]:
# B) Logistic Regression

# default-configured logistic regression, fitted on the training split
model = LogisticRegression().fit(X_train, y_train)

# classifier score on the held-out test split, scaled to a percentage
score = 100 * model.score(X_test, y_test)

print(model, '\nScore:', score, '%')

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False) 
Score: 90.0 %


In [11]:
# C) Decision Tree

# Pin random_state: the tree's split search is randomized (features are
# permuted at each split), so with the default random_state=None the fitted
# tree -- and therefore the score printed below -- can differ between runs.
# The same seed as the train/test split is used for consistency.
model = DecisionTreeClassifier(random_state=42)

model.fit(X_train, y_train)
# classifier score on the held-out test split, scaled to a percentage
score = model.score(X_test, y_test) * 100

print(model, '\nScore:', score, '%')

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best') 
Score: 100.0 %


In [12]:
# D) K-Nearest Neighbours

# k-NN classifier configured with n_neighbors=3, fitted on the training split
model = KNeighborsClassifier(n_neighbors=3)
model.fit(X_train, y_train)

# classifier score on the held-out test split, scaled to a percentage
score = 100 * model.score(X_test, y_test)

print(model, '\nScore:', score, '%')

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=3, p=2,
           weights='uniform') 
Score: 100.0 %
