In [None]:
# Environment setup (shell commands run through IPython's `!` escape):
#   1. install the MongoDB package, redirecting apt's stdout into a file named `log`;
#   2. start the `mongodb` service;
#   3. install the `openml` Python package.
!apt-get install mongodb >log
!service mongodb start
!pip install openml

In [None]:
import numpy as np
import pymongo
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import seaborn as sns
import openml
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch import nn, optim
import json
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import warnings
from pymongo import MongoClient
from sklearn.preprocessing import StandardScaler

# Blanket filter: every warning raised after this point is hidden.
warnings.filterwarnings("ignore")

# Client with default connection settings; the database-name query result is discarded.
client = MongoClient()
client.list_database_names()

mydb = client["Cleve"]
mycol = mydb["Dados"]

# Fetch OpenML dataset 40710 once and reuse the result for every derived name
# (the data used to be fetched twice with identical arguments).
dataset = openml.datasets.get_dataset(40710)

info = dataset.get_data(
    dataset_format="dataframe", target=dataset.default_target_attribute)
x, y, categorical_indicator, attribute_names = info

# Independent copy of the feature frame, used by the PyTorch cells further down.
df = x.copy()

y_DF = pd.DataFrame(y)

# Work on an explicit copy: adding the label column to a bare column selection
# of `x` is a chained assignment that pandas flags (the filter above hides it).
dados = x[["Age", "Max_heart_rate"]].copy()
dados['class'] = y

dados = dados.dropna()
print(dados)

sns.scatterplot(x="Age", y="Max_heart_rate", hue="class", data=dados)
plt.show()

# Features and labels are both taken from `dados`, so they stay row-aligned.
X = dados[["Age", "Max_heart_rate"]]
y = dados["class"]

SEED = 10
np.random.seed(SEED)

# Split the two-column frame `X` (Age, Max_heart_rate), not the raw frame `x`:
# `X` and `y` were both taken from `dados`, so they have matching rows, whereas
# `x` still carries every original column and row.
treino_x, teste_x, treino_y, teste_y = train_test_split(
    X, y, test_size=0.3, stratify=y)
print(f'Usamos {len(treino_x)} para treino e {len(teste_x)} de elementos para teste')


# Linear SVM baseline, scored as a percentage on the held-out 30 %.
modelo = LinearSVC()
modelo.fit(treino_x, treino_y.values.ravel())
previsoes = modelo.predict(teste_x)
acuracia = accuracy_score(teste_y, previsoes) * 100
print(f'A acurácia foi de {acuracia:.2f} %')

# Store this run's split sizes and accuracy, then list everything stored so far.
dic = {"Teste": len(teste_x),"Treino": len(treino_x), "Acuracia": acuracia}
mycol.insert_one(dic)

for dado in mycol.find():
    print(dado)

In [None]:
# Hyper-parameters read by the data loaders, the optimizer and the epoch loop.
args = dict(
    batch_size=20,
    num_works=4,
    lr=5e-5,
    weight_decay=5e-4,
    num_epochs=30,
)

# Use the GPU when torch can see one, otherwise stay on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Seeded shuffle of the row positions of `df`, cut 80 % / 20 %.
torch.manual_seed(1)
indices = torch.randperm(len(df)).tolist()
treino = int(len(df) * 0.8)

df_train, df_teste = df.iloc[indices[:treino]], df.iloc[indices[treino:]]

# Each part goes through to_json() + json.loads() and is inserted as a single
# document into its own collection ("treino", then "teste").
# NOTE(review): the names df_train / df_teste are rebound from DataFrames to
# dicts here, and no `treino.csv` / `teste.csv` is written in this cell even
# though a later cell reads files with those names. Confirm where they come from.
df_train = json.loads(df_train.to_json())
mydb["treino"].insert_one(df_train)

df_teste = json.loads(df_teste.to_json())
col = mydb["teste"]
col.insert_one(df_teste)

class Creve(Dataset):
  """Map-style dataset over a numeric CSV file whose last column is the label.

  Every row is served as a `(sample, label)` pair of float32 tensors:
  `sample` holds all columns except the last, `label` holds the last column
  with shape `(1,)`.

  Args:
    csv_path: path of the CSV file, read with `pd.read_csv`.
    scaler_feat: accepted for signature compatibility; currently unused.
    scaler_label: accepted for signature compatibility; currently unused.
    *args, **kwargs: accepted and ignored.

  Raises:
    ValueError: if the CSV holds values that cannot be converted to float32.
  """

  def __init__(self, csv_path, scaler_feat = None, scaler_label = None, *args, **kwargs):
    # Extra arguments are deliberately not forwarded: the base class takes none.
    super().__init__()
    # float32 is the dtype of freshly built nn.Linear weights; float64 rows
    # trigger "expected scalar type Float but found Double" in the forward pass.
    self.data = pd.read_csv(csv_path).to_numpy(dtype="float32")

  def __getitem__(self, index):
    row = self.data[index]
    # The label column is excluded from the features so the model never sees
    # the value it is asked to predict.
    sample = torch.from_numpy(row[:-1])
    label  = torch.from_numpy(row[-1:])
    return sample, label

  def __len__(self):
    return len(self.data)


# Datasets backed by the two CSV files expected in the working directory.
train_set = Creve('treino.csv')
test_set = Creve('teste.csv')

# Both loaders share batch size and worker count; only training data is shuffled.
train_loader = DataLoader(
    train_set,
    batch_size = args['batch_size'],
    shuffle = True,
    num_workers = args['num_works'],
)

test_loader = DataLoader(
    test_set,
    batch_size = args['batch_size'],
    shuffle = False,
    num_workers = args['num_works'],
)

In [None]:
class Classifier(nn.Module):
  """Fully connected network: two hidden Linear+ReLU layers and a Linear+ReLU head.

  Args:
    input_size: number of input features.
    hidden_size: width of both hidden layers.
    out_size: number of output units.
  """

  def __init__(self, input_size, hidden_size, out_size):
    super().__init__()

    # Feature extractor: input -> hidden -> hidden, ReLU after each layer.
    feature_layers = [
        nn.Linear(input_size, hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size, hidden_size),
        nn.ReLU(),
    ]
    self.features = nn.Sequential(*feature_layers)

    # Output head: hidden -> out, also followed by a ReLU.
    head_layers = [nn.Linear(hidden_size, out_size), nn.ReLU()]
    self.classifier = nn.Sequential(*head_layers)

  def forward(self, X):
    """Return the head's output for the batch `X`."""
    return self.classifier(self.features(X))

# Layer sizes: the input width is read off the first training sample.
hidden_size, out_size = 32, 2
input_size = train_set[0][0].shape[0]

In [None]:
# Model, loss and optimizer shared by train() and test() below.
net = Classifier(input_size, hidden_size, out_size).to(device)

criterion = nn.L1Loss().to(device)

optimizer = optim.Adam(
    net.parameters(),
    lr = args['lr'],
    weight_decay = args['weight_decay'],
)

In [None]:
def train(train_loader, net, epoch):
    """Run one optimisation pass over `train_loader` and print loss statistics.

    Relies on the module-level `criterion`, `optimizer` and `device`.

    Args:
        train_loader: iterable yielding `(dado, rotulo)` tensor batches.
        net: the `nn.Module` to train; it is switched to training mode.
        epoch: epoch number, used only in the printed report.

    Returns:
        The mean of the per-batch losses as a float (callers may ignore it).
    """
    net.train()

    # Inputs are cast to the dtype of the network's weights; float64 batches
    # otherwise fail with "expected scalar type Float but found Double".
    first_param = next(iter(net.parameters()), None)
    net_dtype = first_param.dtype if first_param is not None else torch.get_default_dtype()

    epoch_loss = []

    for dado, rotulo in train_loader:
        dado = dado.to(device=device, dtype=net_dtype)
        rotulo = rotulo.to(device)
        if rotulo.is_floating_point():
            # Integer (class-index) targets are left untouched.
            rotulo = rotulo.to(net_dtype)

        # backward() adds to the existing .grad buffers, so they must be
        # cleared before each batch or every step uses stale gradients.
        optimizer.zero_grad()

        pred = net(dado)
        loss = criterion(pred, rotulo)
        loss.backward()
        optimizer.step()

        # Keep a plain float rather than the loss tensor.
        epoch_loss.append(loss.item())

    epoch_loss = np.asarray(epoch_loss)

    print(f'Época: {epoch}')
    print(f'Média: {epoch_loss.mean()}')
    print(f'Desvio padrão: {epoch_loss.std()}')

    return float(epoch_loss.mean())
def test(test_loader, net, epoch):

    net.eval()
    with torch.no_grad():
        epoch_loss = []
        for batch in test_loader:

            dado, rotulo = batch

            dado = dado.to(device)
            rotulo = rotulo.to(device)

            pred = net(dado)
            loss = criterion(pred, rotulo)

            epoch_loss.append(loss.cpu().data)

        epoch_loss = np.asarray(epoch_loss)

        print(f'Época: {epoch}')
        print(f'Média: {epoch_loss.mean()}')
        print(f'Desvio padrão: {epoch_loss.std()}')

In [None]:
# One training pass followed by one evaluation pass per epoch, with a dashed
# separator line after each epoch.
# Note kept from the original cell: the train() call was recorded as raising
# "RuntimeError: expected scalar type Float but found Double".
for epoch in range(args['num_epochs']):
    train(train_loader, net, epoch)
    test(test_loader, net, epoch)
    print(16 * '-')

In [None]:
features = [0, 9]

targets = y

scaler = StandardScaler()

# `y` was last assigned from `dados`, so the points are drawn from `dados` as
# well to keep coordinates and labels row-aligned. scatter() can only map
# numbers through `cmap`, so the labels are turned into integer class codes
# (sorted, so already-numeric 0/1 labels keep their values).
plt.scatter(dados["Age"], dados["Max_heart_rate"],
            c = pd.factorize(targets, sort=True)[0],
            s = 25, cmap = plt.cm.brg, edgecolor='k')
plt.xlabel("Age")
plt.ylabel("Max heart rate")
plt.show()

In [None]:
def plot_boundary(X, y, model):
  """Draw the decision regions of `model` over a 2-D feature plane, with the data on top.

  A regular grid covering the range of `X` (padded by 0.1 on every side) is
  pushed through `model`; each grid point is coloured by the index of its
  largest output, and the samples are scattered over the filled contour.

  Args:
    X: array-like of shape (n_samples, 2); column 0 is the horizontal axis,
       column 1 the vertical axis.
    y: per-sample values used to colour the scattered points.
    model: torch module called on an (n_points, 2) tensor; its last output
       dimension is reduced with argmax.
  """
  X = np.asarray(X, dtype=float)

  x_min, x_max = X[:, 0].min()-0.1, X[:, 0].max()+0.1
  y_min, y_max = X[:, 1].min()-0.1, X[:, 1].max()+0.1

  # Grid step: one hundredth of the shorter axis range.
  spacing = min(x_max - x_min, y_max - y_min) / 100

  XX, YY = np.meshgrid(np.arange(x_min, x_max, spacing),
                       np.arange(y_min, y_max, spacing))

  data = np.column_stack((XX.ravel(), YY.ravel()))

  # Build the grid tensor with the device and dtype of the model's own weights
  # instead of reading a device from the global `args` dict, which has no such key.
  first_param = next(iter(model.parameters()), None)
  if first_param is not None:
    grid = torch.as_tensor(data, dtype=first_param.dtype, device=first_param.device)
  else:
    grid = torch.as_tensor(data, dtype=torch.float32)

  # Plotting needs no gradients.
  with torch.no_grad():
    db_prob = model(grid)
  clf = np.argmax(db_prob.cpu().numpy(), axis = -1)

  Z = clf.reshape(XX.shape)

  plt.contourf(XX, YY, Z, cmap = plt.cm.brg)
  plt.scatter(X[:,0], X[:,1], c = y, edgecolors = 'k', s = 25, cmap = plt.cm.brg)

In [None]:
# Train a fresh Classifier on the two plotted features and draw its decision
# regions every 10 iterations.

# Standardised feature matrix (Age, Max_heart_rate). plot_boundary feeds the
# model a 2-D grid, so the network must be trained on exactly two inputs.
data = StandardScaler().fit_transform(
    dados[["Age", "Max_heart_rate"]].to_numpy(dtype=float))

# Integer class codes: the labels cannot be turned into a LongTensor directly
# (the original cell noted a "too many dimensions 'str'" error).
target_codes = pd.factorize(dados["class"], sort=True)[0]

input_size = data.shape[1]
hidden_size = 32
output_size = 2

# Tensors are built from the values, not from the shape tuple.
Xtns = torch.tensor(data, dtype=torch.float32, device=device)
Ytns = torch.tensor(target_codes, dtype=torch.long, device=device)

net = Classifier(input_size, hidden_size, output_size).to(device)

# Class-index targets against one score per class call for cross-entropy;
# L1Loss cannot compare (n, 2) float scores with (n,) integer labels.
# NOTE(review): Classifier ends in a ReLU, so the scores passed to the loss
# are non-negative. Confirm that is intended.
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(net.parameters(), lr = 1e-3)

for i in range(100):
  # Clear the gradients left over from the previous iteration.
  optimizer.zero_grad()
  pred = net(Xtns)
  loss = criterion(pred, Ytns)
  loss.backward()
  optimizer.step()

  if i % 10 == 0:
    plt.figure()
    plot_boundary(data, target_codes, net)