## Bibliotecas Usadas

In [19]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math
import json
import random
from sklearn import datasets
from sklearn.model_selection import train_test_split

## Leitura dos dados

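A função abaixo abre o arquivo de dados indicado por `path` e guarda os valores de cada linha em três listas de features (uma lista por coluna). Se o arquivo não puder ser aberto ou se alguma linha for inválida, a função devolve `None`.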

In [7]:
def readData(path):
    """Read a whitespace-separated data file into three feature columns.

    The first three fields of every line are parsed as floats and appended
    to the first, second and third column respectively. Any further fields
    on a line are ignored.

    Args:
        path: Path of the text file to read.

    Returns:
        A list of three lists of floats (one list per column), or ``None``
        when the file cannot be opened, when a field is not a valid float,
        or when a line holds fewer than three fields. A short diagnostic is
        printed in each of those failure cases.
    """
    n_columns = 3

    try:
        f = open(path)
    except Exception:
        # Best-effort contract: report the problem and return None rather
        # than raising. Catching Exception (instead of a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        print("Houston we've got a file problem")
        return None

    dataset = [[] for _ in range(n_columns)]

    # The context manager closes the file on every exit path, including
    # exceptions raised while iterating over the lines themselves.
    with f:
        for line in f:
            fields = line.split()

            # Parse the available fields first so that an invalid number is
            # reported as a value error even on a row that is also too short.
            try:
                values = [float(field) for field in fields[:n_columns]]
            except ValueError:
                print("You had a Value Error")
                return None

            if len(values) < n_columns:
                # Incomplete row (this includes blank lines): the whole
                # read is rejected, so nothing partial is returned.
                print("You had a line with fewer than %d columns" % n_columns)
                return None

            for column, value in zip(dataset, values):
                column.append(value)

    return dataset

Esta função separa os dados em conjunto de treino (90%) e conjunto de teste (10%), usando uma semente fixa para que a divisão seja reprodutível.

In [10]:
def splitData(dataset, test_size=0.1, random_state=7):
    """Split column-wise feature lists into a train set and a test set.

    All feature lists are passed to ``train_test_split`` in a single call,
    so the same split is applied to every list.

    Args:
        dataset: Sequence of feature lists (one list per feature).
        test_size: Forwarded to ``train_test_split``; defaults to 0.1.
        random_state: Seed forwarded to ``train_test_split``; defaults to 7.

    Returns:
        A ``(trainset, testset)`` tuple. Each element is a list holding one
        list per feature, in the same order as ``dataset``.
    """
    # train_test_split returns its results interleaved:
    # [train_0, test_0, train_1, test_1, ...].
    parts = train_test_split(*dataset, test_size=test_size, random_state=random_state)
    trainset = list(parts[0::2])
    testset = list(parts[1::2])
    return trainset, testset

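Esta outra função transforma as listas de features (uma lista por coluna) em uma lista de pontos, em que cada ponto reúne os valores de todas as features de uma mesma amostra.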

In [16]:
def getPointList(dataset):
    """Turn column-wise feature lists into a list of row-wise points.

    Args:
        dataset: Sequence of feature lists. The length of the first list
            determines how many points are produced.

    Returns:
        A list of points, where point ``k`` is the list of the ``k``-th
        value of every feature list, in the order the lists appear.

    Raises:
        IndexError: If ``dataset`` is empty or if a feature list is shorter
            than the first one.
    """
    n_points = len(dataset[0])
    return [[feature[idx] for feature in dataset] for idx in range(n_points)]

In [8]:
# Read the three feature columns stored in kick1.dat and print the result.
dataset = readData("../data/kick1.dat")
print(dataset)

[[-0.21, -0.305, -0.368, -0.37, -0.48, -0.532, -0.575, -0.583, -0.618, -0.702, -0.783, -0.842, -0.882, -0.93, -0.969, -0.989, -1.032, -1.088, -1.104, -1.192], [2.048, 1.999, 1.968, 1.902, 1.841, 1.807, 1.733, 1.69, 1.649, 1.611, 1.568, 1.523, 1.438, 1.408, 1.34, 1.295, 1.244, 1.201, 1.159, 1.109], [0.145, 0.103, 0.149, 0.11, 0.135, 0.111, 0.12, 0.111, 0.108, 0.117, 0.107, 0.104, 0.139, 0.142, 0.111, 0.107, 0.118, 0.119, 0.126, 0.11]]


In [9]:
# Read the three feature columns stored in kick2.dat and print the result.
dataset2 = readData("../data/kick2.dat")
print(dataset2)

[[-0.105, -0.162, -0.184, -0.185, -0.23, -0.266, -0.287, -0.311, -0.309, -0.331, -0.391, -0.431, -0.441, -0.445, -0.484, -0.514, -0.516, -0.524, -0.552, -0.596], [2.048, 1.999, 1.968, 1.902, 1.841, 1.807, 1.733, 1.69, 1.649, 1.611, 1.568, 1.523, 1.438, 1.408, 1.34, 1.295, 1.244, 1.201, 1.159, 1.109], [0.145, 0.177, 0.172, 0.167, 0.192, 0.219, 0.213, 0.223, 0.25, 0.244, 0.233, 0.269, 0.283, 0.29, 0.263, 0.29, 0.28, 0.281, 0.296, 0.269]]


In [11]:
# Split the kick1 feature columns into train and test parts and print both.
trainset, testset = splitData(dataset)
print(trainset)
print(testset)

[[-0.368, -0.532, -0.842, -0.21, -1.104, -0.575, -0.93, -1.192, -0.783, -0.969, -0.618, -1.032, -0.702, -0.882, -0.583, -0.37, -0.48, -0.989], [1.968, 1.807, 1.523, 2.048, 1.159, 1.733, 1.408, 1.109, 1.568, 1.34, 1.649, 1.244, 1.611, 1.438, 1.69, 1.902, 1.841, 1.295], [0.149, 0.111, 0.104, 0.145, 0.126, 0.12, 0.142, 0.11, 0.107, 0.111, 0.108, 0.118, 0.117, 0.139, 0.111, 0.11, 0.135, 0.107]]
[[-0.305, -1.088], [1.999, 1.201], [0.103, 0.119]]


In [12]:
# Split the kick2 feature columns into train and test parts and print both.
trainset2, testset2 = splitData(dataset2)
print(trainset2)
print(testset2)

[[-0.184, -0.266, -0.431, -0.105, -0.552, -0.287, -0.445, -0.596, -0.391, -0.484, -0.309, -0.516, -0.331, -0.441, -0.311, -0.185, -0.23, -0.514], [1.968, 1.807, 1.523, 2.048, 1.159, 1.733, 1.408, 1.109, 1.568, 1.34, 1.649, 1.244, 1.611, 1.438, 1.69, 1.902, 1.841, 1.295], [0.172, 0.219, 0.269, 0.145, 0.296, 0.213, 0.29, 0.269, 0.233, 0.263, 0.25, 0.28, 0.244, 0.283, 0.223, 0.167, 0.192, 0.29]]
[[-0.162, -0.524], [1.999, 1.201], [0.177, 0.281]]


In [17]:
# Convert the kick1 train/test columns into lists of points and print them.
trainsetP = getPointList(trainset)
testsetP = getPointList(testset)
print(trainsetP)
print(testsetP)

[[-0.368, 1.968, 0.149], [-0.532, 1.807, 0.111], [-0.842, 1.523, 0.104], [-0.21, 2.048, 0.145], [-1.104, 1.159, 0.126], [-0.575, 1.733, 0.12], [-0.93, 1.408, 0.142], [-1.192, 1.109, 0.11], [-0.783, 1.568, 0.107], [-0.969, 1.34, 0.111], [-0.618, 1.649, 0.108], [-1.032, 1.244, 0.118], [-0.702, 1.611, 0.117], [-0.882, 1.438, 0.139], [-0.583, 1.69, 0.111], [-0.37, 1.902, 0.11], [-0.48, 1.841, 0.135], [-0.989, 1.295, 0.107]]
[[-0.305, 1.999, 0.103], [-1.088, 1.201, 0.119]]


In [18]:
# Convert the kick2 train/test columns into lists of points and print them.
trainsetP2 = getPointList(trainset2)
testsetP2 = getPointList(testset2)
print(trainsetP2)
print(testsetP2)

[[-0.184, 1.968, 0.172], [-0.266, 1.807, 0.219], [-0.431, 1.523, 0.269], [-0.105, 2.048, 0.145], [-0.552, 1.159, 0.296], [-0.287, 1.733, 0.213], [-0.445, 1.408, 0.29], [-0.596, 1.109, 0.269], [-0.391, 1.568, 0.233], [-0.484, 1.34, 0.263], [-0.309, 1.649, 0.25], [-0.516, 1.244, 0.28], [-0.331, 1.611, 0.244], [-0.441, 1.438, 0.283], [-0.311, 1.69, 0.223], [-0.185, 1.902, 0.167], [-0.23, 1.841, 0.192], [-0.514, 1.295, 0.29]]
[[-0.162, 1.999, 0.177], [-0.524, 1.201, 0.281]]
