In [13]:
from csv import reader
from math import sqrt
from random import seed
from random import randrange

# Load a CSV file
def load_csv(filename):
    dataset = list()
    with open(filename, 'r') as file:
        csv_reader = reader(file)
        for row in csv_reader:
            if not row:
                continue
            dataset.append(row)
    return dataset

# Convert string column to float
def str_column_to_float(dataset, column):
    for row in dataset:
        row[column] = float(row[column].strip())

# Find the min and max values for each column
def dataset_minmax(dataset):
    minmax = list()
    for i in range(len(dataset[0])):
        col_values = [row[i] for row in dataset]
        value_min = min(col_values)
        value_max = max(col_values)
        minmax.append([value_min, value_max])

    return minmax

# Rescale dataset columns to the range 0-1
def normalize_dataset(dataset, minmax):
    for row in dataset:
        for i in range(len(row)):
            row[i] = (row[i] - minmax[i][0]) / (minmax[i][1] - minmax[i][0])

# calculate column means
def column_means(dataset):
    means = [0 for i in range(len(dataset[0]))]
    for i in range(len(dataset[0])):
        col_values = [row[i] for row in dataset]
        means[i] = sum(col_values) / float(len(dataset))
    return means

# calculate column standard deviations
def column_stdevs(dataset, means):
    stdevs = [0 for i in range(len(dataset[0]))]
    for i in range(len(dataset[0])):
        variance = [pow(row[i]-means[i], 2) for row in dataset]
        stdevs[i] = sum(variance)

    stdevs = [sqrt(x/(float(len(dataset)-1))) for x in
    stdevs]
    return stdevs

# standardize dataset
def standardize_dataset(dataset, means, stdevs):
    for row in dataset:
        for i in range(len(row)):
            row[i] = (row[i] - means[i]) / stdevs[i]

# Split a dataset into a train and test set
def train_test_split(dataset, split=0.60):
    train = list()
    train_size = split * len(dataset)
    dataset_copy = list(dataset)
    while len(train) < train_size:
        index = randrange(len(dataset_copy))
        train.append(dataset_copy.pop(index))
    return train, dataset_copy

In [14]:
dataset = load_csv("wine.csv")

for i in range(14):
  str_column_to_float(dataset, i)

for row in dataset[:10]:
   print(row)

[1.0, 14.23, 1.71, 2.43, 15.6, 127.0, 2.8, 3.06, 0.28, 2.29, 5.64, 1.04, 3.92, 1065.0]
[1.0, 13.2, 1.78, 2.14, 11.2, 100.0, 2.65, 2.76, 0.26, 1.28, 4.38, 1.05, 3.4, 1050.0]
[1.0, 13.16, 2.36, 2.67, 18.6, 101.0, 2.8, 3.24, 0.3, 2.81, 5.68, 1.03, 3.17, 1185.0]
[1.0, 14.37, 1.95, 2.5, 16.8, 113.0, 3.85, 3.49, 0.24, 2.18, 7.8, 0.86, 3.45, 1480.0]
[1.0, 13.24, 2.59, 2.87, 21.0, 118.0, 2.8, 2.69, 0.39, 1.82, 4.32, 1.04, 2.93, 735.0]
[1.0, 14.2, 1.76, 2.45, 15.2, 112.0, 3.27, 3.39, 0.34, 1.97, 6.75, 1.05, 2.85, 1450.0]
[1.0, 14.39, 1.87, 2.45, 14.6, 96.0, 2.5, 2.52, 0.3, 1.98, 5.25, 1.02, 3.58, 1290.0]
[1.0, 14.06, 2.15, 2.61, 17.6, 121.0, 2.6, 2.51, 0.31, 1.25, 5.05, 1.06, 3.58, 1295.0]
[1.0, 14.83, 1.64, 2.17, 14.0, 97.0, 2.8, 2.98, 0.29, 1.98, 5.2, 1.08, 2.85, 1045.0]
[1.0, 13.86, 1.35, 2.27, 16.0, 98.0, 2.98, 3.15, 0.22, 1.85, 7.22, 1.01, 3.55, 1045.0]


In [15]:
minmax = dataset_minmax(dataset)

In [16]:
means = column_means(dataset)

In [17]:
stdevs = column_stdevs(dataset, means)

In [18]:
dataset_copy = list(dataset)
normalize_dataset(dataset_copy, minmax)

In [19]:
dataset_copy = list(dataset)
standardize_dataset(dataset_copy, means, stdevs)

In [20]:
train_test_split(dataset)

([[-1.8556610550173243,
   -15.699603576572638,
   -1.955137654982922,
   -6.559909729056812,
   -5.661612196208445,
   -6.971312789095728,
   -3.408228520200377,
   -1.839385859199199,
   2.7018903161723493,
   -2.107141010456899,
   -2.0964358721523273,
   -2.1614009698114645,
   -2.9665060294226855,
   -2.37139679919894],
  [-1.8556610550173243,
   -15.579666106566048,
   -2.0223614624953092,
   -7.0667093257786755,
   -5.707917398336272,
   -6.970551747417945,
   -2.2787320480906903,
   -1.4486445416291691,
   -1.9979084918228807,
   -1.9748642447473543,
   -2.0651517246055833,
   -1.5567265361738065,
   -2.853002977379785,
   -2.371464749325885],
  [-1.8556610550173243,
   -15.579666106566048,
   -2.008209081966385,
   -6.28701763851427,
   -5.721808958974618,
   -6.96218028896233,
   -2.873783848031403,
   -1.5436897269840413,
   0.7310069450775754,
   -1.8591220747515027,
   -2.0596309926855696,
   -1.6634337891686874,
   -3.1470790667636632,
   -2.3708803782341574],
  [-1.85566