In [1]:
from csv import reader

# Load dataset from CSV
def load_data(filename):
    dataset = list()
    with open(filename, "r") as file:
        csv_reader = reader(file)
        for row in csv_reader:
            if not row:
                continue
            dataset.append(row)
    return dataset

# to Convert string Columns to floats
def string_to_float(dataset, col):
    for row in dataset:
        row[col] = float(row[col].strip())
        
# to convert string columns to integer
def string_to_int(dataset, column):
    class_values = [row[column] for row in dataset]
    unique = set(class_values)
    lookup = dict()
    for i, value in enumerate(unique):
        lookup[value] = i
    for row in dataset:
        row[column] = lookup[row[column]]
    return lookup

In [2]:
filename = 'pima-indians-diabetes.csv'
dataset = load_data(filename)
print('Loaded data file {0} with {1} rows and {2} columns'
      .format(filename, len(dataset), len(dataset[0])))

In [3]:
# Convert
for i in range(len(dataset[0])):
    string_to_float(dataset, i)
print(dataset[0])

In [4]:
filename = 'iris.csv'
dataset = load_data(filename)
print('Loaded data file {0} with {1} rows and {2} columns'
      .format(filename, len(dataset), len(dataset[0])))
print(dataset[0])

# convert class column to int
lookup = string_to_int(dataset, 4)
print(dataset[0])
print(lookup)

In [5]:
#############################################################
######                  USING NUMPY                    ######
#############################################################

import numpy as np

filename = 'iris.csv'
dataset = np.genfromtxt(filename, 
                        delimiter=',', 
                        dtype=None, 
                        encoding='ascii'
                       )
print(dataset[0])

lookup = string_to_int(dataset, 4)
print(dataset[0])
print(lookup)

np_dataset = np.loadtxt('pima-indians-diabetes.csv', 
                        dtype='int16', delimiter=',')

print(np_dataset[0])

(5.1, 3.5, 1.4, 0.2, 'Iris-setosa')
(5.1, 3.5, 1.4, 0.2, '1')
{'Iris-virginica': 0, 'Iris-setosa': 1, 'Iris-versicolor': 2}
[  6 148  72  35   0  33   0  50   1]


In [9]:
import csv

filename = 'pima-indians-diabetes.csv'
raw_data = open(filename, 'rt')
reader = csv.reader(raw_data, delimiter=',', 
                    quoting=csv.QUOTE_NONE) 
x = list(reader)
data = np.array(x).astype('float')
print(data.shape)


(768, 9)


In [10]:
# Load CSV from URL using NumPy

from numpy import loadtxt
from urllib.request import urlopen
url = 'https://goo.gl/bDdBiA'
raw_data = urlopen(url)
dataset = loadtxt(raw_data, delimiter=",") 
print(dataset.shape)

(768, 9)


In [12]:
# Load CSV using Pandas
from pandas import read_csv
filename = 'pima-indians-diabetes.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class'] 
data = read_csv(filename, names=names)
print(data.shape)
print(data.head())

(768, 9)
   preg  plas  pres  skin  test  mass   pedi  age  class
0     6   148    72    35     0  33.6  0.627   50      1
1     1    85    66    29     0  26.6  0.351   31      0
2     8   183    64     0     0  23.3  0.672   32      1
3     1    89    66    23    94  28.1  0.167   21      0
4     0   137    40    35   168  43.1  2.288   33      1
