## Load CSV File

### Load CSV file

In [24]:
from csv import reader

def load_csv(file_name):
    dataset = list()

    with open(file_name, 'r') as file:
        csv_reader = reader(file)
        for row in csv_reader:
            if not row:
                continue
            dataset.append(row)
    
    return dataset

In [25]:
filename = 'pima-indians-diabetes.csv'
dataset = load_csv(filename)
print('Loaded data file {0} with {1} rows'.format(filename, len(dataset)))

print(dataset[0])  # print the first row to check

Loaded data file pima-indians-diabetes.csv with 768 rows
['6', '148', '72', '35', '0', '33.6', '0.627', '50', '1']


### Convert string to floats

In [26]:
def str_column_to_float(dataset, column):
    for row in dataset:
        row[column] = float(row[column].strip())

In [27]:
for i in range (len(dataset[0])):
    str_column_to_float(dataset, i)
print(dataset[0])  # print the first row to check

[6.0, 148.0, 72.0, 35.0, 0.0, 33.6, 0.627, 50.0, 1.0]


### Convert string to integer

In [29]:
def str_column_to_integer(dataset, column):
    class_value = [row[column] for row in dataset]
    unique = set(class_value)

    lookup = dict()

    for i, value in enumerate(unique):
        lookup[value] = i

    for row in dataset:
        row[column] = lookup[row[column]]

    return lookup

In [30]:
forestfires = load_csv('forestfires.csv')
forestfires[0]

['X',
 'Y',
 'month',
 'day',
 'FFMC',
 'DMC',
 'DC',
 'ISI',
 'temp',
 'RH',
 'wind',
 'rain',
 'area']

In [31]:
lookup = str_column_to_integer(forestfires, 2)
lookup

{'jun': 0,
 'month': 1,
 'jul': 2,
 'dec': 3,
 'sep': 4,
 'apr': 5,
 'may': 6,
 'mar': 7,
 'nov': 8,
 'feb': 9,
 'aug': 10,
 'oct': 11,
 'jan': 12}

## Extensions