# Load CSV Files with the Python Standard Library #

For CSV files, consider:
- File header, if available, can be used as column names
- Comments, if available, need to be identified; they are usually indicated by a hash (#)
- Delimiter, if available, needs to be specified; it is usually a comma (,)
- Quotes, if used to delimit field values that contain spaces, are usually double quotes (")

In [1]:
import csv

In [2]:
import numpy

In [3]:
# absolute path of the CSV file used by the file-based loading examples
_filename = '/Users/faisalmemon/ai/github/path-to-ai/machine-learning-mastery/data/pima-indians-diabetes.data.csv'

In [4]:
# open the CSV file for reading in text mode
_file = open(_filename, 'r')

In [5]:
# split each line on commas, with no special processing of quote characters
_raw = csv.reader(_file, delimiter=',', quoting=csv.QUOTE_NONE)

In [6]:
_raw

<_csv.reader at 0x10f2e2668>

In [7]:
# consume the reader: one list of string fields per row
_list = list(_raw)

In [8]:
# all rows are already in _list, so the file handle can be released
_file.close()

In [9]:
# build a NumPy array from the rows and convert the string fields to floats
_data = numpy.array(_list).astype('float')

In [10]:
_data.shape

(768, 9)

In [11]:
_data[:3,:]

array([[   6.   ,  148.   ,   72.   ,   35.   ,    0.   ,   33.6  ,
           0.627,   50.   ,    1.   ],
       [   1.   ,   85.   ,   66.   ,   29.   ,    0.   ,   26.6  ,
           0.351,   31.   ,    0.   ],
       [   8.   ,  183.   ,   64.   ,    0.   ,    0.   ,   23.3  ,
           0.672,   32.   ,    1.   ]])

# Load CSV Files with NumPy #

In [12]:
from numpy import loadtxt

In [13]:
# reopen the same CSV file, this time to hand it to loadtxt
_file = open(_filename, 'r')

In [14]:
# parse the comma-delimited text straight into a NumPy array
_data = loadtxt(_file, delimiter=',')

In [15]:
# loadtxt has returned the parsed array, so the file handle can be released
_file.close()

In [16]:
_data.shape

(768, 9)

In [17]:
type(_data)

numpy.ndarray

In [18]:
_data[:3,:]

array([[   6.   ,  148.   ,   72.   ,   35.   ,    0.   ,   33.6  ,
           0.627,   50.   ,    1.   ],
       [   1.   ,   85.   ,   66.   ,   29.   ,    0.   ,   26.6  ,
           0.351,   31.   ,    0.   ],
       [   8.   ,  183.   ,   64.   ,    0.   ,    0.   ,   23.3  ,
           0.672,   32.   ,    1.   ]])

In [19]:
# load using uri
import requests

In [20]:
# URL of the dataset file on the UCI archive server
_uri = 'https://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes/pima-indians-diabetes.data'

In [21]:
# fetch the file over HTTP; at this point _data holds the response object, not an array
_data = requests.get(_uri)

In [22]:
# feed the response body to loadtxt line by line; _data is rebound to the parsed array
_data = loadtxt(_data.iter_lines(), delimiter=',')

In [23]:
_data.shape

(768, 9)

In [24]:
type(_data)

numpy.ndarray

In [25]:
_data[:3,:]

array([[   6.   ,  148.   ,   72.   ,   35.   ,    0.   ,   33.6  ,
           0.627,   50.   ,    1.   ],
       [   1.   ,   85.   ,   66.   ,   29.   ,    0.   ,   26.6  ,
           0.351,   31.   ,    0.   ],
       [   8.   ,  183.   ,   64.   ,    0.   ,    0.   ,   23.3  ,
           0.672,   32.   ,    1.   ]])

# Load CSV Files with Pandas #

In [26]:
# load using file
from pandas import read_csv

In [27]:
# column labels passed to read_csv through its names argument
_col_names = ['preg','plas','pres','skin','test','mass','pedi','age','class']

In [28]:
# read the local CSV file into a DataFrame, labelling the columns with _col_names
_data = read_csv(_filename, names=_col_names)

In [29]:
_data.shape

(768, 9)

In [30]:
type(_data)

pandas.core.frame.DataFrame

In [31]:
_data.head()

Unnamed: 0,preg,plas,pres,skin,test,mass,pedi,age,class
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [32]:
# load using uri: read_csv is given the URL in place of a local path
_data = read_csv(_uri,names=_col_names)

In [33]:
_data.shape

(768, 9)

In [34]:
type(_data)

pandas.core.frame.DataFrame

In [35]:
_data.head()

Unnamed: 0,preg,plas,pres,skin,test,mass,pedi,age,class
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
