In [4]:
import numpy as np
import pandas as pd

### Object Creation

#### A DataFrame
A dataframe represents a rectangular table of data and contains an ordered collection of columns, each of which can be a different value type. A dataframe has both a row and column index and can be thought of as a dict of Series, all sharing the same index.
A column in a dataframe can be retrieved as a Series.
The columns of the resulting DataFrame have different dtypes.

In [5]:
# Location of the raw iris data file in the UCI Machine Learning Repository.
csv_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'

# First pass: read the file with header=None so that the first record is
# treated as data rather than as column names.
iris = pd.read_csv(csv_url, header=None)

The CSV file at the UCI repository does not contain the variable names.
They are located in a separate file, so the column names are supplied manually in the next cell.

In [6]:
# Column labels to attach to the data, applied positionally to the file's columns.
col_names = [
    'Sepal_Length',
    'Sepal_Width',
    'Petal_Length',
    'Petal_Width',
    'Species',
]

# Read the dataset from the UCI Machine Learning Repository link again, this
# time specifying the column names to use, and save the result as `iris`.
iris = pd.read_csv(csv_url, names=col_names)

In [7]:
# Inspect the dtype of each column: the columns of the resulting DataFrame
# have different dtypes (float64 for the four measurements, object for Species).
iris.dtypes

Sepal_Length    float64
Sepal_Width     float64
Petal_Length    float64
Petal_Width     float64
Species          object
dtype: object

### Viewing Data
You can view the top and bottom rows of the DataFrame, as well as its index and column names.

In [8]:
# Show the first five rows (head() with no argument).
iris.head()

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [9]:
# Show the last five rows (tail() with no argument).
iris.tail()

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica
149,5.9,3.0,5.1,1.8,Iris-virginica


In [10]:
# View the row index of the DataFrame (here a default RangeIndex over the 150 rows)
iris.index

RangeIndex(start=0, stop=150, step=1)

In [12]:
# View the column labels of the DataFrame
iris.columns

Index(['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width',
       'Species'],
      dtype='object')

In [13]:
# Sort along the column axis: reorder the columns by label in descending
# order (Species first, Petal_Length last) and preview the first ten rows.
iris.sort_index(axis='columns', ascending=False).head(10)

Unnamed: 0,Species,Sepal_Width,Sepal_Length,Petal_Width,Petal_Length
0,Iris-setosa,3.5,5.1,0.2,1.4
1,Iris-setosa,3.0,4.9,0.2,1.4
2,Iris-setosa,3.2,4.7,0.2,1.3
3,Iris-setosa,3.1,4.6,0.2,1.5
4,Iris-setosa,3.6,5.0,0.2,1.4
5,Iris-setosa,3.9,5.4,0.4,1.7
6,Iris-setosa,3.4,4.6,0.3,1.4
7,Iris-setosa,3.4,5.0,0.2,1.5
8,Iris-setosa,2.9,4.4,0.2,1.4
9,Iris-setosa,3.1,4.9,0.1,1.5


In [14]:
# Sort the rows by the values in Petal_Width (ascending) and preview the ten smallest
iris.sort_values(by='Petal_Width').head(10)

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
32,5.2,4.1,1.5,0.1,Iris-setosa
13,4.3,3.0,1.1,0.1,Iris-setosa
37,4.9,3.1,1.5,0.1,Iris-setosa
9,4.9,3.1,1.5,0.1,Iris-setosa
12,4.8,3.0,1.4,0.1,Iris-setosa
34,4.9,3.1,1.5,0.1,Iris-setosa
0,5.1,3.5,1.4,0.2,Iris-setosa
27,5.2,3.5,1.5,0.2,Iris-setosa
28,5.2,3.4,1.4,0.2,Iris-setosa
29,4.7,3.2,1.6,0.2,Iris-setosa


### Selection

#### Getting
You can select a single column, which yields a Series.
Selecting with a slice inside [] slices the rows.

It is recommended to use the optimized pandas data access methods, .at, .iat, .loc and .iloc.

In [15]:
# Select a single column by its label, which yields a Series; equivalent to iris.Sepal_Length

# Column labels can be used to select a single column or a set of columns; head() shows the first five values
iris['Sepal_Length'].head()



0    5.1
1    4.9
2    4.7
3    4.6
4    5.0
Name: Sepal_Length, dtype: float64

In [16]:
# A slice inside [] selects rows by position rather than columns.
# Here: the first 5 rows.
iris[:5]

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


### Selection by Label

In [17]:
# Select on both axes by label: row labels 0 through 10 and two named columns.
# Note: .loc label slices include the end label, unlike positional slices.
iris.loc[0:10, ['Sepal_Length', 'Petal_Length']]

Unnamed: 0,Sepal_Length,Petal_Length
0,5.1,1.4
1,4.9,1.4
2,4.7,1.3
3,4.6,1.5
4,5.0,1.4
5,5.4,1.7
6,4.6,1.4
7,5.0,1.5
8,4.4,1.4
9,4.9,1.5


In [18]:
# A single row label reduces the dimensions of the returned object:
# the result is a Series (named after the row label) instead of a DataFrame
iris.loc[0, ['Sepal_Length', 'Petal_Length']]

Sepal_Length    5.1
Petal_Length    1.4
Name: 0, dtype: object

In [20]:
# A single row label plus a single column label returns a scalar value
iris.loc[0, 'Petal_Length']

1.4

In [21]:
# Retrieve a column of the DataFrame as a Series using dict-like notation
iris['Petal_Length'].head()

0    1.4
1    1.4
2    1.3
3    1.5
4    1.4
Name: Petal_Length, dtype: float64

In [22]:
# Retrieve a column of data by attribute access (same result as iris['Sepal_Length'])
iris.Sepal_Length.head()

0    5.1
1    4.9
2    4.7
3    4.6
4    5.0
Name: Sepal_Length, dtype: float64

In [23]:
# Retrieve a single row (label 0) as a Series using the loc attribute
iris.loc[0]

Sepal_Length            5.1
Sepal_Width             3.5
Petal_Length            1.4
Petal_Width             0.2
Species         Iris-setosa
Name: 0, dtype: object

### Selection by Position

In [24]:
# Select by integer position: the first three rows and the first four columns
iris.iloc[:3, :4]

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2


In [25]:
# Get a scalar value by integer position (row 0, column 0) using iat
iris.iat[0,0]

5.1

In [1]:
# Scratch check: adding two booleans yields the integer 2.
True + True

2

In [2]:
# pandas DataFrames have no dim() method; the (rows, columns) dimensions are
# exposed through the shape attribute. Run this after the cell that loads
# `iris`, otherwise the name is not defined.
iris.shape

NameError: name 'iris' is not defined

In [11]:
# NOTE(review): without parentheses this only displays the plotting accessor
# object, not a chart. If a plot was intended, it needs to be called
# (e.g. iris.plot()) -- confirm intent.
iris.plot

<pandas.plotting._core.FramePlotMethods object at 0x11a9fe390>