# Pandas DataFrame

## Create a DataFrame from an existing dict object

In [1]:
import pandas as pd

# Three parallel lists: entry i of each list describes the same country.
names = [
    'United States',
    'Australia',
    'Japan',
    'India',
    'Russia',
    'Morocco',
    'Egypt',
]
dr = [True, False, False, False, True, True, True]   # values for 'drives_right'
cpc = [809, 731, 588, 18, 200, 70, 45]               # values for 'cars_per_cap'

# Keys become the column labels; insertion order becomes the column order.
my_dict = dict(country=names, drives_right=dr, cars_per_cap=cpc)

# No index is supplied, so the rows get the default integer labels 0..6.
cars = pd.DataFrame(data=my_dict)

cars


Unnamed: 0,country,drives_right,cars_per_cap
0,United States,True,809
1,Australia,False,731
2,Japan,False,588
3,India,False,18
4,Russia,True,200
5,Morocco,True,70
6,Egypt,True,45


In [2]:
# Replace the default integer index with short country codes
# (one label per row, given in row order). This modifies `cars` in place.
row_labels = ['US', 'AUS', 'JPN', 'IN', 'RU', 'MOR', 'EG']
cars.index = row_labels
cars


Unnamed: 0,country,drives_right,cars_per_cap
US,United States,True,809
AUS,Australia,False,731
JPN,Japan,False,588
IN,India,False,18
RU,Russia,True,200
MOR,Morocco,True,70
EG,Egypt,True,45


In [12]:
# Export the DataFrame to a .csv file so that the import in the next section
# works from a fresh kernel (Restart & Run All). The row labels are written as
# the first, unnamed column. An existing cars.csv in the working directory is
# overwritten.
cars.to_csv('cars.csv')


## Or import data from a .csv file

In [15]:
# index_col=0 makes the first CSV column the row index (the country codes)
# instead of loading it as a regular data column.
cars = pd.read_csv('cars.csv', index_col=0)
cars

Unnamed: 0,country,drives_right,cars_per_cap
US,United States,True,809
AUS,Australia,False,731
JPN,Japan,False,588
IN,India,False,18
RU,Russia,True,200
MOR,Morocco,True,70
EG,Egypt,True,45


In [23]:
# Plain [] with a slice selects ROWS by position, following the same rule as
# list slicing: [start (inclusive) : stop (exclusive) : step].
# Here [:4:2] keeps the rows at positions 0 and 2.
# The pandas-native accessors (.loc / .iloc) are the suggested way to select
# rows; see the following section "Select row as Series object".

cars[:4:2]

Unnamed: 0,country,drives_right,cars_per_cap
US,United States,True,809
JPN,Japan,False,588


## Select a column as a Series object

In [55]:
# Method 1 (RECOMMENDED): single brackets with one column label return that
# column as a Series. Noted by the author as the faster of the two methods;
# uncomment the line below to time it yourself.
# %timeit cars['country'] 

cars['country']

US     United States
AUS        Australia
JPN            Japan
IN             India
RU            Russia
MOR          Morocco
EG             Egypt
Name: country, dtype: object

In [56]:
# Method 2: .loc[rows, columns] with ':' for the rows (all rows) and a single
# column label also returns the column as a Series.
# Uncomment the line below to time it and compare with Method 1.
# %timeit cars.loc[:,'country']

cars.loc[:,'country']

US     United States
AUS        Australia
JPN            Japan
IN             India
RU            Russia
MOR          Morocco
EG             Egypt
Name: country, dtype: object

## Select a column as a DataFrame object

In [57]:
# The double brackets pass a LIST of column labels (here a one-element list),
# so the result stays a DataFrame instead of becoming a Series.
cars[['country']]


Unnamed: 0,country
US,United States
AUS,Australia
JPN,Japan
IN,India
RU,Russia
MOR,Morocco
EG,Egypt


## Select row as Series object

In [60]:
# Method 1: label-based lookup. .loc with a single row label returns that row
# as a Series whose index is the column names.
# The author notes a similar execution time to Method 2; uncomment to measure.
# %timeit cars.loc['JPN']

cars.loc['JPN']


country         Japan
drives_right    False
cars_per_cap      588
Name: JPN, dtype: object

In [61]:
# Method 2: position-based lookup. .iloc uses 0-based integer positions, so
# position 2 is the third row (labelled 'JPN' in this DataFrame).
# %timeit cars.iloc[2]

cars.iloc[2]


country         Japan
drives_right    False
cars_per_cap      588
Name: JPN, dtype: object

## Select rows as DataFrame object

In [74]:
# Passing a LIST of row labels to .loc returns the matching rows as a
# DataFrame (a single label, by contrast, returns a Series).
cars.loc[['US', 'AUS']]

Unnamed: 0,country,drives_right,cars_per_cap
US,United States,True,809
AUS,Australia,False,731


## Select a specific value

In [86]:
# Method 1: chained lookup - first select the column (a Series), then index
# that Series by row label.
# Noted by the author as slightly faster than Method 2.
# Chained indexing is fine for reading a value; for assignment, use the single
# .loc[row, column] form instead.

cars['drives_right']['MOR']

True

In [87]:
# Method 2: a single .loc[row_label, column_label] lookup returns the scalar
# stored at that row/column intersection.

cars.loc['MOR', 'drives_right']

True

## Select a sub-DataFrame

In [95]:
# .loc accepts a list of row labels and a list of column labels; the result is
# the DataFrame formed by their intersection.
wanted_rows = ['IN', 'RU']
wanted_cols = ['country', 'cars_per_cap']
cars.loc[wanted_rows, wanted_cols]


Unnamed: 0,country,cars_per_cap
IN,India,18
RU,Russia,200


## Filter

In [131]:
# Boolean filtering: indexing a DataFrame with a boolean Series keeps only the
# rows where that Series is True.

right_side_mask = cars['drives_right']
print(right_side_mask)
cars[right_side_mask]

US      True
AUS    False
JPN    False
IN     False
RU      True
MOR     True
EG      True
Name: drives_right, dtype: bool


Unnamed: 0,country,drives_right,cars_per_cap
US,United States,True,809
RU,Russia,True,200
MOR,Morocco,True,70
EG,Egypt,True,45


## Iteration

In [132]:
# Display the full DataFrame again as the starting point for the iteration
# examples below.
cars

Unnamed: 0,country,drives_right,cars_per_cap
US,United States,True,809
AUS,Australia,False,731
JPN,Japan,False,588
IN,India,False,18
RU,Russia,True,200
MOR,Morocco,True,70
EG,Egypt,True,45


In [133]:
# Looping directly over a DataFrame yields its column labels, not its rows.
for column_label in cars:
    print(column_label)

country
drives_right
cars_per_cap


In [162]:
# DataFrame.items() iterates over the columns, yielding
# (column name, column as a Series) pairs.
# It replaces DataFrame.iteritems(), which was deprecated in pandas 1.5 and
# removed in pandas 2.0 (calling it there raises AttributeError).
for name, col in cars.items():
    print(name)
    print(col)
    print()

# Each column is a Series indexed by the row labels, so it can be indexed by
# label inside the loop:

for name, col in cars.items():
    print(col['US'])

country
US     United States
AUS        Australia
JPN            Japan
IN             India
RU            Russia
MOR          Morocco
EG             Egypt
Name: country, dtype: object

drives_right
US      True
AUS    False
JPN    False
IN     False
RU      True
MOR     True
EG      True
Name: drives_right, dtype: bool

cars_per_cap
US     809
AUS    731
JPN    588
IN      18
RU     200
MOR     70
EG      45
Name: cars_per_cap, dtype: int64

United States
True
809


In [170]:
# DataFrame.iterrows() walks the frame row by row, yielding
# (row label, row as a Series) pairs.

for label, record in cars.iterrows():
    print(label)
    print(record)
    print()

# Each yielded row is a Series keyed by column name, so a single column value
# can be picked out per row.

for label, record in cars.iterrows():
    print(record["country"])

US
country         United States
drives_right             True
cars_per_cap              809
Name: US, dtype: object

AUS
country         Australia
drives_right        False
cars_per_cap          731
Name: AUS, dtype: object

JPN
country         Japan
drives_right    False
cars_per_cap      588
Name: JPN, dtype: object

IN
country         India
drives_right    False
cars_per_cap       18
Name: IN, dtype: object

RU
country         Russia
drives_right      True
cars_per_cap       200
Name: RU, dtype: object

MOR
country         Morocco
drives_right       True
cars_per_cap         70
Name: MOR, dtype: object

EG
country         Egypt
drives_right     True
cars_per_cap       45
Name: EG, dtype: object

United States
Australia
Japan
India
Russia
Morocco
Egypt


## Add new column

In [187]:
# Vectorized arithmetic: multiplying the Series doubles every element in one
# operation, instead of calling a Python lambda once per row through .apply().
# The resulting values are the same.
cars["double_cpc"] = cars["cars_per_cap"] * 2
cars

Unnamed: 0,country,drives_right,cars_per_cap,double_cpc
US,United States,True,809,1618
AUS,Australia,False,731,1462
JPN,Japan,False,588,1176
IN,India,False,18,36
RU,Russia,True,200,400
MOR,Morocco,True,70,140
EG,Egypt,True,45,90
