# Pandas Pt.1

## Dictionary to DataFrame

In [1]:
# A DataFrame is one of the most important pandas data structures:
# it stores tabular data in which both the rows and the columns carry labels.

# import pandas
import pandas as pd

# Start from three plain lists, one per future column; the values at the same
# position in each list belong to the same country.
names = [
    'United States', 'Australia', 'Japan', 'India',
    'Russia', 'Morocco', 'Egypt',
]
dr = [True, False, False, False, True, True, True]
cpc = [809, 731, 588, 18, 200, 70, 45]

# Create dictionary my_dict with three key:value pairs
# (column label -> list of column values)
my_dict = dict(country=names, drives_right=dr, cars_per_cap=cpc)

# Build a DataFrame from the dictionary; each key becomes a column label
cars = pd.DataFrame(data=my_dict)
print(cars)

         country  drives_right  cars_per_cap
0  United States          True           809
1      Australia         False           731
2          Japan         False           588
3          India         False            18
4         Russia          True           200
5        Morocco          True            70
6          Egypt          True            45


In [2]:
# In the output above, the rows are labelled with the default integers 0 to 6.
# Row labels can be replaced by assigning a list of labels to the .index
# property (one label per row, in row order).
row_labels = ['US', 'AUS', 'JAP', 'IN', 'RU', 'MOR', 'EG']
cars.index = row_labels

print(cars)

           country  drives_right  cars_per_cap
US   United States          True           809
AUS      Australia         False           731
JAP          Japan         False           588
IN           India         False            18
RU          Russia          True           200
MOR        Morocco          True            70
EG           Egypt          True            45


## CSV to DataFrame

In [3]:
# Import CSV data into a DataFrame using read_csv().
# With no extra arguments the rows get the default integer labels, and the
# file's first column (the country codes) is read as an ordinary data column,
# shown as 'Unnamed: 0' in the output.
cars = pd.read_csv('datasets/cars.csv')
print(cars)

  Unnamed: 0  cars_per_cap        country  drives_right
0         US           809  United States          True
1        AUS           731      Australia         False
2        JAP           588          Japan         False
3         IN            18          India         False
4         RU           200         Russia          True
5        MOR            70        Morocco          True
6         EG            45          Egypt          True


In [4]:
# Instead of the default 0 - 6 row labels, use the first column of the file
# as the index: index_col=0 selects that column by position.
cars = pd.read_csv('datasets/cars.csv', index_col=0)
print(cars)

     cars_per_cap        country  drives_right
US            809  United States          True
AUS           731      Australia         False
JAP           588          Japan         False
IN             18          India         False
RU            200         Russia          True
MOR            70        Morocco          True
EG             45          Egypt          True


# Pandas Pt.2

## Slicing and selecting DataFrames with square brackets

In [5]:
# Square brackets are the simplest selection method.
# Print out country column as Pandas Series (single brackets, one column label)
print(cars['country'])

# Print out country column as Pandas DataFrame (double brackets: a list of labels)
print(cars[['country']])

# Selecting 2 columns is easy: put both labels in the list
print(cars[['country', 'drives_right']])

# Selecting rows (or observations) is also easy; the result is a DataFrame.
# Note that square brackets select rows only when given a slice; here the
# slice uses integer positions and the stop position is excluded.
print(cars[:3])

# Print the fourth, fifth and sixth rows (positions 3 to 5); the last row
# (EG, position 6) is not included
print(cars[3:6])

US     United States
AUS        Australia
JAP            Japan
IN             India
RU            Russia
MOR          Morocco
EG             Egypt
Name: country, dtype: object
           country
US   United States
AUS      Australia
JAP          Japan
IN           India
RU          Russia
MOR        Morocco
EG           Egypt
           country  drives_right
US   United States          True
AUS      Australia         False
JAP          Japan         False
IN           India         False
RU          Russia          True
MOR        Morocco          True
EG           Egypt          True
     cars_per_cap        country  drives_right
US            809  United States          True
AUS           731      Australia         False
JAP           588          Japan         False
     cars_per_cap  country  drives_right
IN             18    India         False
RU            200   Russia          True
MOR            70  Morocco          True


## loc and iloc

In [13]:
# loc and iloc cover any row/column selection
# loc is label based (row and column labels)
# iloc is integer-position based (0-based row and column positions)

# Print out observation for Japan (list of one row label -> one-row DataFrame)
print(cars.loc[['JAP']])

# Selecting two observations
print(cars.loc[['AUS', 'EG']])

# 2D selection: a list of row labels and a list of column labels
print(cars.loc[['MOR'], ['drives_right']])

# Selecting a single value: scalar row label and scalar column label
print(cars.loc['IN', 'cars_per_cap'])

# Same cell, but with both labels in lists the result is a 1x1 DataFrame
print(cars.loc[['IN'], ['cars_per_cap']])

# Print sub-DataFrame: two rows, two columns, all chosen by label
print(cars.loc[['RU', 'MOR'], ['country', 'drives_right']])

# Print sub-DataFrame by integer position: rows 3 and 4, columns 0 and 1
print(cars.iloc[[3, 4], [0, 1]])

     cars_per_cap country  drives_right
JAP           588   Japan         False
     cars_per_cap    country  drives_right
AUS           731  Australia         False
EG             45      Egypt          True
     drives_right
MOR          True
18
    cars_per_cap
IN            18
     country  drives_right
RU    Russia          True
MOR  Morocco          True
    cars_per_cap country
IN            18   India
RU           200  Russia


In [16]:
# Selecting all rows for specified column(s) is done with a colon as the row selector
# as Pandas Series (scalar column label)
print(cars.loc[:, 'drives_right'])

# as Pandas DataFrame (column label inside a list)
print(cars.loc[:, ['drives_right']])

# selecting 2 columns; the result's columns follow the order of the list
print(cars.loc[:, ['drives_right', 'cars_per_cap']])

US      True
AUS    False
JAP    False
IN     False
RU      True
MOR     True
EG      True
Name: drives_right, dtype: bool
     drives_right
US           True
AUS         False
JAP         False
IN          False
RU           True
MOR          True
EG           True
     drives_right  cars_per_cap
US           True           809
AUS         False           731
JAP         False           588
IN          False            18
RU           True           200
MOR          True            70
EG           True            45
