## Dictionary to DataFrame


In [None]:
# Import pandas as pd
import pandas as pd

# Raw inputs: one list per future column, all listed in the same country order
names = ["United States", "Australia", "Japan", "India", "Russia", "Morocco", "Egypt"]
dr = [True, False, False, False, True, True, True]
cpc = [809, 731, 588, 18, 200, 70, 45]

# Short row labels, one per entry of the lists above
row_labels = ["US", "AUS", "JPN", "IN", "RU", "MOR", "EG"]

# Map each column name to its list of values: my_dict
my_dict = dict(country=names, drives_right=dr, cars_per_cap=cpc)

# Build the DataFrame and attach the row labels in a single constructor call: cars
cars = pd.DataFrame(my_dict, index=row_labels)

# Show the labelled table
print(cars)

## CSV to DataFrame


In [None]:
import pandas as pd

csv_path = "datasets/cars.csv"
divider = "----------------------------------------------"

# Baseline read with no index_col, printed for comparison with the read below
cars = pd.read_csv(csv_path)
print(cars)
print(divider)

# Same file again, this time with index_col=0 so the first column
# is used as the row labels instead of as a data column
cars2 = pd.read_csv(csv_path, index_col=0)
print(cars2)

## Column access using brackets


In [None]:
import pandas as pd

divider = "----------------------------------------------"

# Load the data with the first CSV column as row labels
cars = pd.read_csv("datasets/cars.csv", index_col=0)

# Full table first, for reference
print(cars)
print(divider)

# Double brackets (a list of column names) select a sub-DataFrame,
# even when the list holds a single name
column_selections = (["country", "drives_right"], ["country"])
for columns in column_selections:
    print(cars[columns])
    print(divider)

## Row access using brackets


In [None]:
import pandas as pd

divider = "----------------------------------------------"

# Load the data with the first CSV column as row labels
cars = pd.read_csv("datasets/cars.csv", index_col=0)

# Full table first, for reference
print(cars)
print(divider)

# A slice inside plain brackets selects rows: start at 1, stop before 4
row_window = slice(1, 4)
print(cars[row_window])
print(divider)

## Row & column access using `loc` & `iloc`

`[Subsetting DataFrame]`


In [None]:
import pandas as pd

divider = "----------------------------------------------"


def show(result):
    """Print ``result`` and then the divider line."""
    print(result)
    print(divider)


# Load the data with the first CSV column as row labels
cars = pd.read_csv("datasets/cars.csv", index_col=0)

# Selectors reused below. The positional ones are meant to mirror the
# label-based ones -- NOTE(review): that depends on the row/column order
# in cars.csv, which is not visible here.
one_label = ["RU"]
one_position = [4]
some_labels = ["RU", "IN", "EG"]
some_positions = [4, 3, 6]
some_columns = ["country", "drives_right"]
some_column_positions = [1, 2]

# Full table first, for reference
show(cars)

# One row: by label (loc), then by position (iloc)
show(cars.loc[one_label])
show(cars.iloc[one_position])

# A bare label gives a Series ...
show(cars.loc["RU"])

# ... while a list of labels gives a DataFrame
show(cars.loc[one_label])

# Several rows: by label, then by position
show(cars.loc[some_labels])
show(cars.iloc[some_positions])

# Rows and columns together: by label, then by position
show(cars.loc[some_labels, some_columns])
show(cars.iloc[some_positions, some_column_positions])

# Every row (":"), selected columns: by label, then by position
show(cars.loc[:, some_columns])
show(cars.iloc[:, some_column_positions])

# One row label and one column label, both given as lists
show(cars.loc[one_label, ["country"]])

# Sub-DataFrame of selected rows and columns
show(cars.loc[some_labels, some_columns])

## Filtering a DataFrame based on a condition


In [None]:
import pandas as pd

divider = "----------------------------------------------"

# Load the data with the first CSV column as row labels
cars = pd.read_csv("datasets/cars.csv", index_col=0)

# Pull out the drives_right column as a Series: dr
dr = cars["drives_right"]
print(dr)
print(divider)

# Element-wise comparison against True gives the row mask: sel
sel = dr.eq(True)

# Keep only the rows where the mask is True and print them
right_side_rows = cars[sel]
print(right_side_rows)

In [None]:
import pandas as pd

# Load the data with the first CSV column as row labels
cars = pd.read_csv("datasets/cars.csv", index_col=0)

# Row mask: cars_per_cap strictly greater than 500
cpc = cars["cars_per_cap"]
many_cars = cpc.gt(500)

# car_maniac holds only the rows that pass the mask
car_maniac = cars.loc[many_cars]
print(car_maniac)

In [None]:
import numpy as np
import pandas as pd

# Load the data with the first CSV column as row labels
cars = pd.read_csv("datasets/cars.csv", index_col=0)

# Two one-sided conditions on cars_per_cap (both bounds exclusive)
cpc = cars["cars_per_cap"]
above_lower_bound = cpc > 100
below_upper_bound = cpc < 500

# np.logical_and combines the two masks element by element: between
between = np.logical_and(above_lower_bound, below_upper_bound)

# medium holds the rows with cars_per_cap between 100 and 500
medium = cars[between]
print(medium)

## Iterate over a dataframe


In [None]:
import pandas as pd

# Load the data with the first CSV column as row labels
cars = pd.read_csv("datasets/cars.csv", index_col=0)

# iterrows() yields (row label, row as a Series) pairs;
# print the label and the row on separate lines
for label, row in cars.iterrows():
    print(label, row, sep="\n")

In [None]:
import pandas as pd

# Load the data with the first CSV column as row labels
cars = pd.read_csv("datasets/cars.csv", index_col=0)

# One "label: value" line per row, reading cars_per_cap from the row Series
for lab, row in cars.iterrows():
    per_cap = row["cars_per_cap"]
    print(f"{lab}: {per_cap}")

## Add column using `loc`


In [None]:
import pandas as pd

divider = "----------------------------------------------"

# Load the data with the first CSV column as row labels
cars = pd.read_csv("datasets/cars.csv", index_col=0)

# Table before the new column is added
print(cars)
print(divider)

# Fill the COUNTRY column one row at a time: upper-case the row's
# country name and write it at (row label, "COUNTRY") with loc
for label, row in cars.iterrows():
    upper_name = row["country"].upper()
    cars.loc[label, "COUNTRY"] = upper_name

# Table after the new column is added
print(cars)

## Add column using `apply()`


In [None]:
import pandas as pd

divider = "----------------------------------------------"

# Load the data with the first CSV column as row labels
cars = pd.read_csv("datasets/cars.csv", index_col=0)

# Table before the new columns are added
print(cars)
print(divider)

country_names = cars["country"]

# upper() is a string method, so it is passed to apply() as str.upper
cars["COUNTRY"] = country_names.apply(str.upper)

# len() is a plain function, so it is passed to apply() directly
cars["country length"] = country_names.apply(len)

# Table after the new columns are added
print(cars)