# Pandas Fundamentals: Dataframes

The objective of this notebook is to provide coding examples for fundamental pandas DataFrame operations.



In [44]:
# Install required dependencies: pandas, plus pyarrow (presumably the engine
# behind the Parquet section below -- TODO confirm).
# The command is left commented out; uncomment it to run. `%pip` is suggested
# over `!pip` so the install targets the environment of the running kernel.
# %pip install pandas pyarrow

In [45]:
import pandas as pd
from pandas.testing import assert_frame_equal

# Sample data: maps each column name to its list of values, one entry per person.
# NOTE(review): the name `dict` shadows Python's built-in `dict` type for the
# rest of the notebook. The later cells build their frames from this name, so
# a rename (e.g. to `persons_data`) has to be applied to all cells at once.
dict = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35],
    'City': ['New York', 'San Francisco', 'Seattle']
}

# Globals
# Base name shared by the files written in the "Store and Load" section;
# each cell there appends its own extension (.parquet, .json, .csv).
file_prefix = '01_dataframes'

## 1. Store and Load

---

### 1.1. Parquet

In [46]:
# Path of the Parquet file, derived from the shared file prefix.
parquet_file = '{}.parquet'.format(file_prefix)

# Round trip, step 1: build the sample frame and persist it as Parquet.
persons_df = pd.DataFrame(data=dict)
persons_df.to_parquet(parquet_file)

# Round trip, step 2: load the file back into a new frame and display it.
parquet_df = pd.read_parquet(path=parquet_file)
parquet_df

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,San Francisco
2,Charlie,35,Seattle


In [47]:
# The Parquet round trip must give back the frame that was written;
# assert_frame_equal raises AssertionError if the two frames differ.
assert_frame_equal(left=persons_df, right=parquet_df)

---

### 1.2. JSON

In [48]:
# Path of the JSON file, derived from the shared file prefix.
json_file = f'{file_prefix}.json'

# Write: one JSON object per row (orient='records'), so the index is not stored.
persons_df = pd.DataFrame(dict)
persons_df.to_json(json_file, orient='records')

# Read: state the same orient that was used for writing, so the round trip
# does not depend on read_json's default orient also accepting this layout.
json_df = pd.read_json(json_file, orient='records')
json_df

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,San Francisco
2,Charlie,35,Seattle


In [49]:
# The JSON round trip must give back the frame that was written;
# assert_frame_equal raises AssertionError if the two frames differ.
assert_frame_equal(left=persons_df, right=json_df)

---

### 1.3. CSV

In [50]:
# Path of the CSV file, derived from the shared file prefix.
csv_file = '{}.csv'.format(file_prefix)

# Round trip, step 1: build the sample frame and save it without the index
# column (index=False), so only Name, Age and City are written.
persons_df = pd.DataFrame(data=dict)
persons_df.to_csv(csv_file, index=False)

# Round trip, step 2: parse the file back into a new frame and display it.
csv_df = pd.read_csv(filepath_or_buffer=csv_file)
csv_df


Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,San Francisco
2,Charlie,35,Seattle


In [51]:
# The CSV round trip must give back the frame that was written;
# assert_frame_equal raises AssertionError if the two frames differ.
assert_frame_equal(left=persons_df, right=csv_df)

## 2. Filter Row Data

---

### 2.1. *iloc* property

In [52]:
# Filter rows by position: skip the first row, keep every row after it
# (all columns retained).
persons_df = pd.DataFrame(data=dict)
us_west_df = persons_df.iloc[1:, :]
us_west_df

Unnamed: 0,Name,Age,City
1,Bob,30,San Francisco
2,Charlie,35,Seattle


In [53]:
# Remove the row at position 1: stitch together the rows before it and the
# rows after it, then renumber the index from 0 (ignore_index=True).
persons_df = pd.DataFrame(data=dict)
us_north_df = pd.concat(
    objs=[persons_df.iloc[:1], persons_df.iloc[2:]],
    ignore_index=True,
)
us_north_df

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Charlie,35,Seattle


In [54]:
# Positional selection on both axes: the first two rows (positions 0-1) and
# the columns at positions 1-2, which leaves out the column at position 0.
persons_df = pd.DataFrame(data=dict)
young_anonymous_df = persons_df.iloc[0:2, 1:3]
young_anonymous_df

Unnamed: 0,Age,City
0,25,New York
1,30,San Francisco


---

### 2.2. Conditionals

In [55]:
# Boolean-mask filter: keep only the rows whose City differs from
# 'San Francisco'; the surviving rows keep their original index labels.
persons_df = pd.DataFrame(data=dict)
outside_silicon_valley_df = persons_df.loc[persons_df['City'] != 'San Francisco']
outside_silicon_valley_df

Unnamed: 0,Name,Age,City
0,Alice,25,New York
2,Charlie,35,Seattle


## 3. Column Transformations

---

### 3.1. Drop columns

In [56]:
# Drop the 'Name' column; drop() returns a new frame and leaves persons_df
# itself unchanged.
persons_df = pd.DataFrame(data=dict)
anonymous_df = persons_df.drop(columns='Name')
anonymous_df

Unnamed: 0,Age,City
0,25,New York
1,30,San Francisco
2,35,Seattle


---
### 3.2. Append columns

In [57]:
# Append a column taken from a second frame. Both frames carry the default
# 0..2 index, so the Occupation values line up row by row.
persons_df = pd.DataFrame(data=dict)
occupation_df = pd.DataFrame(data={'Occupation': ['Lawyer', 'Engineer', 'Musician']})
persons_df = persons_df.assign(Occupation=occupation_df['Occupation'])
persons_df

Unnamed: 0,Name,Age,City,Occupation
0,Alice,25,New York,Lawyer
1,Bob,30,San Francisco,Engineer
2,Charlie,35,Seattle,Musician


---
### 3.3. *apply* method with lambda function

In [58]:
# Derive a new 'Age Group' column by applying a lambda to every Age value:
# ages below 30 are labelled 'Young', all others 'Adult'.
persons_df = pd.DataFrame(data=dict)
persons_df['Age Group'] = persons_df['Age'].apply(
    lambda years: 'Young' if years < 30 else 'Adult'
)
persons_df

Unnamed: 0,Name,Age,City,Age Group
0,Alice,25,New York,Young
1,Bob,30,San Francisco,Adult
2,Charlie,35,Seattle,Adult
