# Pandas Fundamentals: Dataframes

The objective of this notebook is to provide coding examples for fundamental DataFrame operations.



In [1]:
# Install required dependencies
!pip install pandas pyarrow



In [2]:
import pandas as pd
from pandas.testing import assert_frame_equal

# Sample data, written row by row and transposed into a column-oriented
# mapping: each key is a column label, each value the list of that column's
# entries in row order.
# NOTE: the name `dict` shadows Python's built-in `dict` type; it is kept
# because the cells below read the sample data through this name.
dict = {
    column: list(values)
    for column, values in zip(
        ('Name', 'Age', 'City'),
        zip(
            ('Alice', 25, 'New York'),
            ('Bob', 30, 'San Francisco'),
            ('Charlie', 35, 'Seattle'),
        ),
    )
}

# Globals
# Base name shared by every file this notebook writes; each section appends
# its own extension.
file_prefix = '01_dataframes'

## 1. Store and Load

---

### 1.1. Parquet

In [3]:
# Target file for the Parquet round trip
parquet_file = f'{file_prefix}.parquet'

# Build a frame from the sample data and persist it as Parquet
dict_df = pd.DataFrame(data=dict)
dict_df.to_parquet(path=parquet_file)

# Load the file back into a separate frame and display it
parquet_df = pd.read_parquet(path=parquet_file)
parquet_df

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,San Francisco
2,Charlie,35,Seattle


In [4]:
# Check that the frame read back from Parquet matches the in-memory frame;
# assert_frame_equal raises AssertionError on any difference.
assert_frame_equal(left=dict_df, right=parquet_df)

---

### 1.2. JSON

In [5]:
# Target file for the JSON round trip
json_file = f'{file_prefix}.json'

# Write
# orient='records' serialises the frame as a list with one object per row.
dict_df = pd.DataFrame(dict)
dict_df.to_json(json_file, orient='records')

# Read
# Pass the same orient that was used for writing so the reader is told the
# layout explicitly instead of relying on the default orient happening to
# accept a list of records.
json_df = pd.read_json(json_file, orient='records')
json_df

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,San Francisco
2,Charlie,35,Seattle


In [6]:
# Check that the frame read back from JSON matches the in-memory frame;
# assert_frame_equal raises AssertionError on any difference.
assert_frame_equal(left=dict_df, right=json_df)

---

### 1.3. CSV

In [7]:
# Target file for the CSV round trip
csv_file = f'{file_prefix}.csv'

# Build a frame from the sample data and persist it as CSV.
# index=False leaves the row index out of the file.
dict_df = pd.DataFrame(data=dict)
dict_df.to_csv(path_or_buf=csv_file, index=False)

# Load the file back into a separate frame and display it
csv_df = pd.read_csv(filepath_or_buffer=csv_file)
csv_df


Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,San Francisco
2,Charlie,35,Seattle


In [8]:
# Check that the frame read back from CSV matches the in-memory frame;
# assert_frame_equal raises AssertionError on any difference.
assert_frame_equal(left=dict_df, right=csv_df)

## 2. Filter Data

---

### 2.1. iloc

In [9]:
# Start from a fresh frame built from the sample data
dict_df = pd.DataFrame(data=dict)

# Filter rows by position: keep everything from position 1 onward
# (i.e. skip the first row) and take all columns.
us_west_df = dict_df.iloc[1:, :]
us_west_df

Unnamed: 0,Name,Age,City
1,Bob,30,San Francisco
2,Charlie,35,Seattle


In [10]:
# Leave out the row at position 1 by stitching together the rows before it
# and the rows after it; ignore_index=True renumbers the result from 0.
us_north_df = pd.concat(
    objs=[
        dict_df.iloc[:1],
        dict_df.iloc[2:],
    ],
    ignore_index=True,
)
us_north_df

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Charlie,35,Seattle


In [11]:
# Positional selection on both axes: rows at positions 0-1 and columns at
# positions 1-2 (end bounds are exclusive).
young_anonymous_df = dict_df.iloc[0:2, 1:3]
young_anonymous_df

Unnamed: 0,Age,City
0,25,New York
1,30,San Francisco


---

### 2.2. Conditionals

In [12]:
# Boolean-mask filter: keep only the rows whose 'City' value is not
# 'San Francisco'. The original row labels are preserved.
outside_silicon_valley_df = dict_df.loc[dict_df['City'].ne('San Francisco')]
outside_silicon_valley_df

Unnamed: 0,Name,Age,City
0,Alice,25,New York
2,Charlie,35,Seattle
