# Contents
1. Imports
2. Checks
3. Wrangling
4. Cleaning
5. Exports

# 1. Imports

In [23]:
#Libraries
import pandas as pd
import numpy as np
import os

In [24]:
#Path
#Project root folder. Set the OGS_PROJECT_PATH environment variable to run the
#notebook on another machine; the original location is kept as the default.
path = os.environ.get('OGS_PROJECT_PATH', r'/Users/davidgriesel/Documents/GitHub/202409_OGS')

In [25]:
#Load the original departments file from the project's data folder
df_departments = pd.read_csv(
    os.path.join(path, '02 - Data', 'Original Data', 'departments.csv')
)

# 2. Checks

In [26]:
#Dimensions of the dataset as loaded, shown as (rows, columns)
df_departments.shape

(1, 22)

In [27]:
#Preview the first rows of the dataset as loaded
df_departments.head()

Unnamed: 0,department_id,1,2,3,4,5,6,7,8,9,...,12,13,14,15,16,17,18,19,20,21
0,department,frozen,other,bakery,produce,alcohol,international,beverages,pets,dry goods pasta,...,meat seafood,pantry,breakfast,canned goods,dairy eggs,household,babies,snacks,deli,missing


##### Observations:
- Dataset contains variables in rows, and records in columns

# 3. Wrangling

## 3.1. Transpose dataframe

In [28]:
#Preview the transposed dataframe (display only - the result is not assigned)
df_departments.T

Unnamed: 0,0
department_id,department
1,frozen
2,other
3,bakery
4,produce
5,alcohol
6,international
7,beverages
8,pets
9,dry goods pasta


In [29]:
#Swap rows and columns, overwriting df_departments with the transposed frame
#NOTE: not idempotent - running this cell a second time transposes the frame back
df_departments = df_departments.transpose()

In [30]:
#Preview the dataframe with the index reset
#NOTE: the result is not assigned, so df_departments itself keeps its current index
df_departments.reset_index()

Unnamed: 0,index,0
0,department_id,department
1,1,frozen
2,2,other
3,3,bakery
4,4,produce
5,5,alcohol
6,6,international
7,7,beverages
8,8,pets
9,9,dry goods pasta


In [31]:
#Capture the first row (position 0) to use later as the column headers
new_headers = df_departments.iloc[0]

In [32]:
#Inspect the captured header row
new_headers

0    department
Name: department_id, dtype: object

In [33]:
#Drop the header row: keep every row from position 1 onward and overwrite the dataframe
#NOTE: not idempotent - each re-run removes one more row from the top
df_departments = df_departments.iloc[1:]

In [34]:
#View the dataframe after removing the header row
df_departments

Unnamed: 0,0
1,frozen
2,other
3,bakery
4,produce
5,alcohol
6,international
7,beverages
8,pets
9,dry goods pasta
10,bulk


In [35]:
#Use the captured header row as the column labels
df_departments.columns = new_headers

In [36]:
#Display the dataframe with its new column labels
df_departments

department_id,department
1,frozen
2,other
3,bakery
4,produce
5,alcohol
6,international
7,beverages
8,pets
9,dry goods pasta
10,bulk


##### Observations:
- Dataset successfully transposed: the department IDs are held in the index and `department` is the only column

# 4. Cleaning

In [37]:
#View the full dataset using the notebook's rich display
#(a bare last expression renders the frame as a table; print() only gives plain text)
df_departments

department_id       department
1                       frozen
2                        other
3                       bakery
4                      produce
5                      alcohol
6                international
7                    beverages
8                         pets
9              dry goods pasta
10                        bulk
11               personal care
12                meat seafood
13                      pantry
14                   breakfast
15                canned goods
16                  dairy eggs
17                   household
18                      babies
19                      snacks
20                        deli
21                     missing


In [38]:
#Consistency checks backing the observations below, followed by the data types
#Each assert stops the notebook with a clear message if the claim does not hold
assert not df_departments.isna().any().any(), 'missing values found'
assert not df_departments.duplicated().any(), 'duplicate records found'
assert df_departments.index.is_unique, 'duplicate index values found'
#A column has mixed types when its values map to more than one Python type
assert (df_departments.apply(lambda col: col.map(type).nunique()) <= 1).all(), 'mixed-type column found'
df_departments.dtypes

department_id
department    object
dtype: object

##### Observations:
- No inaccuracies, missing values, variables with mixed type data, or duplicate records identified

# 5. Exports

In [39]:
#Confirm dimensions (rows, columns) of the dataframe before export
df_departments.shape

(21, 1)

In [40]:
#Write the cleaned dataframe to the prepared-data folder as a pickle file
df_departments.to_pickle(
    os.path.join(path, '02 - Data', 'Prepared Data', '03_cleaned_departments.pkl')
)