# 4.4 - Data Wrangling & Subsetting







### This script contains the following points:

* 01 - Importing Libraries
* 02 - Importing Data
* 03 - Wrangling Procedures
    * a) Looking for missing values (NaN)
    * b) Renaming column names
    * c) Changing Variable’s Data Types
* 04 - Transposing Datasets
* 05 - Creating Subsets
* 06 - 4.4 - Data Wrangling & Subsetting
* 07 - Exporting Data

---

# 01 - Importing Libraries

In [1]:
# Importing Libraries

import numpy as np
import pandas as pd
import os

---

# 02 - Importing Data

In [2]:
# Define path

path = r'/Users/juanigalvalisi/01-07-2022 - Instacart Basket Analysis/'

In [3]:
# Import Data

df_ords = pd.read_csv(os.path.join(path, '02 - Data', 'Original Data', 'orders.csv'))
df_prods = pd.read_csv(os.path.join(path, '02 - Data', 'Original Data', 'products.csv'))
df_ords_prods_prior = pd.read_csv(os.path.join(path, '02 - Data', 'Original Data', 'order_products_prior.csv'))

In [4]:
# Check the output df_ords

df_ords.head()

Unnamed: 0,order_id,user_id,eval_set,order_number,order_dow,order_hour_of_day,days_since_prior_order
0,2539329,1,prior,1,2,8,
1,2398795,1,prior,2,3,7,15.0
2,473747,1,prior,3,3,12,21.0
3,2254736,1,prior,4,4,7,29.0
4,431534,1,prior,5,4,15,28.0


In [5]:
# Check the output df_prods

df_prods.head()

Unnamed: 0,product_id,product_name,aisle_id,department_id,prices
0,1,Chocolate Sandwich Cookies,61,19,5.8
1,2,All-Seasons Salt,104,13,9.3
2,3,Robust Golden Unsweetened Oolong Tea,94,7,4.5
3,4,Smart Ones Classic Favorites Mini Rigatoni Wit...,38,1,10.5
4,5,Green Chile Anytime Sauce,5,13,4.3


In [6]:
# Check the output df_ords_prods_prior

df_ords_prods_prior.head()

Unnamed: 0,order_id,product_id,add_to_cart_order,reordered
0,2,33120,1,1
1,2,28985,2,1
2,2,9327,3,0
3,2,45918,4,1
4,2,30035,5,0


---

# 03 - Wrangling procedures

In [7]:
# Drop 'eval_set' column from orders.csv

df_ords = df_ords.drop(columns = ['eval_set'])

#### a) Looking for missing values (NaN)

In [8]:
# 1. Looking for missing values (NaN) in days_since_prior_order variable

df_ords['days_since_prior_order'].value_counts(dropna = False)

30.0    369323
7.0     320608
6.0     240013
4.0     221696
3.0     217005
5.0     214503
NaN     206209
2.0     193206
8.0     181717
1.0     145247
9.0     118188
14.0    100230
10.0     95186
13.0     83214
11.0     80970
12.0     76146
0.0      67755
15.0     66579
16.0     46941
21.0     45470
17.0     39245
20.0     38527
18.0     35881
19.0     34384
22.0     32012
28.0     26777
23.0     23885
27.0     22013
24.0     20712
25.0     19234
29.0     19191
26.0     19016
Name: days_since_prior_order, dtype: int64

#### b) Renaming column names

In [9]:
# Rename column name

df_ords.rename(columns = {'order_dow' : 'orders_day_of_week'}, inplace = True)

In [10]:
# Check whether the code was executed successfully

df_ords.head()

Unnamed: 0,order_id,user_id,order_number,orders_day_of_week,order_hour_of_day,days_since_prior_order
0,2539329,1,1,2,8,
1,2398795,1,2,3,7,15.0
2,473747,1,3,3,12,21.0
3,2254736,1,4,4,7,29.0
4,431534,1,5,4,15,28.0


#### c) Changing Variable’s Data Types

In [11]:
# Change the data type to 'str' because they act as a key

df_ords['order_id'] = df_ords['order_id'].astype('str')
df_ords['user_id'] = df_ords['user_id'].astype('str')

In [12]:
# Change the data type to reduce size

df_ords['order_number'] = df_ords['order_number'].astype('int8')
df_ords['orders_day_of_week'] = df_ords['orders_day_of_week'].astype('int8')
df_ords['order_hour_of_day'] = df_ords['order_hour_of_day'].astype('int8')
df_ords['days_since_prior_order'] = df_ords['days_since_prior_order'].astype('float16')

In [13]:
# Checking if changes were made

df_ords.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3421083 entries, 0 to 3421082
Data columns (total 6 columns):
 #   Column                  Dtype  
---  ------                  -----  
 0   order_id                object 
 1   user_id                 object 
 2   order_number            int8   
 3   orders_day_of_week      int8   
 4   order_hour_of_day       int8   
 5   days_since_prior_order  float16
dtypes: float16(1), int8(3), object(2)
memory usage: 68.5+ MB


---

## 4. Transposing Data

In [14]:
# Imoporting data set departments.csv

df_dep = pd.read_csv(r'/Users/juanigalvalisi/01-07-2022 - Instacart Basket Analysis/02 - Data/Original Data/departments.csv', index_col = False)

In [15]:
# Function Transposing Data (only view)

df_dep.T

Unnamed: 0,0
department_id,department
1,frozen
2,other
3,bakery
4,produce
5,alcohol
6,international
7,beverages
8,pets
9,dry goods pasta


In [16]:
# Function Transposing Data (overwrite)

df_dep_t = df_dep.T

In [17]:
# Check the output

df_dep.T

Unnamed: 0,0
department_id,department
1,frozen
2,other
3,bakery
4,produce
5,alcohol
6,international
7,beverages
8,pets
9,dry goods pasta


In [18]:
# To get rid of the “0” and turn the first row of your dataframe into your headers

df_dep_t.reset_index()

Unnamed: 0,index,0
0,department_id,department
1,1,frozen
2,2,other
3,3,bakery
4,4,produce
5,5,alcohol
6,6,international
7,7,beverages
8,8,pets
9,9,dry goods pasta


In [19]:
# Create a new header for your dataframe

In [20]:
# 1) Create a new header: Take the first row of table_name for the header

new_header = df_dep_t.iloc[0]

In [21]:
new_header

0    department
Name: department_id, dtype: object

In [22]:
# 2) Remove the first row (copy everything from the first row onward from 
# the df_dep_t dataframe into a new dataframe, df_dep_t_new)

df_dep_t_new = df_dep_t[1:]

In [23]:
# Check the output

df_dep_t_new

Unnamed: 0,0
1,frozen
2,other
3,bakery
4,produce
5,alcohol
6,international
7,beverages
8,pets
9,dry goods pasta
10,bulk


In [24]:
# 3) Add a new header: Tell Python to use the list of column names in your new_header variable as your new header

df_dep_t_new.columns = new_header

In [25]:
# Check the output

df_dep_t_new

department_id,department
1,frozen
2,other
3,bakery
4,produce
5,alcohol
6,international
7,beverages
8,pets
9,dry goods pasta
10,bulk


In [26]:
# Creating Dictionaries

data_dict = df_dep_t_new.to_dict('index')

In [27]:
# Check the output

data_dict

{'1': {'department': 'frozen'},
 '2': {'department': 'other'},
 '3': {'department': 'bakery'},
 '4': {'department': 'produce'},
 '5': {'department': 'alcohol'},
 '6': {'department': 'international'},
 '7': {'department': 'beverages'},
 '8': {'department': 'pets'},
 '9': {'department': 'dry goods pasta'},
 '10': {'department': 'bulk'},
 '11': {'department': 'personal care'},
 '12': {'department': 'meat seafood'},
 '13': {'department': 'pantry'},
 '14': {'department': 'breakfast'},
 '15': {'department': 'canned goods'},
 '16': {'department': 'dairy eggs'},
 '17': {'department': 'household'},
 '18': {'department': 'babies'},
 '19': {'department': 'snacks'},
 '20': {'department': 'deli'},
 '21': {'department': 'missing'}}

In [28]:
# Check the output

df_prods.head()

Unnamed: 0,product_id,product_name,aisle_id,department_id,prices
0,1,Chocolate Sandwich Cookies,61,19,5.8
1,2,All-Seasons Salt,104,13,9.3
2,3,Robust Golden Unsweetened Oolong Tea,94,7,4.5
3,4,Smart Ones Classic Favorites Mini Rigatoni Wit...,38,1,10.5
4,5,Green Chile Anytime Sauce,5,13,4.3


In [29]:
print(data_dict.get('19'))

{'department': 'snacks'}


---

# 05 - Creating Subsets

In [30]:
# Create a subset for your df_prods dataframe that only contains data from the snacks department

df_snacks =  df_prods[df_prods['department_id'] == 19]

In [31]:
# Part I - Simply searches for the data in question (assigning each value either True or False depending on whether it meets the criteria)

df_prods['department_id'] == 19

0         True
1        False
2        False
3        False
4        False
         ...  
49688    False
49689    False
49690    False
49691    False
49692    False
Name: department_id, Length: 49693, dtype: bool

In [32]:
# Part II - A list of only those values within df_prods that are true

df_prods[df_prods['department_id'] == 19]

Unnamed: 0,product_id,product_name,aisle_id,department_id,prices
0,1,Chocolate Sandwich Cookies,61,19,5.8
15,16,Mint Chocolate Flavored Syrup,103,19,5.2
24,25,Salted Caramel Lean Protein & Fiber Bar,3,19,1.9
31,32,Nacho Cheese White Bean Chips,107,19,4.9
40,41,Organic Sourdough Einkorn Crackers Rosemary,78,19,6.5
...,...,...,...,...,...
49666,49662,Bacon Cheddar Pretzel Pieces,107,19,3.6
49669,49665,Super Dark Coconut Ash & Banana Chocolate Bar,45,19,6.9
49670,49666,Ginger Snaps Snacking Cookies,61,19,5.2
49675,49671,Milk Chocolate Drops,45,19,3.0


In [33]:
# Part III - Full comamnd

df_snacks = df_prods[df_prods['department_id'] == 19]

In [34]:
df_snacks

Unnamed: 0,product_id,product_name,aisle_id,department_id,prices
0,1,Chocolate Sandwich Cookies,61,19,5.8
15,16,Mint Chocolate Flavored Syrup,103,19,5.2
24,25,Salted Caramel Lean Protein & Fiber Bar,3,19,1.9
31,32,Nacho Cheese White Bean Chips,107,19,4.9
40,41,Organic Sourdough Einkorn Crackers Rosemary,78,19,6.5
...,...,...,...,...,...
49666,49662,Bacon Cheddar Pretzel Pieces,107,19,3.6
49669,49665,Super Dark Coconut Ash & Banana Chocolate Bar,45,19,6.9
49670,49666,Ginger Snaps Snacking Cookies,61,19,5.2
49675,49671,Milk Chocolate Drops,45,19,3.0


In [35]:
# OPTION B - Using LOC function

df_snacks_2 = df_prods.loc[df_prods['department_id'] == 19]

In [36]:
# Check the output

df_snacks_2

Unnamed: 0,product_id,product_name,aisle_id,department_id,prices
0,1,Chocolate Sandwich Cookies,61,19,5.8
15,16,Mint Chocolate Flavored Syrup,103,19,5.2
24,25,Salted Caramel Lean Protein & Fiber Bar,3,19,1.9
31,32,Nacho Cheese White Bean Chips,107,19,4.9
40,41,Organic Sourdough Einkorn Crackers Rosemary,78,19,6.5
...,...,...,...,...,...
49666,49662,Bacon Cheddar Pretzel Pieces,107,19,3.6
49669,49665,Super Dark Coconut Ash & Banana Chocolate Bar,45,19,6.9
49670,49666,Ginger Snaps Snacking Cookies,61,19,5.2
49675,49671,Milk Chocolate Drops,45,19,3.0


In [37]:
# OPTION C - Using LOC + ISIN functions: tell the loc function to look into a list: isin([19])

df_snacks_3 = df_prods.loc[df_prods['department_id'].isin([19])]

In [38]:
# Check the output

df_snacks_3

Unnamed: 0,product_id,product_name,aisle_id,department_id,prices
0,1,Chocolate Sandwich Cookies,61,19,5.8
15,16,Mint Chocolate Flavored Syrup,103,19,5.2
24,25,Salted Caramel Lean Protein & Fiber Bar,3,19,1.9
31,32,Nacho Cheese White Bean Chips,107,19,4.9
40,41,Organic Sourdough Einkorn Crackers Rosemary,78,19,6.5
...,...,...,...,...,...
49666,49662,Bacon Cheddar Pretzel Pieces,107,19,3.6
49669,49665,Super Dark Coconut Ash & Banana Chocolate Bar,45,19,6.9
49670,49666,Ginger Snaps Snacking Cookies,61,19,5.2
49675,49671,Milk Chocolate Drops,45,19,3.0


In [39]:
df_prods.loc[df_prods['aisle_id'] == 107]

Unnamed: 0,product_id,product_name,aisle_id,department_id,prices
31,32,Nacho Cheese White Bean Chips,107,19,4.9
139,140,Chips Onion Chipotle Garlic,107,19,6.9
140,141,Restaurant Style Organic Chia & Quinoa Tortill...,107,19,2.9
165,166,Garlic Parmesan Pita Bread Chips,107,19,5.2
179,180,Simply Beyond Black Bean Tortilla Chips,107,19,3.0
...,...,...,...,...,...
49516,49512,Tiny Twists Prezels,107,19,5.1
49538,49534,Jalapeño Seasoned Potato Chips,107,19,5.0
49581,49577,Wavy Ranch Potato Chips,107,19,2.8
49622,49618,Mac n' Cheese Puffs,107,19,2.6


---

# 06 - 4.4 - Data Wrangling & Subsetting

In [40]:
# 2. Find another identifier variable in the df_ords dataframe that doesn’t need to be
# included in your analysis as a numeric variable and change it to a suitable format

In [41]:
df_ords.head(20)

Unnamed: 0,order_id,user_id,order_number,orders_day_of_week,order_hour_of_day,days_since_prior_order
0,2539329,1,1,2,8,
1,2398795,1,2,3,7,15.0
2,473747,1,3,3,12,21.0
3,2254736,1,4,4,7,29.0
4,431534,1,5,4,15,28.0
5,3367565,1,6,2,7,19.0
6,550135,1,7,1,9,20.0
7,3108588,1,8,1,14,14.0
8,2295261,1,9,1,16,0.0
9,2550362,1,10,4,8,30.0


#### I) Data Wrangling to df_ords

In [42]:
# Change the data type to 'str' because they act as a key

df_ords['order_id'] = df_ords['order_id'].astype('str')
df_ords['user_id'] = df_ords['user_id'].astype('str')

In [43]:
# Change the data type to reduce size

df_ords['order_number'] = df_ords['order_number'].astype('int8')
df_ords['orders_day_of_week'] = df_ords['orders_day_of_week'].astype('int8')
df_ords['order_hour_of_day'] = df_ords['order_hour_of_day'].astype('int8')
df_ords['days_since_prior_order'] = df_ords['days_since_prior_order'].astype('float16')

In [44]:
# Checking if changes were made

df_ords.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3421083 entries, 0 to 3421082
Data columns (total 6 columns):
 #   Column                  Dtype  
---  ------                  -----  
 0   order_id                object 
 1   user_id                 object 
 2   order_number            int8   
 3   orders_day_of_week      int8   
 4   order_hour_of_day       int8   
 5   days_since_prior_order  float16
dtypes: float16(1), int8(3), object(2)
memory usage: 68.5+ MB


#### II) Data Wrangling to df_prods

In [45]:
# Change the data type to 'str' because they act as a key

df_prods['product_id'] = df_prods['product_id'].astype('str')
df_prods['aisle_id'] = df_prods['aisle_id'].astype('str')
df_prods['department_id'] = df_prods['department_id'].astype('str')

In [46]:
# Change the data type to reduce size

df_prods['prices'] = df_prods['prices'].astype('float32')

In [47]:
df_prods.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49693 entries, 0 to 49692
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   product_id     49693 non-null  object 
 1   product_name   49677 non-null  object 
 2   aisle_id       49693 non-null  object 
 3   department_id  49693 non-null  object 
 4   prices         49693 non-null  float32
dtypes: float32(1), object(4)
memory usage: 1.7+ MB


#### III) Data Wrangling to df_ords_prods_prior

In [48]:
# Change the data type to 'str' because they act as a key

df_ords_prods_prior['product_id'] = df_ords_prods_prior['product_id'].astype('str')
df_ords_prods_prior['order_id'] = df_ords_prods_prior['order_id'].astype('str')

In [49]:
# Change the data type to reduce size

df_ords_prods_prior['reordered'] = df_ords_prods_prior['reordered'].astype('int8')
df_ords_prods_prior['add_to_cart_order'] = df_ords_prods_prior['add_to_cart_order'].astype('int8')

In [50]:
df_ords_prods_prior.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32434489 entries, 0 to 32434488
Data columns (total 4 columns):
 #   Column             Dtype 
---  ------             ----- 
 0   order_id           object
 1   product_id         object
 2   add_to_cart_order  int8  
 3   reordered          int8  
dtypes: int8(2), object(2)
memory usage: 556.8+ MB


In [51]:
# Change the data type to 'str' because they act as a key

df_prods['product_id'] = df_prods['product_id'].astype('str')
df_prods['aisle_id'] = df_prods['aisle_id'].astype('str')
df_prods['department_id'] = df_prods['department_id'].astype('str')

# Change the data type to reduce size

df_prods['prices'] = df_prods['prices'].astype('float32')

df_prods.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49693 entries, 0 to 49692
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   product_id     49693 non-null  object 
 1   product_name   49677 non-null  object 
 2   aisle_id       49693 non-null  object 
 3   department_id  49693 non-null  object 
 4   prices         49693 non-null  float32
dtypes: float32(1), object(4)
memory usage: 1.7+ MB


In [52]:
# Changing 'days_since_prior_order' column

df_ords.rename(columns = {'days_since_prior_order': 'days_last_order'}, inplace = True)
df_ords.head()

Unnamed: 0,order_id,user_id,order_number,orders_day_of_week,order_hour_of_day,days_last_order
0,2539329,1,1,2,8,
1,2398795,1,2,3,7,15.0
2,473747,1,3,3,12,21.0
3,2254736,1,4,4,7,29.0
4,431534,1,5,4,15,28.0


In [53]:
# 4. Your client wants to know what the busiest hour is for placing orders.
# Find the frequency of the corresponding variable and share your findings

In [54]:
df_ords['order_hour_of_day'].value_counts(dropna = False)

10    288418
11    284728
15    283639
14    283042
13    277999
12    272841
16    272553
9     257812
17    228795
18    182912
8     178201
19    140569
20    104292
7      91868
21     78109
22     61468
23     40043
6      30529
0      22758
1      12398
5       9569
2       7539
4       5527
3       5474
Name: order_hour_of_day, dtype: int64

The busiest time of day is between 9 and 17, especially from 10 to 11 and 14 to 15.

In [55]:
# 5. Determine the meaning behind a value of 4 in the "department_id" column
# within the df_prods dataframe using a data dictionary.

In [56]:
print(data_dict.get('4'))

{'department': 'produce'}


In [57]:
# 6. The sales team in your client’s organization wants to know more about breakfast item sales.
# Create a subset containing only the required information.

In [58]:
data_dict

{'1': {'department': 'frozen'},
 '2': {'department': 'other'},
 '3': {'department': 'bakery'},
 '4': {'department': 'produce'},
 '5': {'department': 'alcohol'},
 '6': {'department': 'international'},
 '7': {'department': 'beverages'},
 '8': {'department': 'pets'},
 '9': {'department': 'dry goods pasta'},
 '10': {'department': 'bulk'},
 '11': {'department': 'personal care'},
 '12': {'department': 'meat seafood'},
 '13': {'department': 'pantry'},
 '14': {'department': 'breakfast'},
 '15': {'department': 'canned goods'},
 '16': {'department': 'dairy eggs'},
 '17': {'department': 'household'},
 '18': {'department': 'babies'},
 '19': {'department': 'snacks'},
 '20': {'department': 'deli'},
 '21': {'department': 'missing'}}

In [59]:
print(data_dict.get('14'))

{'department': 'breakfast'}


In [60]:
# Filtering by the following criteria: department_id == 14

df_breakfast = df_prods[df_prods['department_id'] == 14]

In [61]:
# Check the output

df_breakfast

Unnamed: 0,product_id,product_name,aisle_id,department_id,prices


In [62]:
# 7. They’d also like to see details about customers who might be throwing dinner parties. Your task is to find
# all observations from the entire dataframe that include items from the following departments: alcohol, deli,
# beverages, and meat/seafood. You’ll need to present this subset to your client

In [63]:
# Filtering by the following departments: alcohol, deli, beverages, and meat/seafood

df_dinner = df_prods.loc[df_prods['department_id'].isin([5, 7, 12, 20])]

In [64]:
df_dinner

Unnamed: 0,product_id,product_name,aisle_id,department_id,prices


In [65]:
# 8. It’s important that you keep track of total counts in your dataframes.
# How many rows does the last dataframe you created have?

In [66]:
df_dinner.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 0 entries
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   product_id     0 non-null      object 
 1   product_name   0 non-null      object 
 2   aisle_id       0 non-null      object 
 3   department_id  0 non-null      object 
 4   prices         0 non-null      float32
dtypes: float32(1), object(4)
memory usage: 0.0+ bytes


Our new data frame, df_dinner, has 7650 rows and 5 columns in total.

In [67]:
# 9. Someone from the data engineers team in Instacart thinks they’ve spotted something strange about
# the customer with a "user_id" of “1.” Extract all the information you can about this user.

In [68]:
# Filtering by the following criteria: user_id == '1'

df_customer_one = df_ords.loc[df_ords['user_id'] == '1']

In [69]:
df_customer_one

Unnamed: 0,order_id,user_id,order_number,orders_day_of_week,order_hour_of_day,days_last_order
0,2539329,1,1,2,8,
1,2398795,1,2,3,7,15.0
2,473747,1,3,3,12,21.0
3,2254736,1,4,4,7,29.0
4,431534,1,5,4,15,28.0
5,3367565,1,6,2,7,19.0
6,550135,1,7,1,9,20.0
7,3108588,1,8,1,14,14.0
8,2295261,1,9,1,16,0.0
9,2550362,1,10,4,8,30.0


In [70]:
# 10. You also need to provide some details about this user’s behavior.
# What basic stats can you provide based on the information you have?

In [71]:
df_customer_one.describe()

Unnamed: 0,order_number,orders_day_of_week,order_hour_of_day,days_last_order
count,11.0,11.0,11.0,10.0
mean,6.0,2.636364,10.090909,19.0
std,3.316625,1.286291,3.477198,9.03125
min,1.0,1.0,7.0,0.0
25%,3.5,1.5,7.5,14.25
50%,6.0,3.0,8.0,19.5
75%,8.5,4.0,13.0,26.25
max,11.0,4.0,16.0,30.0


In [72]:
df_customer_one['orders_day_of_week'].value_counts(dropna = False)

4    4
1    3
2    2
3    2
Name: orders_day_of_week, dtype: int64

In [73]:
df_customer_one['order_hour_of_day'].value_counts(dropna = False)

8     3
7     3
12    1
15    1
9     1
14    1
16    1
Name: order_hour_of_day, dtype: int64

Thursdays are the favorite days of customer one and, regarding the hours, from 7 to 8 am are the favorite time where the consumer has made the most consumption.

---

# 07 - Exporting Data

In [74]:
# 12. Export your df_ords dataframe as “orders_wrangled.csv” in your “Prepared Data” folder

In [75]:
# Export df_ords dataframe as .csv

df_ords.to_csv(os.path.join(path, '02 - Data', 'Prepared Data', 'orders_wrangled.csv'))

In [76]:
# 13. Export the df_dep_t_new dataframe as “departments_wrangled.csv” in your “Prepared Data” folder
# so that you have a “.csv” file of your departments data in the correct format

In [77]:
# Export df_dep_t_new as .csv

df_dep_t_new.to_csv(os.path.join(path, '02 - Data', 'Prepared Data', 'departments_wrangled.csv'))

In [78]:
# Export df_ords_prods_prior as .csv

df_ords_prods_prior.to_csv(os.path.join(path, '02 - Data', 'Prepared Data', 'orders_products_prior_wrangled.csv'))