## Using Python to Extract Data From Files

In [1]:
import os
import pandas as pd

### 1.0. Load Data from a Comma-Separated Values (CSV) File

In [2]:
# The products CSV is expected in a `data` folder under the current working directory.
data_dir = os.path.join(os.getcwd(), 'data')
data_file = os.path.join(data_dir, 'northwind_products.csv')

df = pd.read_csv(
    data_file,
    index_col=0,  # use the first CSV column as the row index
    header=0,     # take column names from the first line of the file
)
df.head()

Unnamed: 0,supplier_ids,id,product_code,product_name,description,standard_cost,list_price,reorder_level,target_level,quantity_per_unit,discontinued,minimum_reorder_quantity,category,attachments
0,4,1,NWTB-1,Northwind Traders Chai,,13.5,18.0,10,40,10 boxes x 20 bags,0,10.0,Beverages,b''
1,10,3,NWTCO-3,Northwind Traders Syrup,,7.5,10.0,25,100,12 - 550 ml bottles,0,25.0,Condiments,b''
2,10,4,NWTCO-4,Northwind Traders Cajun Seasoning,,16.5,22.0,10,40,48 - 6 oz jars,0,10.0,Condiments,b''
3,10,5,NWTO-5,Northwind Traders Olive Oil,,16.0125,21.35,10,40,36 boxes,0,10.0,Oil,b''
4,2;6,6,NWTJP-6,Northwind Traders Boysenberry Spread,,18.75,25.0,25,100,12 - 8 oz jars,0,25.0,"Jams, Preserves",b''


### 2.0. Drop Unimportant Features

In [3]:
# Remove the two columns that are not needed for the rest of this walkthrough.
# The result is rebound to `df` instead of mutating in place, and
# errors='ignore' skips labels that are already absent, so re-running this
# cell does not raise KeyError.
df = df.drop(columns=['description', 'attachments'], errors='ignore')
df.columns

Index(['supplier_ids', 'id', 'product_code', 'product_name', 'standard_cost',
       'list_price', 'reorder_level', 'target_level', 'quantity_per_unit',
       'discontinued', 'minimum_reorder_quantity', 'category'],
      dtype='object')

### 3.0. Add a New Feature Using a Map (Dictionary) to Assign New Values

In [4]:
# Translate the numeric `discontinued` column into a text label.
# Values that are not keys of the mapping come out as NaN.
df['discontinued_desc'] = df['discontinued'].map({0: 'False', 1: 'True'})
df.head()

Unnamed: 0,supplier_ids,id,product_code,product_name,standard_cost,list_price,reorder_level,target_level,quantity_per_unit,discontinued,minimum_reorder_quantity,category,discontinued_desc
0,4,1,NWTB-1,Northwind Traders Chai,13.5,18.0,10,40,10 boxes x 20 bags,0,10.0,Beverages,False
1,10,3,NWTCO-3,Northwind Traders Syrup,7.5,10.0,25,100,12 - 550 ml bottles,0,25.0,Condiments,False
2,10,4,NWTCO-4,Northwind Traders Cajun Seasoning,16.5,22.0,10,40,48 - 6 oz jars,0,10.0,Condiments,False
3,10,5,NWTO-5,Northwind Traders Olive Oil,16.0125,21.35,10,40,36 boxes,0,10.0,Oil,False
4,2;6,6,NWTJP-6,Northwind Traders Boysenberry Spread,18.75,25.0,25,100,12 - 8 oz jars,0,25.0,"Jams, Preserves",False


### 4.0. Identify Duplicated Observations

In [5]:
# List every row whose product_code appears more than once; keep=False marks
# all occurrences as duplicates rather than leaving one of them unflagged.
df.loc[df.duplicated(subset=['product_code'], keep=False)]

Unnamed: 0,supplier_ids,id,product_code,product_name,standard_cost,list_price,reorder_level,target_level,quantity_per_unit,discontinued,minimum_reorder_quantity,category,discontinued_desc
4,2;6,6,NWTJP-6,Northwind Traders Boysenberry Spread,18.75,25.0,25,100,12 - 8 oz jars,0,25.0,"Jams, Preserves",False
10,2;6,20,NWTJP-6,Northwind Traders Marmalade,60.75,81.0,10,40,30 gift boxes,0,10.0,"Jams, Preserves",False
28,1,82,NWTC-82,Northwind Traders Granola,2.0,4.0,20,100,,0,,Cereal,False
42,1,97,NWTC-82,Northwind Traders Hot Cereal,3.0,5.0,50,200,,0,,Cereal,False
