## Projecting and Filtering

Let us understand how to project as well as filter data in Data Frames.

In [None]:
# Run the companion notebook first; the cells below rely on the `orders` and
# `order_items` Data Frames (presumably created there from CSV files - verify).
%run 06_csv_to_pandas_data_frame.ipynb

In [None]:
# Display the orders Data Frame.
orders

In [None]:
# Display the order_items Data Frame.
order_items

* Projecting data

In [None]:
# Project a single column using attribute access.
orders.order_date

In [None]:
# Project the order_date column using bracket notation
# (equivalent to orders.order_date).
orders['order_date']

In [None]:
# Project order_item_order_id and order_item_subtotal.
# Passing a list of column names selects just those columns.
order_items[['order_item_order_id', 'order_item_subtotal']]

* Filtering data: get the order items where `order_item_order_id` is 2

In [None]:
# List the column names of order_items.
order_items.columns

In [None]:
# Comparing a column with a value yields a boolean Series:
# True for the rows where order_item_order_id is 2.
order_items.order_item_order_id == 2

In [None]:
# Use the boolean Series as a mask to keep only the matching rows.
order_items[order_items.order_item_order_id == 2]

In [None]:
# Build the boolean Series using bracket notation for the column.
order_items['order_item_order_id'] == 2

In [None]:
# Filter with a mask built using bracket notation for the column.
order_items[order_items['order_item_order_id'] == 2]

In [None]:
# Express the filter as a string expression using the query API.
order_items.query('order_item_order_id == 2')

In [None]:
# Combine the conditions with `&`. Each comparison needs its own parentheses
# because `&` binds more tightly than `==`, `>=` and `<=`.
order_items[
    (order_items.order_item_order_id == 2)
    & (order_items.order_item_subtotal >= 150)
    & (order_items.order_item_subtotal <= 250)
]

```{note}
The string passed to the `query` API is split across multiple lines for readability.
```

In [None]:
# Adjacent string literals are joined by Python into one query expression.
order_items.query(
    'order_item_order_id == 2 and '
    'order_item_subtotal >= 150 and '
    'order_item_subtotal <= 250'
)

In [None]:
# Exact-match filter on order_date: the whole value, including the time
# portion, has to match.
orders[orders.order_date == '2013-08-01 00:00:00.0']

In [None]:
# Exact-match filter via query; the value is a double-quoted literal inside
# the single-quoted expression string.
orders.query('order_date == "2013-08-01 00:00:00.0"')

```{note}
We can use the functions available as part of `str` by using `python` as the engine.
```

In [None]:
# A plain Python string used to try out str methods before applying them
# to the Data Frame column.
order_date = '2013-08-01 00:00:00.0'

In [None]:
# IPython help syntax: show the documentation of str.startswith.
order_date.startswith?

In [None]:
# startswith returns True when the string begins with the given prefix.
order_date.startswith('2013-08')

In [None]:
# Apply startswith to every order_date through the .str accessor;
# engine='python' is specified so the .str method call can be used in query.
orders.query('order_date.str.startswith("2013-08")', engine='python')

### Task 1
Get all the orders placed by a given customer (for example, `order_customer_id` 12431).

In [None]:
# Preview the first 10 orders.
orders.head(10)

In [None]:
# Orders placed by customer 12431, using the query API.
orders.query('order_customer_id == 12431')

In [None]:
# Orders placed by customer 12431, using a boolean mask with attribute access.
orders[orders.order_customer_id == 12431]

In [None]:
# Orders placed by customer 12431, using a boolean mask with bracket notation.
orders[orders['order_customer_id'] == 12431]

### Task 2

Get all the orders placed by a given customer in a given month. The month is passed in **yyyy-MM** format.

In [None]:
# Filter by customer and by month prefix in a single query expression.
orders.query(
    'order_customer_id == 12431 and '
    'order_date.str.startswith("2014-01")',
    engine='python'
)

In [None]:
# Filter by customer and by month prefix using boolean masks joined with `&`.
orders[
    (orders.order_customer_id == 12431)
    & orders.order_date.str.startswith('2014-01')
]

### Task 3
Get all the orders placed by the customer with id 12431 in January 2014 whose status is either PENDING_PAYMENT or PROCESSING.

In [None]:
# Filter by customer, month prefix and a set of statuses in one query expression.
orders.query(
    'order_customer_id == 12431 and '
    'order_date.str.startswith("2014-01") and '
    'order_status in ("PROCESSING", "PENDING_PAYMENT")',
    engine='python'
)

In [None]:
# Filter by customer, month prefix and a set of statuses using boolean masks.
orders[
    (orders.order_customer_id == 12431)
    & orders.order_date.str.startswith('2014-01')
    & orders.order_status.isin(['PROCESSING', 'PENDING_PAYMENT'])
]