# Dropping rows and columns
### The .drop() method drops rows and columns from a DataFrame
* Speify axis=0 to drop rows by label, and axis=1 to drop columns

In [1]:
import numpy as np
import pandas as pd

In [13]:
retail_df = pd.read_csv("../DataFrames/retail_2016_2017.csv")
retail_df

Unnamed: 0,id,date,store_nbr,family,sales,onpromotion
0,1945944,2016-01-01,1,AUTOMOTIVE,0.000,0
1,1945945,2016-01-01,1,BABY CARE,0.000,0
2,1945946,2016-01-01,1,BEAUTY,0.000,0
3,1945947,2016-01-01,1,BEVERAGES,0.000,0
4,1945948,2016-01-01,1,BOOKS,0.000,0
...,...,...,...,...,...,...
1054939,3000883,2017-08-15,9,POULTRY,438.133,0
1054940,3000884,2017-08-15,9,PREPARED FOODS,154.553,1
1054941,3000885,2017-08-15,9,PRODUCE,2419.729,148
1054942,3000886,2017-08-15,9,SCHOOL AND OFFICE SUPPLIES,121.000,8


### Dropping Columns

In [4]:
retail_df.drop("id", axis=1).head() # This returns the first 5 rows of the retail_df DataFrame without the "id" column

Unnamed: 0,date,store_nbr,family,sales,onpromotion
0,2016-01-01,1,AUTOMOTIVE,0.0,0
1,2016-01-01,1,BABY CARE,0.0,0
2,2016-01-01,1,BEAUTY,0.0,0
3,2016-01-01,1,BEVERAGES,0.0,0
4,2016-01-01,1,BOOKS,0.0,0


In [14]:
retail_df.drop(["id", "onpromotion"], inplace=True, axis=1) # You can specify inplace=True to permanently remove rows or columns from a DataFrame
retail_df.head()

Unnamed: 0,date,store_nbr,family,sales
0,2016-01-01,1,AUTOMOTIVE,0.0
1,2016-01-01,1,BABY CARE,0.0
2,2016-01-01,1,BEAUTY,0.0
3,2016-01-01,1,BEVERAGES,0.0
4,2016-01-01,1,BOOKS,0.0


### Dropping unncessary columns early in your workflow to save memory and make DataFrames more manageable

### Dropping rows 

In [15]:
retail_df = pd.read_csv("../DataFrames/retail_2016_2017.csv")
retail_df

Unnamed: 0,id,date,store_nbr,family,sales,onpromotion
0,1945944,2016-01-01,1,AUTOMOTIVE,0.000,0
1,1945945,2016-01-01,1,BABY CARE,0.000,0
2,1945946,2016-01-01,1,BEAUTY,0.000,0
3,1945947,2016-01-01,1,BEVERAGES,0.000,0
4,1945948,2016-01-01,1,BOOKS,0.000,0
...,...,...,...,...,...,...
1054939,3000883,2017-08-15,9,POULTRY,438.133,0
1054940,3000884,2017-08-15,9,PREPARED FOODS,154.553,1
1054941,3000885,2017-08-15,9,PRODUCE,2419.729,148
1054942,3000886,2017-08-15,9,SCHOOL AND OFFICE SUPPLIES,121.000,8


In [17]:
retail_df.drop([0], axis=0).head() # Row Label is passed as a list

Unnamed: 0,id,date,store_nbr,family,sales,onpromotion
1,1945945,2016-01-01,1,BABY CARE,0.0,0
2,1945946,2016-01-01,1,BEAUTY,0.0,0
3,1945947,2016-01-01,1,BEVERAGES,0.0,0
4,1945948,2016-01-01,1,BOOKS,0.0,0
5,1945949,2016-01-01,1,BREAD/BAKERY,0.0,0


In [19]:
retail_df.drop(range(5), axis=0).head() # ranges can be passed to drop consecutive labels, in this case 0-4

Unnamed: 0,id,date,store_nbr,family,sales,onpromotion
5,1945949,2016-01-01,1,BREAD/BAKERY,0.0,0
6,1945950,2016-01-01,1,CELEBRATION,0.0,0
7,1945951,2016-01-01,1,CLEANING,0.0,0
8,1945952,2016-01-01,1,DAIRY,0.0,0
9,1945953,2016-01-01,1,DELI,0.0,0


### Typically you won't drop rows but instead columns. But you would drop rows via slicing or filtering

### More examples

In [27]:
oil = pd.read_csv("../DataFrames/oil.csv")
oil.columns = ['date', 'price']
oil['euro_price'] = oil['price'] * 1.1
oil.head()


Unnamed: 0,date,price,euro_price
0,2013-01-01,,
1,2013-01-02,93.14,102.454
2,2013-01-03,92.97,102.267
3,2013-01-04,93.12,102.432
4,2013-01-07,93.2,102.52


In [30]:
oil_euro = oil.drop('price', axis=1).head() # we dont want to drop this label inplace but we want to keep the dataframe for workflow. so we'll assign it to a variable
oil_euro

Unnamed: 0,date,euro_price
0,2013-01-01,
1,2013-01-02,102.454
2,2013-01-03,102.267
3,2013-01-04,102.432
4,2013-01-07,102.52


In [36]:
oil_euro.drop(1, axis=0).head()

Unnamed: 0,date,euro_price
0,2013-01-01,
2,2013-01-03,102.267
3,2013-01-04,102.432
4,2013-01-07,102.52


In [37]:
oil_euro

Unnamed: 0,date,euro_price
0,2013-01-01,
1,2013-01-02,102.454
2,2013-01-03,102.267
3,2013-01-04,102.432
4,2013-01-07,102.52
