# Dropping elements from DataFrames

## Setup

In [1]:
import pandas as pd

## Creation

Creation of an example DataFrame (starting from a dictionary of dictionaries):

In [2]:
# Raw example data: the outer keys become DataFrame columns and the inner keys
# (country names) become the row labels. "Monarch" only has entries for three
# countries, so the remaining countries end up with a missing value there.
data = dict(
    Capital=dict(
        Spain="Madrid",
        Belgium="Brussels",
        France="Paris",
        Italy="Roma",
        Germany="Berlin",
        Portugal="Lisbon",
        Norway="Oslo",
        Greece="Athens",
    ),
    Population=dict(
        Spain=46733038,
        Belgium=11449656,
        France=67076000,
        Italy=60390560,
        Germany=83122889,
        Portugal=10295909,
        Norway=5391369,
        Greece=10718565,
    ),
    Monarch=dict(
        Spain="Felipe VI",
        Belgium="Philippe",
        Norway="Harald V",
    ),
    Area=dict(
        Spain=505990,
        Belgium=30688,
        France=640679,
        Italy=301340,
        Germany=357022,
        Portugal=92212,
        Norway=385207,
        Greece=131957,
    ),
)

In [3]:
# Setup detail, not the focus of this notebook: build the example frame and
# store both text columns with pandas' dedicated "string" dtype.
df = pd.DataFrame(data).astype({"Capital": "string", "Monarch": "string"})

Apple stock data, taken from the [`matplotlib` sample datasets](https://github.com/matplotlib/sample_data/blob/master/aapl.csv):

In [4]:
# Setup detail, not the focus of this notebook: load the Apple quotes, convert
# "Date" to datetime64[ns], use it as the row index and sort the rows by date.
apple = (
    pd.read_csv("AAPL.csv")
    .astype({"Date": "datetime64[ns]"})
    .set_index("Date")
    .sort_index()
)

## Demo 1: Dropping columns

In [5]:
# Display the full example DataFrame as the starting point for the column demos.
df

Unnamed: 0,Capital,Population,Monarch,Area
Spain,Madrid,46733038,Felipe VI,505990
Belgium,Brussels,11449656,Philippe,30688
France,Paris,67076000,,640679
Italy,Roma,60390560,,301340
Germany,Berlin,83122889,,357022
Portugal,Lisbon,10295909,,92212
Norway,Oslo,5391369,Harald V,385207
Greece,Athens,10718565,,131957


Drop one column:

In [6]:
# Returns a new DataFrame without "Capital"; df itself keeps all its columns.
df.drop(columns="Capital")

Unnamed: 0,Population,Monarch,Area
Spain,46733038,Felipe VI,505990
Belgium,11449656,Philippe,30688
France,67076000,,640679
Italy,60390560,,301340
Germany,83122889,,357022
Portugal,10295909,,92212
Norway,5391369,Harald V,385207
Greece,10718565,,131957


Drop several columns:

In [7]:
# A list of labels drops several columns in a single call.
df.drop(columns=["Capital", "Monarch"])

Unnamed: 0,Population,Area
Spain,46733038,505990
Belgium,11449656,30688
France,67076000,640679
Italy,60390560,301340
Germany,83122889,357022
Portugal,10295909,92212
Norway,5391369,385207
Greece,10718565,131957


## Exercise 1

In [8]:
# Preview the first five rows to see which columns are available.
apple.head(n=5)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1984-09-07,26.5,26.87,26.25,26.5,2981600,3.02
1984-09-10,26.5,26.62,25.87,26.37,2346400,3.01
1984-09-11,26.62,27.37,26.62,26.87,5444000,3.07
1984-09-12,26.87,27.0,26.12,26.12,4773600,2.98
1984-09-13,27.5,27.62,27.5,27.5,7429600,3.14


Drop the "Adj Close" column from the `apple` DataFrame:

In [10]:
# Returns a copy of apple without the "Adj Close" column.
apple.drop(columns="Adj Close")

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1984-09-07,26.50,26.87,26.25,26.50,2981600
1984-09-10,26.50,26.62,25.87,26.37,2346400
1984-09-11,26.62,27.37,26.62,26.87,5444000
1984-09-12,26.87,27.00,26.12,26.12,4773600
1984-09-13,27.50,27.62,27.50,27.50,7429600
...,...,...,...,...,...
2008-10-08,85.91,96.33,85.68,89.79,78847900
2008-10-09,93.35,95.80,86.60,88.74,57763700
2008-10-10,85.70,100.00,85.00,96.80,79260700
2008-10-13,104.55,110.53,101.02,110.26,54967000


Drop the "High" and "Low" columns from the `apple` DataFrame:

In [12]:
# Returns a copy of apple without the "High" and "Low" columns.
apple.drop(columns=["High", "Low"])

Unnamed: 0_level_0,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1984-09-07,26.50,26.50,2981600,3.02
1984-09-10,26.50,26.37,2346400,3.01
1984-09-11,26.62,26.87,5444000,3.07
1984-09-12,26.87,26.12,4773600,2.98
1984-09-13,27.50,27.50,7429600,3.14
...,...,...,...,...
2008-10-08,85.91,89.79,78847900,89.79
2008-10-09,93.35,88.74,57763700,88.74
2008-10-10,85.70,96.80,79260700,96.80
2008-10-13,104.55,110.26,54967000,110.26


## Demo 2: Dropping rows

In [13]:
# Display df again: the earlier drop calls returned new DataFrames, so df still
# holds every column and row.
df

Unnamed: 0,Capital,Population,Monarch,Area
Spain,Madrid,46733038,Felipe VI,505990
Belgium,Brussels,11449656,Philippe,30688
France,Paris,67076000,,640679
Italy,Roma,60390560,,301340
Germany,Berlin,83122889,,357022
Portugal,Lisbon,10295909,,92212
Norway,Oslo,5391369,Harald V,385207
Greece,Athens,10718565,,131957


Drop one row:

In [14]:
# Rows are dropped by index label; this returns a new DataFrame without "Spain".
df.drop(index="Spain")

Unnamed: 0,Capital,Population,Monarch,Area
Belgium,Brussels,11449656,Philippe,30688
France,Paris,67076000,,640679
Italy,Roma,60390560,,301340
Germany,Berlin,83122889,,357022
Portugal,Lisbon,10295909,,92212
Norway,Oslo,5391369,Harald V,385207
Greece,Athens,10718565,,131957


Drop several rows:

In [15]:
# A list of index labels drops several rows in a single call.
df.drop(index=["Norway", "Germany"])

Unnamed: 0,Capital,Population,Monarch,Area
Spain,Madrid,46733038,Felipe VI,505990
Belgium,Brussels,11449656,Philippe,30688
France,Paris,67076000,,640679
Italy,Roma,60390560,,301340
Portugal,Lisbon,10295909,,92212
Greece,Athens,10718565,,131957


## Exercise 2

In [16]:
# Preview the first five rows; the dates shown are the row labels used below.
apple.head(n=5)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1984-09-07,26.5,26.87,26.25,26.5,2981600,3.02
1984-09-10,26.5,26.62,25.87,26.37,2346400,3.01
1984-09-11,26.62,27.37,26.62,26.87,5444000,3.07
1984-09-12,26.87,27.0,26.12,26.12,4773600,2.98
1984-09-13,27.5,27.62,27.5,27.5,7429600,3.14


Drop the "1984-09-07" row from the `apple` DataFrame:

In [21]:
# Passing the plain string '1984-09-07' does not work here: the index is made
# up of datetime objects, so the label is given as a Timestamp instead.
apple.drop(index=pd.Timestamp("1984-09-07"))

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1984-09-10,26.50,26.62,25.87,26.37,2346400,3.01
1984-09-11,26.62,27.37,26.62,26.87,5444000,3.07
1984-09-12,26.87,27.00,26.12,26.12,4773600,2.98
1984-09-13,27.50,27.62,27.50,27.50,7429600,3.14
1984-09-14,27.62,28.50,27.62,27.87,8826400,3.18
...,...,...,...,...,...,...
2008-10-08,85.91,96.33,85.68,89.79,78847900,89.79
2008-10-09,93.35,95.80,86.60,88.74,57763700,88.74
2008-10-10,85.70,100.00,85.00,96.80,79260700,96.80
2008-10-13,104.55,110.53,101.02,110.26,54967000,110.26


Drop the "2008-10-13" and "2008-10-14" rows from the `apple` DataFrame:

In [23]:
# The index holds datetime values, so each label is converted to a Timestamp
# before the rows are dropped.
apple.drop(index=[pd.Timestamp(day) for day in ("2008-10-13", "2008-10-14")])

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1984-09-07,26.50,26.87,26.25,26.50,2981600,3.02
1984-09-10,26.50,26.62,25.87,26.37,2346400,3.01
1984-09-11,26.62,27.37,26.62,26.87,5444000,3.07
1984-09-12,26.87,27.00,26.12,26.12,4773600,2.98
1984-09-13,27.50,27.62,27.50,27.50,7429600,3.14
...,...,...,...,...,...,...
2008-10-06,91.96,98.78,87.54,98.14,75264900,98.14
2008-10-07,100.48,101.50,88.95,89.16,67099000,89.16
2008-10-08,85.91,96.33,85.68,89.79,78847900,89.79
2008-10-09,93.35,95.80,86.60,88.74,57763700,88.74
