# Pandas Part 6

### Add/Remove Rows and Columns From Dataframes

Link: https://youtu.be/HQ6XO9eT-fc

In [1]:
import pandas as pd

#### Adding columns

In [2]:
# Sample data: one key per column, one list entry per person (row).
people = dict(
    first=['John', 'Jane', 'Jim'],
    last=['Doe', 'Doe', 'Brown'],
    email=[
        'john.doe@example.com',
        'jane.doe@example.com',
        'jim.brown@example.com',
    ],
)

In [3]:
# Build the DataFrame from the dict: each key becomes a column and each
# list supplies that column's values, one per row.
people_df = pd.DataFrame(data=people)
people_df

Unnamed: 0,first,last,email
0,John,Doe,john.doe@example.com
1,Jane,Doe,jane.doe@example.com
2,Jim,Brown,jim.brown@example.com


In [4]:
# Assigning to a column name that does not exist yet creates the column.
# Here it holds the first and last name joined by a single space.
people_df['full_name'] = people_df['first'].str.cat(people_df['last'], sep=' ')
people_df

Unnamed: 0,first,last,email,full_name
0,John,Doe,john.doe@example.com,John Doe
1,Jane,Doe,jane.doe@example.com,Jane Doe
2,Jim,Brown,jim.brown@example.com,Jim Brown


#### Removing columns

In [5]:
# drop() returns a new frame without the listed columns; reassigning the
# result (instead of inplace=True) keeps the call chainable and makes the
# change to people_df explicit.
people_df = people_df.drop(columns=['first', 'last'])
people_df

Unnamed: 0,email,full_name
0,john.doe@example.com,John Doe
1,jane.doe@example.com,Jane Doe
2,jim.brown@example.com,Jim Brown


In [6]:
# expand=True makes str.split return a DataFrame (one column per piece)
# instead of a Series of lists, so it can be assigned to two columns at once.
# n=1 splits on the first space only, so there are never more than two pieces
# to assign, even when a name contains additional spaces.
people_df[['first', 'last']] = people_df['full_name'].str.split(' ', n=1, expand=True)
people_df

Unnamed: 0,email,full_name,first,last
0,john.doe@example.com,John Doe,John,Doe
1,jane.doe@example.com,Jane Doe,Jane,Doe
2,jim.brown@example.com,Jim Brown,Jim,Brown


#### Adding rows 

Explanation from StackOverflow as to why the `DataFrame.append` method was deprecated:

_Create a list with your dictionaries, if they are needed, and then create a new dataframe with_

```df = pd.DataFrame.from_records(your_list)```

_A list's "append" method is very efficient and won't ever be deprecated. DataFrames, on the other hand, frequently have to be recreated and all their data copied over on every append, due to their design - that is why the method was deprecated._

In [7]:
# The append method is deprecated; use pd.concat instead.
# ignore_index=True renumbers the rows 0..n-1, which the .loc lines below
# rely on: len(people_df) is then the first label that does not exist yet.
tony_row = pd.DataFrame.from_records([{'first': 'Tony'}])
people_df = pd.concat([people_df, tony_row], ignore_index=True)

# Alternative: assigning to a row label that does not exist enlarges the
# frame. Columns that are not listed are left empty (NaN) in the new row.
people_df.loc[len(people_df), ['first', 'last']] = 'Mary', 'Jane'

# Name the target columns explicitly (rather than relying on the current
# order of people_df.columns) so each value always lands in its own column.
people_df.loc[len(people_df), ['email', 'full_name', 'first', 'last']] = (
    'tess.trig@example.com', 'Tess Trig', 'Tess', 'Trig'
)

In [8]:
# Display the frame to check the three newly added rows (Tony, Mary, Tess).
people_df

Unnamed: 0,email,full_name,first,last
0,john.doe@example.com,John Doe,John,Doe
1,jane.doe@example.com,Jane Doe,Jane,Doe
2,jim.brown@example.com,Jim Brown,Jim,Brown
3,,,Tony,
4,,,Mary,Jane
5,tess.trig@example.com,Tess Trig,Tess,Trig


In [9]:
# Second batch of people to add. Note there is no 'full_name' key here.
avengers = dict(
    first=['Tony', 'Steve'],
    last=['Stark', 'Rogers'],
    email=['IronMan@avenge.com', 'Cap@avenge.com'],
)

In [10]:
# Stack the avengers rows underneath the existing ones. Columns are matched
# by name, so the missing 'full_name' is left empty for the new rows, and
# ignore_index=True gives the combined frame a fresh 0..n-1 index.
people_df = pd.concat(
    objs=[people_df, pd.DataFrame(data=avengers)],
    ignore_index=True,
)
people_df

Unnamed: 0,email,full_name,first,last
0,john.doe@example.com,John Doe,John,Doe
1,jane.doe@example.com,Jane Doe,Jane,Doe
2,jim.brown@example.com,Jim Brown,Jim,Brown
3,,,Tony,
4,,,Mary,Jane
5,tess.trig@example.com,Tess Trig,Tess,Trig
6,IronMan@avenge.com,,Tony,Stark
7,Cap@avenge.com,,Steve,Rogers


#### Removing rows

In [11]:
# Drop the rows labelled 3, 4 and 5 (the end of range is exclusive), then
# renumber the remaining rows 0..n-1; drop=True discards the old labels
# instead of keeping them as a column. Chaining and reassigning replaces the
# previous mix of inplace=True and reassignment.
people_df = people_df.drop(index=range(3, 6)).reset_index(drop=True)
people_df

Unnamed: 0,email,full_name,first,last
0,john.doe@example.com,John Doe,John,Doe
1,jane.doe@example.com,Jane Doe,Jane,Doe
2,jim.brown@example.com,Jim Brown,Jim,Brown
3,IronMan@avenge.com,,Tony,Stark
4,Cap@avenge.com,,Steve,Rogers


In [12]:
# Boolean mask: True for every row whose last name is 'Doe'
# (people_df[filt_last] would show exactly those rows).
filt_last = people_df['last'] == 'Doe'

# Look up the index labels of the matching rows and drop them. The result is
# only displayed, not assigned, so people_df itself still contains the rows.
people_df.drop(index=people_df.loc[filt_last].index)

Unnamed: 0,email,full_name,first,last
2,jim.brown@example.com,Jim Brown,Jim,Brown
3,IronMan@avenge.com,,Tony,Stark
4,Cap@avenge.com,,Steve,Rogers
