In [1]:
# module 6
# in this module we will learn how to add or remove columns in our dataframe.
# we also look at example how we can combine information of multiple columns into one.

In [2]:
import pandas as pd

mul_people={
    "first":["Ahammad","Corey","John","Jaden","Boss"],
    "last":["Shawki","Schafer","Joker","Joker","Zack"],
    "email":["ahammadshawki8@gmail.com","CoreyMScafer@gmail.com",
             "joker420@outlook.com","jokerOfficial@yahoo.com","zack2@gmail.com"],
}

dic_df=pd.DataFrame(mul_people)
dic_df

Unnamed: 0,first,last,email
0,Ahammad,Shawki,ahammadshawki8@gmail.com
1,Corey,Schafer,CoreyMScafer@gmail.com
2,John,Joker,joker420@outlook.com
3,Jaden,Joker,jokerOfficial@yahoo.com
4,Boss,Zack,zack2@gmail.com


In [3]:
# first look at adding column.
# adding columns will be pretty easy for us.
# because it is the same thing that we did when we were updating values.

# we can simply create a column and pass in the series of values that we wanted for the column to have.
# lets say we want to combine our first and last column into a single column called fullname.
dic_df["first"]+" "+dic_df["last"]

0    Ahammad Shawki
1     Corey Schafer
2        John Joker
3       Jaden Joker
4         Boss Zack
dtype: object

In [4]:
# now here we have the series of values here.
# if we wanted to add the series as a column in our database,
# we can create a new column and assign that column to that series.
dic_df["fullname"]=dic_df["first"]+" "+dic_df["last"]

In [5]:
dic_df

Unnamed: 0,first,last,email,fullname
0,Ahammad,Shawki,ahammadshawki8@gmail.com,Ahammad Shawki
1,Corey,Schafer,CoreyMScafer@gmail.com,Corey Schafer
2,John,Joker,joker420@outlook.com,John Joker
3,Jaden,Joker,jokerOfficial@yahoo.com,Jaden Joker
4,Boss,Zack,zack2@gmail.com,Boss Zack


In [6]:
# again here we are using string.
# we can also create a new column using the apply method.
# we cant use the dot notation(."fullname") while assigning a new column like this.
# if we do that, python will think that 
# we are trying to assign attribute onto the dataframe object and not a column.

In [15]:
# now lets look at removing columns.
# now that we have our fullname column.
# lets say we no longer need our first and last name column.
# to remove this we can use the drop() method in our dataframe.
dic_df.drop(columns=["first","last"],inplace=True)
# here we want to drops columns and we are using list for multiple columns
# if we want to make that changes parmanent we could set inplace arguement to True.

KeyError: "['first' 'last'] not found in axis"

In [16]:
dic_df

Unnamed: 0,email,fullname
0,ahammadshawki8@gmail.com,Ahammad Shawki
1,CoreyMScafer@gmail.com,Corey Schafer
2,joker420@outlook.com,John Joker
3,jokerOfficial@yahoo.com,Jaden Joker
4,zack2@gmail.com,Boss Zack


In [17]:
# if we want to reverse that process and split that fullname column into first and last column,
# then it is a little more complicated process.
# lets use the split string function in our fullname column
dic_df["fullname"].str.split(" ")

0    [Ahammad, Shawki]
1     [Corey, Schafer]
2        [John, Joker]
3       [Jaden, Joker]
4         [Boss, Zack]
Name: fullname, dtype: object

In [21]:
# if we run that we can see that we are getting first and last value in a list.
# if we want to assign this into two different columns, 
# then we need to expand this list so that they are actually in two different columns.
# so to do this in pandas, we can do that setting "expand" arguement to True.
dic_df["fullname"].str.split(" ", expand=True)

Unnamed: 0,0,1
0,Ahammad,Shawki
1,Corey,Schafer
2,John,Joker
3,Jaden,Joker
4,Boss,Zack


In [23]:
# if we run that we can see that everything is pretty common.
# but we are getting first and last column instead of a list here.
# now what we need to do is that set first and last column in our dataframe to  those two returned columns.
dic_df[["first","last"]]=dic_df["fullname"].str.split(" ", expand=True)

In [24]:
dic_df

Unnamed: 0,email,fullname,first,last
0,ahammadshawki8@gmail.com,Ahammad Shawki,Ahammad,Shawki
1,CoreyMScafer@gmail.com,Corey Schafer,Corey,Schafer
2,joker420@outlook.com,John Joker,John,Joker
3,jokerOfficial@yahoo.com,Jaden Joker,Jaden,Joker
4,zack2@gmail.com,Boss Zack,Boss,Zack


In [25]:
# now lets look at adding and removing rows of data.
# there are couple of different ways.

# first look at adding a single row of data.
# we can do this with append method.
dic_df.append({"first":"Tony"})
# here we need to pass a dictionay as the arguement.

TypeError: Can only append a Series if ignore_index=True or if the Series has a name

In [27]:
# if we run that we can see that we are getting an error.
# it can sometimes be difficult to read this pandas errors and figure out what the problem is.
# but in this case it tells us exactly what to do.
# it says that:
# TypeError: Can only append a Series if ignore_index=True or if the Series has a name
# so lets just ignore the index 
# and our existing dataframe will automatically assign this nwe row in index itself.
# we can do that by passing an arguement of ignore_index=True
dic_df.append({"first":"Tony"},ignore_index=True)

Unnamed: 0,email,fullname,first,last
0,ahammadshawki8@gmail.com,Ahammad Shawki,Ahammad,Shawki
1,CoreyMScafer@gmail.com,Corey Schafer,Corey,Schafer
2,joker420@outlook.com,John Joker,John,Joker
3,jokerOfficial@yahoo.com,Jaden Joker,Jaden,Joker
4,zack2@gmail.com,Boss Zack,Boss,Zack
5,,,Tony,


In [28]:
# we can see taht it work. we no longer getting an error.
# and also we can see that this new name was appended.
# as we only assign the first column value,
# all other column values was set to NaN value which is use for missing values.

In [30]:
# again if we have a dataframe that we want to append in our existing dataframe then we can do that as well.
# lets create a new dataframe here.
mul_people2={
    "first":["Tony","Steve","Peter"],
    "last":["Stark","Rogers","Perker"],
    "email":["iamironman@stark.com","forevercaptainamerica@avengers.com","neighbourspider@outlook.com"]
}

dic_df2=pd.DataFrame(mul_people2)
dic_df2

Unnamed: 0,first,last,email
0,Tony,Stark,iamironman@stark.com
1,Steve,Rogers,forevercaptainamerica@avengers.com
2,Peter,Perker,neighbourspider@outlook.com


In [32]:
# now lets say we want to add this to our existing dataframe.
# one way we can do that is to simply addend one dataframe to other.
# now they have conflicting indexes.
# and they also have columns that aren't in the same order.
# in order to cope with the conflicting indexes we can set ignore_index arguement to True.
dic_df.append(dic_df2,ignore_index=True)

Unnamed: 0,email,first,fullname,last
0,ahammadshawki8@gmail.com,Ahammad,Ahammad Shawki,Shawki
1,CoreyMScafer@gmail.com,Corey,Corey Schafer,Schafer
2,joker420@outlook.com,John,John Joker,Joker
3,jokerOfficial@yahoo.com,Jaden,Jaden Joker,Joker
4,zack2@gmail.com,Boss,Boss Zack,Zack
5,iamironman@stark.com,Tony,,Stark
6,forevercaptainamerica@avengers.com,Steve,,Rogers
7,neighbourspider@outlook.com,Peter,,Perker


In [33]:
# if we get an waring here,
# it is because we didnt pass all the columns in same order when appending this.
# so it give us a warning here that there are different ways to sort the columns.
# dont worry of this.
# but in future version of pandas, it going to set the sort to False automatically.
# if we wanted to get rid of the values, then we can add a sort arguenment and which is equal to False.

# if we want to make this changes parmanent then we dont have the inplace method.
# we can do that just by-
dic_df=dic_df.append(dic_df2,ignore_index=True)

In [34]:
dic_df

Unnamed: 0,email,first,fullname,last
0,ahammadshawki8@gmail.com,Ahammad,Ahammad Shawki,Shawki
1,CoreyMScafer@gmail.com,Corey,Corey Schafer,Schafer
2,joker420@outlook.com,John,John Joker,Joker
3,jokerOfficial@yahoo.com,Jaden,Jaden Joker,Joker
4,zack2@gmail.com,Boss,Boss Zack,Zack
5,iamironman@stark.com,Tony,,Stark
6,forevercaptainamerica@avengers.com,Steve,,Rogers
7,neighbourspider@outlook.com,Peter,,Perker


In [35]:
# lastly, lets look at removing rows.
# lets say we are a iron man fan and we want to remove steve rogers from our dataframe.
# we can do that as the same way that we did to removing columns.
# we can use the drop() method.
# but instead of specifying the column that we want to drop, 
# we need to specify the indexes thatwe want to drop.
dic_df.drop(index=6)
# if we want to apply the change into dataframe, then we need to set the inplace arguemnt to True.

Unnamed: 0,email,first,fullname,last
0,ahammadshawki8@gmail.com,Ahammad,Ahammad Shawki,Shawki
1,CoreyMScafer@gmail.com,Corey,Corey Schafer,Schafer
2,joker420@outlook.com,John,John Joker,Joker
3,jokerOfficial@yahoo.com,Jaden,Jaden Joker,Joker
4,zack2@gmail.com,Boss,Boss Zack,Zack
5,iamironman@stark.com,Tony,,Stark
7,neighbourspider@outlook.com,Peter,,Perker


In [47]:
# we might need to do some complicated and remove rows using conditional.
# we can do this using loc[] indexer.
# But we can also do this using drop().
# if i want to remove the rows which last name is Joker, then we can pass in the indexes of that filter.
conditional=(dic_df["last"]=="Joker")
dic_df.drop(index=dic_df[conditional].index)
# here our index equal to our conditional.
# in order to return the indexes we need to add index attribute after our conditional.

Unnamed: 0,email,first,fullname,last
0,ahammadshawki8@gmail.com,Ahammad,Ahammad Shawki,Shawki
1,CoreyMScafer@gmail.com,Corey,Corey Schafer,Schafer
4,zack2@gmail.com,Boss,Boss Zack,Zack
5,iamironman@stark.com,Tony,,Stark
6,forevercaptainamerica@avengers.com,Steve,,Rogers
7,neighbourspider@outlook.com,Peter,,Perker
