In [1]:
import pandas as pd
import numpy as np

In [2]:
person = {
    "first": "Corey",
    "last": "Schafer",
    "email": ",CoreyMSchafer@gmail.com"
}

In [3]:
people = {
    "first": ["Corey", "Jane", "John", "Adam"],
    "last": ["Schafer", "Doe", "Doe", "Doe"],
    "email": ["CoreyMSchafer@gmail.com", "JaneDoe@email.com", "JohnDoe@email.com", "a@email.com" ]
}

In [4]:
people2 = {
    "first": ["Tony", "Steve"],
    "last": ["Stark", "Rogers"],
    "email": ["IronMan@avenge.com", "Cap@avenge.com"]
}

In [5]:
people3 = {
    "first": ["Corey", "Jane", "John", "Chris", np.nan, None, "NA"],
    "last": ["Schafer", "Doe", "Doe", "Schafer", np.nan, np.nan, "Missing"],
    "email": ["CoreyMSchafer@gmail.com", "JaneDoe@email.com", "JohnDoe@email.com", None, np.nan, "Anonymous@email.com", "NA"],
    "age": ["33", "55", "63", "36", None, None, "Missing"]
}

In [6]:
people["email"]


['CoreyMSchafer@gmail.com',
 'JaneDoe@email.com',
 'JohnDoe@email.com',
 'a@email.com']

In [7]:
df = pd.DataFrame(people)
df2 = pd.DataFrame(people)
df3 = pd.DataFrame(people2)
df4 = pd.DataFrame(people3)

In [8]:
df4.replace("NA", np.nan, inplace=True)
df4.replace("Missing", np.nan, inplace=True)

In [9]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com
3,Adam,Doe,a@email.com


In [10]:
df["email"]

0    CoreyMSchafer@gmail.com
1          JaneDoe@email.com
2          JohnDoe@email.com
3                a@email.com
Name: email, dtype: object

In [11]:
type(df["email"])

pandas.core.series.Series

In [12]:
df.email

0    CoreyMSchafer@gmail.com
1          JaneDoe@email.com
2          JohnDoe@email.com
3                a@email.com
Name: email, dtype: object

In [13]:
df[["last", "email"]]

Unnamed: 0,last,email
0,Schafer,CoreyMSchafer@gmail.com
1,Doe,JaneDoe@email.com
2,Doe,JohnDoe@email.com
3,Doe,a@email.com


In [14]:
df.columns

Index(['first', 'last', 'email'], dtype='object')

In [15]:
df.iloc[[0, 1], 2]

0    CoreyMSchafer@gmail.com
1          JaneDoe@email.com
Name: email, dtype: object

In [16]:
df.loc[[0,1], ["last", "email"]]

Unnamed: 0,last,email
0,Schafer,CoreyMSchafer@gmail.com
1,Doe,JaneDoe@email.com


In [17]:
df.set_index("email", inplace = True)

In [18]:
df

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
CoreyMSchafer@gmail.com,Corey,Schafer
JaneDoe@email.com,Jane,Doe
JohnDoe@email.com,John,Doe
a@email.com,Adam,Doe


In [19]:
df.index

Index(['CoreyMSchafer@gmail.com', 'JaneDoe@email.com', 'JohnDoe@email.com',
       'a@email.com'],
      dtype='object', name='email')

In [20]:
df.loc["CoreyMSchafer@gmail.com", "last"]

'Schafer'

In [21]:
df.reset_index(inplace = True)
df

Unnamed: 0,email,first,last
0,CoreyMSchafer@gmail.com,Corey,Schafer
1,JaneDoe@email.com,Jane,Doe
2,JohnDoe@email.com,John,Doe
3,a@email.com,Adam,Doe


In [22]:
filt = (df["last"] == "Doe") & (df["first"] == "John")

In [23]:
df[filt]

Unnamed: 0,email,first,last
2,JohnDoe@email.com,John,Doe


In [24]:
filt2 = (df["last"] == "Schafer") | (df["first"] == "John")

In [25]:
df.loc[filt2, "email"]

0    CoreyMSchafer@gmail.com
2          JohnDoe@email.com
Name: email, dtype: object

In [26]:
df.loc[~filt2, "email"]

1    JaneDoe@email.com
3          a@email.com
Name: email, dtype: object

In [27]:
df2.columns

Index(['first', 'last', 'email'], dtype='object')

In [28]:
df2.columns = ["First Name", "Last Name", "Email"]
df2

Unnamed: 0,First Name,Last Name,Email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com
3,Adam,Doe,a@email.com


In [29]:
df2.columns = [x.lower() for x in df2.columns]
df2

Unnamed: 0,first name,last name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com
3,Adam,Doe,a@email.com


In [30]:
df2.columns = df2.columns.str.replace(" ", "_")
df2

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com
3,Adam,Doe,a@email.com


In [31]:
df2.rename(columns={"first_name": "first", "last_name": "last"}, inplace = True)
df2

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com
3,Adam,Doe,a@email.com


In [32]:
df2.loc[2] = ["John", "Smith", "johnsmith@gmail.com"]
df2

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Smith,johnsmith@gmail.com
3,Adam,Doe,a@email.com


In [33]:
df2.loc[2, ["last", "email"]] = ["Doe", "JohnDoe@gmail.com"]
df2

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@gmail.com
3,Adam,Doe,a@email.com


In [34]:
filt = (df2["email"] == "JohnDoe@gmail.com")
df2.loc[filt, "last"] = "Smith"
df2

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Smith,JohnDoe@gmail.com
3,Adam,Doe,a@email.com


In [35]:
df2["email"] = df2["email"].str.lower()
df2

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@gmail.com
3,Adam,Doe,a@email.com


In [36]:
df2["email"].apply(len)

0    23
1    17
2    17
3    11
Name: email, dtype: int64

In [37]:
def update_email(email):
    return email.upper()

In [38]:
df2["email"] = df2["email"].apply(update_email)
df2

Unnamed: 0,first,last,email
0,Corey,Schafer,COREYMSCHAFER@GMAIL.COM
1,Jane,Doe,JANEDOE@EMAIL.COM
2,John,Smith,JOHNDOE@GMAIL.COM
3,Adam,Doe,A@EMAIL.COM


In [39]:
df2["email"] = df2["email"].apply(lambda x: x.lower())
df2

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@gmail.com
3,Adam,Doe,a@email.com


In [40]:
df2.apply(len, axis = "columns")

0    3
1    3
2    3
3    3
dtype: int64

In [41]:
df2.apply(pd.Series.min)

first           Adam
last             Doe
email    a@email.com
dtype: object

In [42]:
df2.apply(lambda x: x.min())

first           Adam
last             Doe
email    a@email.com
dtype: object

In [43]:
df2.applymap(len)

Unnamed: 0,first,last,email
0,5,7,23
1,4,3,17
2,4,5,17
3,4,3,11


In [44]:
df2.applymap(str.lower)

Unnamed: 0,first,last,email
0,corey,schafer,coreymschafer@gmail.com
1,jane,doe,janedoe@email.com
2,john,smith,johndoe@gmail.com
3,adam,doe,a@email.com


In [45]:
df2["first"].map({"Corey": "Chris", "Jane": "Mary"})

0    Chris
1     Mary
2      NaN
3      NaN
Name: first, dtype: object

In [46]:
df2["first"].replace({"Corey": "Chris", "Jane": "Mary"})

0    Chris
1     Mary
2     John
3     Adam
Name: first, dtype: object

In [47]:
df2["full_name"] = df2["first"] + " " + df2["last"]
df2

Unnamed: 0,first,last,email,full_name
0,Corey,Schafer,coreymschafer@gmail.com,Corey Schafer
1,Jane,Doe,janedoe@email.com,Jane Doe
2,John,Smith,johndoe@gmail.com,John Smith
3,Adam,Doe,a@email.com,Adam Doe


In [48]:
df2.drop(columns=["first", "last"], inplace = True)
df2

Unnamed: 0,email,full_name
0,coreymschafer@gmail.com,Corey Schafer
1,janedoe@email.com,Jane Doe
2,johndoe@gmail.com,John Smith
3,a@email.com,Adam Doe


In [49]:
df2["full_name"].str.split(" ", expand = True)

Unnamed: 0,0,1
0,Corey,Schafer
1,Jane,Doe
2,John,Smith
3,Adam,Doe


In [50]:
df2[["first", "last"]] = df2["full_name"].str.split(" ", expand = True)
df2

Unnamed: 0,email,full_name,first,last
0,coreymschafer@gmail.com,Corey Schafer,Corey,Schafer
1,janedoe@email.com,Jane Doe,Jane,Doe
2,johndoe@gmail.com,John Smith,John,Smith
3,a@email.com,Adam Doe,Adam,Doe


In [51]:
df2.append({"first": "Tony"}, ignore_index=True)

  df2.append({"first": "Tony"}, ignore_index=True)


Unnamed: 0,email,full_name,first,last
0,coreymschafer@gmail.com,Corey Schafer,Corey,Schafer
1,janedoe@email.com,Jane Doe,Jane,Doe
2,johndoe@gmail.com,John Smith,John,Smith
3,a@email.com,Adam Doe,Adam,Doe
4,,,Tony,


In [52]:
df2.append(df3, ignore_index=True)

  df2.append(df3, ignore_index=True)


Unnamed: 0,email,full_name,first,last
0,coreymschafer@gmail.com,Corey Schafer,Corey,Schafer
1,janedoe@email.com,Jane Doe,Jane,Doe
2,johndoe@gmail.com,John Smith,John,Smith
3,a@email.com,Adam Doe,Adam,Doe
4,IronMan@avenge.com,,Tony,Stark
5,Cap@avenge.com,,Steve,Rogers


In [53]:
df2 = df2.append(df3, ignore_index=True)

  df2 = df2.append(df3, ignore_index=True)


In [54]:
df2.drop(index = 4)

Unnamed: 0,email,full_name,first,last
0,coreymschafer@gmail.com,Corey Schafer,Corey,Schafer
1,janedoe@email.com,Jane Doe,Jane,Doe
2,johndoe@gmail.com,John Smith,John,Smith
3,a@email.com,Adam Doe,Adam,Doe
5,Cap@avenge.com,,Steve,Rogers


In [55]:
df2.drop(index=df2[df2["last"] == "Doe"].index)

Unnamed: 0,email,full_name,first,last
0,coreymschafer@gmail.com,Corey Schafer,Corey,Schafer
2,johndoe@gmail.com,John Smith,John,Smith
4,IronMan@avenge.com,,Tony,Stark
5,Cap@avenge.com,,Steve,Rogers


In [56]:
df2.dropna(subset = ["full_name"], inplace = True)

In [57]:
df2.sort_values(by=["last", "first"], ascending=[False, True], inplace = True)
df2

Unnamed: 0,email,full_name,first,last
2,johndoe@gmail.com,John Smith,John,Smith
0,coreymschafer@gmail.com,Corey Schafer,Corey,Schafer
3,a@email.com,Adam Doe,Adam,Doe
1,janedoe@email.com,Jane Doe,Jane,Doe


In [58]:
df2.sort_index()

Unnamed: 0,email,full_name,first,last
0,coreymschafer@gmail.com,Corey Schafer,Corey,Schafer
1,janedoe@email.com,Jane Doe,Jane,Doe
2,johndoe@gmail.com,John Smith,John,Smith
3,a@email.com,Adam Doe,Adam,Doe


In [59]:
df2["last"].sort_values()

3        Doe
1        Doe
0    Schafer
2      Smith
Name: last, dtype: object

In [60]:
df4

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33.0
1,Jane,Doe,JaneDoe@email.com,55.0
2,John,Doe,JohnDoe@email.com,63.0
3,Chris,Schafer,,36.0
4,,,,
5,,,Anonymous@email.com,
6,,,,


In [61]:
df4.dropna()

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Doe,JohnDoe@email.com,63


In [62]:
df4.dropna(axis="index", how="any")

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Doe,JohnDoe@email.com,63


In [63]:
df4.dropna(axis="index", how="all")

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33.0
1,Jane,Doe,JaneDoe@email.com,55.0
2,John,Doe,JohnDoe@email.com,63.0
3,Chris,Schafer,,36.0
5,,,Anonymous@email.com,


In [64]:
df4.dropna(axis="columns", how="any")

0
1
2
3
4
5
6


In [65]:
df4.dropna(axis="columns", how="all")

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33.0
1,Jane,Doe,JaneDoe@email.com,55.0
2,John,Doe,JohnDoe@email.com,63.0
3,Chris,Schafer,,36.0
4,,,,
5,,,Anonymous@email.com,
6,,,,


In [66]:
df4.dropna(axis="index", how="any", subset=["email"])

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33.0
1,Jane,Doe,JaneDoe@email.com,55.0
2,John,Doe,JohnDoe@email.com,63.0
5,,,Anonymous@email.com,


In [67]:
df4.dropna(axis="index", how="all", subset=["last", "email"])

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33.0
1,Jane,Doe,JaneDoe@email.com,55.0
2,John,Doe,JohnDoe@email.com,63.0
3,Chris,Schafer,,36.0
5,,,Anonymous@email.com,


In [68]:
df4.isna()

Unnamed: 0,first,last,email,age
0,False,False,False,False
1,False,False,False,False
2,False,False,False,False
3,False,False,True,False
4,True,True,True,True
5,True,True,False,True
6,True,True,True,True


In [69]:
df4.fillna(0)

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Doe,JohnDoe@email.com,63
3,Chris,Schafer,0,36
4,0,0,0,0
5,0,0,Anonymous@email.com,0
6,0,0,0,0


In [70]:
df4.dtypes

first    object
last     object
email    object
age      object
dtype: object

In [71]:
df4["age"] = df4["age"].astype(float)
df4.dtypes

first     object
last      object
email     object
age      float64
dtype: object

In [72]:
df4["age"].mean()

46.75