In [105]:
people = {
    "first": ["Corey", 'Jane', 'John'], 
    "last": ["Schafer", 'Doe', 'Doe'], 
    "email": ["CoreyMSchafer@gmail.com", 'JaneDoe@email.com', 'JohnDoe@email.com']
}

people

{'first': ['Corey', 'Jane', 'John'],
 'last': ['Schafer', 'Doe', 'Doe'],
 'email': ['CoreyMSchafer@gmail.com',
  'JaneDoe@email.com',
  'JohnDoe@email.com']}

In [106]:
people['email']

['CoreyMSchafer@gmail.com', 'JaneDoe@email.com', 'JohnDoe@email.com']

In [107]:
import pandas as pd

## Esto es un data frame

In [108]:
df = pd.DataFrame(people)
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [109]:
df['email']

0    CoreyMSchafer@gmail.com
1          JaneDoe@email.com
2          JohnDoe@email.com
Name: email, dtype: object

## Una Serie es un dataframe de una sola columna

In [110]:
type(df['email'])

pandas.core.series.Series

In [111]:
df.email

0    CoreyMSchafer@gmail.com
1          JaneDoe@email.com
2          JohnDoe@email.com
Name: email, dtype: object

In [112]:
df.count()

first    3
last     3
email    3
dtype: int64

## Esto es otro data frame

In [113]:
df[['first','email']]

Unnamed: 0,first,email
0,Corey,CoreyMSchafer@gmail.com
1,Jane,JaneDoe@email.com
2,John,JohnDoe@email.com


In [114]:
df.columns

Index(['first', 'last', 'email'], dtype='object')

## Loc & iLoc

In [115]:
df.iloc[0]

first                      Corey
last                     Schafer
email    CoreyMSchafer@gmail.com
Name: 0, dtype: object

In [116]:
df.iloc[[0,1]]

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com


In [117]:
df.iloc[[1,2]]

Unnamed: 0,first,last,email
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [118]:
df.iloc[[1,2],2]

1    JaneDoe@email.com
2    JohnDoe@email.com
Name: email, dtype: object

In [119]:
df.iloc[[1,2],[1.2]]

Unnamed: 0,last
1,Doe
2,Doe


In [120]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [121]:
df.loc[0]

first                      Corey
last                     Schafer
email    CoreyMSchafer@gmail.com
Name: 0, dtype: object

In [122]:
df.loc[[0,1]]

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com


In [123]:
df.loc[[0,1],'email']

0    CoreyMSchafer@gmail.com
1          JaneDoe@email.com
Name: email, dtype: object

In [124]:
df.loc[[0,1],['email', 'last']]

Unnamed: 0,email,last
0,CoreyMSchafer@gmail.com,Schafer
1,JaneDoe@email.com,Doe


In [125]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


# Ch 4 Filters

In [126]:
df['last'] == 'Doe'

0    False
1     True
2     True
Name: last, dtype: bool

In [127]:
filt = (df['last'] == 'Doe')

In [128]:
df[filt]

Unnamed: 0,first,last,email
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [129]:
df.loc[filt]

Unnamed: 0,first,last,email
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [130]:
df.loc[filt, 'email']

1    JaneDoe@email.com
2    JohnDoe@email.com
Name: email, dtype: object

### And (&)

In [131]:
filt = (df['last'] == 'Doe') & (df['first'] == 'John' )

In [132]:
df.loc[filt, 'email']

2    JohnDoe@email.com
Name: email, dtype: object

### Or(|)

In [133]:
filt = (df['last'] == 'Schafer') | (df['first'] == 'John' )

In [134]:
df.loc[filt]

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
2,John,Doe,JohnDoe@email.com


### Nagación

In [135]:
df.loc[-filt]

Unnamed: 0,first,last,email
1,Jane,Doe,JaneDoe@email.com


# Part 5 Updating Rows and Columns - Modifying Data Within DataFrames

### Updating Columns

In [136]:
df.columns      

Index(['first', 'last', 'email'], dtype='object')

In [137]:
df.columns = ['first name', 'last name', 'email']
df.columns

Index(['first name', 'last name', 'email'], dtype='object')

In [138]:
df

Unnamed: 0,first name,last name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [139]:
df.columns = [x.upper() for x in df.columns]
df

Unnamed: 0,FIRST NAME,LAST NAME,EMAIL
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [140]:
df.columns = df.columns.str.replace(' ', '_')

In [141]:
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [142]:
df.columns = [x.lower() for x in df.columns]
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [145]:
df.rename(columns={'first_name':'first', 'last_name':'last' }, inplace=True)
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


### Updating Rows

In [146]:
df.loc[2]

first                 John
last                   Doe
email    JohnDoe@email.com
Name: 2, dtype: object

In [148]:
df.loc[2] = ['John', 'Smith', 'JohnSmith@email.com']
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Smith,JohnSmith@email.com


In [149]:
df.loc[2,['last', 'email']]

last                   Smith
email    JohnSmith@email.com
Name: 2, dtype: object

In [151]:
df.loc[2,['last', 'email']] = ['Doe', 'JohnDo@email.com']
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDo@email.com


In [153]:
df.loc[2,[ 'email']] = 'JohnDos@email.com'
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDos@email.com


In [155]:
df.at[2,'email'] = 'JohnDoe@email.com'
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [157]:
filt = df['email'] == 'JohnDoe@email.com'
df[filt]['last']

2    Doe
Name: last, dtype: object

#### Sin usar loc mara error

In [158]:
df[filt]['last'] = 'Smith'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[filt]['last'] = 'Smith'


In [159]:
df.loc[filt,'last'] = 'Smith'
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Smith,JohnDoe@email.com


In [163]:
df['email'].str.lower()
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Smith,JohnDoe@email.com


In [164]:
df['email'] = df['email'].str.lower()
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [165]:
df['email'].apply(len)

0    23
1    17
2    17
Name: email, dtype: int64

In [166]:
def update_email(email):
    return email.upper()

In [167]:
df['email'] = df['email'].apply(update_email)
df

Unnamed: 0,first,last,email
0,Corey,Schafer,COREYMSCHAFER@GMAIL.COM
1,Jane,Doe,JANEDOE@EMAIL.COM
2,John,Smith,JOHNDOE@EMAIL.COM


In [168]:
df['email'] = df['email'].apply(lambda x: x.lower())
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [169]:
df.apply(len)

first    3
last     3
email    3
dtype: int64

In [170]:
len(df['email'])

3

In [171]:
df.apply(len, axis='rows')

first    3
last     3
email    3
dtype: int64

In [172]:
df.apply(len, axis='columns')

0    3
1    3
2    3
dtype: int64

In [173]:
df.apply(pd.Series.min)

first                      Corey
last                         Doe
email    coreymschafer@gmail.com
dtype: object

In [174]:
df.apply(pd.Series.max)

first                 John
last                 Smith
email    johndoe@email.com
dtype: object

In [176]:
df.apply(lambda x: x.min())

first                      Corey
last                         Doe
email    coreymschafer@gmail.com
dtype: object

In [177]:
df.applymap(len)

Unnamed: 0,first,last,email
0,5,7,23
1,4,3,17
2,4,5,17


In [178]:
df.applymap(str.lower)

Unnamed: 0,first,last,email
0,corey,schafer,coreymschafer@gmail.com
1,jane,doe,janedoe@email.com
2,john,smith,johndoe@email.com


In [179]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [180]:
df['first'].map({'Corey': 'Chris', 'Jane':'Mary'})

0    Chris
1     Mary
2      NaN
Name: first, dtype: object

In [181]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [182]:
df['first'].replace({'Corey': 'Chris', 'Jane':'Mary'})

0    Chris
1     Mary
2     John
Name: first, dtype: object

In [183]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [184]:
df['first']=df['first'].replace({'Corey': 'Chris', 'Jane':'Mary'})
df

Unnamed: 0,first,last,email
0,Chris,Schafer,coreymschafer@gmail.com
1,Mary,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


# Part 6: Add/Remove Rows and Columns From DataFrames

In [185]:
df['first'] + ' ' + df['last']

0    Chris Schafer
1         Mary Doe
2       John Smith
dtype: object

In [186]:
df['full_name'] = df['first'] + ' ' + df['last']
df

Unnamed: 0,first,last,email,full_name
0,Chris,Schafer,coreymschafer@gmail.com,Chris Schafer
1,Mary,Doe,janedoe@email.com,Mary Doe
2,John,Smith,johndoe@email.com,John Smith


In [189]:
df.drop(columns=['first', 'last'], inplace=True)
df


Unnamed: 0,email,full_name
0,coreymschafer@gmail.com,Chris Schafer
1,janedoe@email.com,Mary Doe
2,johndoe@email.com,John Smith


In [190]:
df['full_name'].str.split(' ')

0    [Chris, Schafer]
1         [Mary, Doe]
2       [John, Smith]
Name: full_name, dtype: object

In [191]:
df['full_name'].str.split(' ', expand=True)

Unnamed: 0,0,1
0,Chris,Schafer
1,Mary,Doe
2,John,Smith


In [192]:
df[['first', 'last']]= df['full_name'].str.split(' ', expand=True)
df

Unnamed: 0,email,full_name,first,last
0,coreymschafer@gmail.com,Chris Schafer,Chris,Schafer
1,janedoe@email.com,Mary Doe,Mary,Doe
2,johndoe@email.com,John Smith,John,Smith


In [193]:
df.append({'first':'Tony'})

  df.append({'first':'Tony'})


TypeError: Can only append a dict if ignore_index=True

In [194]:
df.append({'first':'Tony'}, ignore_index=True)


  df.append({'first':'Tony'}, ignore_index=True)


Unnamed: 0,email,full_name,first,last
0,coreymschafer@gmail.com,Chris Schafer,Chris,Schafer
1,janedoe@email.com,Mary Doe,Mary,Doe
2,johndoe@email.com,John Smith,John,Smith
3,,,Tony,


In [195]:
df

Unnamed: 0,email,full_name,first,last
0,coreymschafer@gmail.com,Chris Schafer,Chris,Schafer
1,janedoe@email.com,Mary Doe,Mary,Doe
2,johndoe@email.com,John Smith,John,Smith


In [196]:
people = {
    "first": ["Tony", 'Natasha'], 
    "last": ["Stark", 'Romanova'], 
    "email": ["IronMan@avenger.com", 'BlackWidow@avenger.com']
}
df2 = pd.DataFrame(people)
df2

Unnamed: 0,first,last,email
0,Tony,Stark,IronMan@avenger.com
1,Natasha,Romanova,BlackWidow@avenger.com


In [197]:
df

Unnamed: 0,email,full_name,first,last
0,coreymschafer@gmail.com,Chris Schafer,Chris,Schafer
1,janedoe@email.com,Mary Doe,Mary,Doe
2,johndoe@email.com,John Smith,John,Smith


In [202]:
df= df.append(df2, ignore_index=True, sort=False)

  df= df.append(df2, ignore_index=True, sort=False)


In [203]:
df

Unnamed: 0,email,full_name,first,last
0,coreymschafer@gmail.com,Chris Schafer,Chris,Schafer
1,janedoe@email.com,Mary Doe,Mary,Doe
2,johndoe@email.com,John Smith,John,Smith
3,IronMan@avenger.com,,Tony,Stark
4,BlackWidow@avenger.com,,Natasha,Romanova


In [205]:
df.drop(index=3, inplace=True)
df

Unnamed: 0,email,full_name,first,last
0,coreymschafer@gmail.com,Chris Schafer,Chris,Schafer
1,janedoe@email.com,Mary Doe,Mary,Doe
2,johndoe@email.com,John Smith,John,Smith
4,BlackWidow@avenger.com,,Natasha,Romanova


In [207]:
f = (df['last'] =='Doe')
df[f]

Unnamed: 0,email,full_name,first,last
1,janedoe@email.com,Mary Doe,Mary,Doe


In [208]:
df.drop(index=df[f].index)

Unnamed: 0,email,full_name,first,last
0,coreymschafer@gmail.com,Chris Schafer,Chris,Schafer
2,johndoe@email.com,John Smith,John,Smith
4,BlackWidow@avenger.com,,Natasha,Romanova


# Part 7: Sorting Data

In [210]:
df.sort_values(by='last')

Unnamed: 0,email,full_name,first,last
1,janedoe@email.com,Mary Doe,Mary,Doe
4,BlackWidow@avenger.com,,Natasha,Romanova
0,coreymschafer@gmail.com,Chris Schafer,Chris,Schafer
2,johndoe@email.com,John Smith,John,Smith


In [211]:
df['last']=df['last'].replace({'Romanova': 'Romanov'})
df

Unnamed: 0,email,full_name,first,last
0,coreymschafer@gmail.com,Chris Schafer,Chris,Schafer
1,janedoe@email.com,Mary Doe,Mary,Doe
2,johndoe@email.com,John Smith,John,Smith
4,BlackWidow@avenger.com,,Natasha,Romanov


In [213]:
df['full_name'] = df['first']+ ' ' +df['last']
df

Unnamed: 0,email,full_name,first,last
0,coreymschafer@gmail.com,Chris Schafer,Chris,Schafer
1,janedoe@email.com,Mary Doe,Mary,Doe
2,johndoe@email.com,John Smith,John,Smith
4,BlackWidow@avenger.com,Natasha Romanov,Natasha,Romanov


In [216]:
df= df.applymap(str.lower)
df

Unnamed: 0,email,full_name,first,last
0,coreymschafer@gmail.com,chris schafer,chris,schafer
1,janedoe@email.com,mary doe,mary,doe
2,johndoe@email.com,john smith,john,smith
4,blackwidow@avenger.com,natasha romanov,natasha,romanov


In [217]:
df.sort_values(by='last', ascending=False)

Unnamed: 0,email,full_name,first,last
2,johndoe@email.com,john smith,john,smith
0,coreymschafer@gmail.com,chris schafer,chris,schafer
4,blackwidow@avenger.com,natasha romanov,natasha,romanov
1,janedoe@email.com,mary doe,mary,doe


In [218]:
df['last']=df['last'].replace({'smith': 'doe'})
df

Unnamed: 0,email,full_name,first,last
0,coreymschafer@gmail.com,chris schafer,chris,schafer
1,janedoe@email.com,mary doe,mary,doe
2,johndoe@email.com,john smith,john,doe
4,blackwidow@avenger.com,natasha romanov,natasha,romanov


In [219]:
df['full_name'] = df['first']+ ' ' +df['last']
df

Unnamed: 0,email,full_name,first,last
0,coreymschafer@gmail.com,chris schafer,chris,schafer
1,janedoe@email.com,mary doe,mary,doe
2,johndoe@email.com,john doe,john,doe
4,blackwidow@avenger.com,natasha romanov,natasha,romanov


In [220]:
df.sort_values(by=['last', 'first'], ascending=False)

Unnamed: 0,email,full_name,first,last
0,coreymschafer@gmail.com,chris schafer,chris,schafer
4,blackwidow@avenger.com,natasha romanov,natasha,romanov
1,janedoe@email.com,mary doe,mary,doe
2,johndoe@email.com,john doe,john,doe


In [224]:
df= df.append({'first':'Adam', 'last':'doe', 'email': 'adamdoe@amil.com'}, ignore_index=True)

  df= df.append({'first':'Adam', 'last':'doe', 'email': 'adamdoe@amil.com'}, ignore_index=True)


In [225]:
df.sort_values(by=['last', 'first'], ascending=[False,True])

Unnamed: 0,email,full_name,first,last
0,coreymschafer@gmail.com,chris schafer,chris,schafer
3,blackwidow@avenger.com,natasha romanov,natasha,romanov
4,adamdoe@amil.com,,Adam,doe
2,johndoe@email.com,john doe,john,doe
1,janedoe@email.com,mary doe,mary,doe


In [226]:
df.sort_index()

Unnamed: 0,email,full_name,first,last
0,coreymschafer@gmail.com,chris schafer,chris,schafer
1,janedoe@email.com,mary doe,mary,doe
2,johndoe@email.com,john doe,john,doe
3,blackwidow@avenger.com,natasha romanov,natasha,romanov
4,adamdoe@amil.com,,Adam,doe


In [227]:
df['last'].sort_values()

1        doe
2        doe
4        doe
3    romanov
0    schafer
Name: last, dtype: object

# Part 9: Cleaning Data - Casting Datatypes and Handling Missing Values

In [228]:
df

Unnamed: 0,email,full_name,first,last
0,coreymschafer@gmail.com,chris schafer,chris,schafer
1,janedoe@email.com,mary doe,mary,doe
2,johndoe@email.com,john doe,john,doe
3,blackwidow@avenger.com,natasha romanov,natasha,romanov
4,adamdoe@amil.com,,Adam,doe


In [229]:
import numpy as np

In [230]:
df= df.append({'first': np.nan , 'last':None , 'email': 'NA'}, ignore_index=True)
df

  df= df.append({'first': np.nan , 'last':None , 'email': 'NA'}, ignore_index=True)


Unnamed: 0,email,full_name,first,last
0,coreymschafer@gmail.com,chris schafer,chris,schafer
1,janedoe@email.com,mary doe,mary,doe
2,johndoe@email.com,john doe,john,doe
3,blackwidow@avenger.com,natasha romanov,natasha,romanov
4,adamdoe@amil.com,,Adam,doe
5,,,,


In [231]:
df= df.append({'first': np.nan , 'last':None , 'email': 'NA', 'age': 55}, ignore_index=True)
df

  df= df.append({'first': np.nan , 'last':None , 'email': 'NA', 'age': 55}, ignore_index=True)


Unnamed: 0,email,full_name,first,last,age
0,coreymschafer@gmail.com,chris schafer,chris,schafer,
1,janedoe@email.com,mary doe,mary,doe,
2,johndoe@email.com,john doe,john,doe,
3,blackwidow@avenger.com,natasha romanov,natasha,romanov,
4,adamdoe@amil.com,,Adam,doe,
5,,,,,
6,,,,,55.0


In [233]:
f = df['email'] == 'coreymschafer@gmail.com'
df[f]

Unnamed: 0,email,full_name,first,last,age
0,coreymschafer@gmail.com,chris schafer,chris,schafer,


In [235]:
df.loc[f,'age'] = 51
df

Unnamed: 0,email,full_name,first,last,age
0,coreymschafer@gmail.com,chris schafer,chris,schafer,51.0
1,janedoe@email.com,mary doe,mary,doe,
2,johndoe@email.com,john doe,john,doe,
3,blackwidow@avenger.com,natasha romanov,natasha,romanov,
4,adamdoe@amil.com,,Adam,doe,
5,,,,,
6,,,,,55.0


In [236]:
f = df['email'] == 'blackwidow@avenger.com'
f

0    False
1    False
2    False
3     True
4    False
5    False
6    False
Name: email, dtype: bool

In [237]:
df.loc[f,'age'] = 28
df

Unnamed: 0,email,full_name,first,last,age
0,coreymschafer@gmail.com,chris schafer,chris,schafer,51.0
1,janedoe@email.com,mary doe,mary,doe,
2,johndoe@email.com,john doe,john,doe,
3,blackwidow@avenger.com,natasha romanov,natasha,romanov,28.0
4,adamdoe@amil.com,,Adam,doe,
5,,,,,
6,,,,,55.0


In [238]:
f = df['last'] == 'doe'
f

0    False
1     True
2     True
3    False
4     True
5    False
6    False
Name: last, dtype: bool

In [239]:
df.loc[f,'age'] = 35
df

Unnamed: 0,email,full_name,first,last,age
0,coreymschafer@gmail.com,chris schafer,chris,schafer,51.0
1,janedoe@email.com,mary doe,mary,doe,35.0
2,johndoe@email.com,john doe,john,doe,35.0
3,blackwidow@avenger.com,natasha romanov,natasha,romanov,28.0
4,adamdoe@amil.com,,Adam,doe,35.0
5,,,,,
6,,,,,55.0


In [244]:
df.iloc[4]['age'] = 'Missing'
df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.iloc[4]['age'] = 'Missing'


Unnamed: 0,email,full_name,first,last,age
0,coreymschafer@gmail.com,chris schafer,chris,schafer,51.0
1,janedoe@email.com,mary doe,mary,doe,35.0
2,johndoe@email.com,john doe,john,doe,35.0
3,blackwidow@avenger.com,natasha romanov,natasha,romanov,28.0
4,Missing,Missing,Missing,Missing,Missing
5,,,,,
6,,,,,55.0


In [245]:
df.dropna()

Unnamed: 0,email,full_name,first,last,age
0,coreymschafer@gmail.com,chris schafer,chris,schafer,51.0
1,janedoe@email.com,mary doe,mary,doe,35.0
2,johndoe@email.com,john doe,john,doe,35.0
3,blackwidow@avenger.com,natasha romanov,natasha,romanov,28.0
4,Missing,Missing,Missing,Missing,Missing


In [246]:
df.dropna(axis='index', how='any')


Unnamed: 0,email,full_name,first,last,age
0,coreymschafer@gmail.com,chris schafer,chris,schafer,51.0
1,janedoe@email.com,mary doe,mary,doe,35.0
2,johndoe@email.com,john doe,john,doe,35.0
3,blackwidow@avenger.com,natasha romanov,natasha,romanov,28.0
4,Missing,Missing,Missing,Missing,Missing


In [247]:
df.dropna(axis='index', how='all')

Unnamed: 0,email,full_name,first,last,age
0,coreymschafer@gmail.com,chris schafer,chris,schafer,51.0
1,janedoe@email.com,mary doe,mary,doe,35.0
2,johndoe@email.com,john doe,john,doe,35.0
3,blackwidow@avenger.com,natasha romanov,natasha,romanov,28.0
4,Missing,Missing,Missing,Missing,Missing
5,,,,,
6,,,,,55.0


In [248]:
f = df['email'] == 'Missing'

In [249]:
df.loc[f,'age'] = np.nan
df

Unnamed: 0,email,full_name,first,last,age
0,coreymschafer@gmail.com,chris schafer,chris,schafer,51.0
1,janedoe@email.com,mary doe,mary,doe,35.0
2,johndoe@email.com,john doe,john,doe,35.0
3,blackwidow@avenger.com,natasha romanov,natasha,romanov,28.0
4,Missing,Missing,Missing,Missing,
5,,,,,
6,,,,,55.0


In [251]:
df.dropna(axis='columns', how='all')

Unnamed: 0,email,full_name,first,last,age
0,coreymschafer@gmail.com,chris schafer,chris,schafer,51.0
1,janedoe@email.com,mary doe,mary,doe,35.0
2,johndoe@email.com,john doe,john,doe,35.0
3,blackwidow@avenger.com,natasha romanov,natasha,romanov,28.0
4,Missing,Missing,Missing,Missing,
5,,,,,
6,,,,,55.0


In [254]:
df.dropna(axis='index', how='any', subset=['age'])

Unnamed: 0,email,full_name,first,last,age
0,coreymschafer@gmail.com,chris schafer,chris,schafer,51.0
1,janedoe@email.com,mary doe,mary,doe,35.0
2,johndoe@email.com,john doe,john,doe,35.0
3,blackwidow@avenger.com,natasha romanov,natasha,romanov,28.0
6,,,,,55.0


In [255]:
df.dropna(axis='index', how='any', subset=['first'])

Unnamed: 0,email,full_name,first,last,age
0,coreymschafer@gmail.com,chris schafer,chris,schafer,51.0
1,janedoe@email.com,mary doe,mary,doe,35.0
2,johndoe@email.com,john doe,john,doe,35.0
3,blackwidow@avenger.com,natasha romanov,natasha,romanov,28.0
4,Missing,Missing,Missing,Missing,


In [257]:
df.dropna(axis='index', how='all', subset=['first', 'age'])

Unnamed: 0,email,full_name,first,last,age
0,coreymschafer@gmail.com,chris schafer,chris,schafer,51.0
1,janedoe@email.com,mary doe,mary,doe,35.0
2,johndoe@email.com,john doe,john,doe,35.0
3,blackwidow@avenger.com,natasha romanov,natasha,romanov,28.0
4,Missing,Missing,Missing,Missing,
6,,,,,55.0


In [258]:
df.dropna(axis='index', how='any', subset=['first', 'age'])

Unnamed: 0,email,full_name,first,last,age
0,coreymschafer@gmail.com,chris schafer,chris,schafer,51.0
1,janedoe@email.com,mary doe,mary,doe,35.0
2,johndoe@email.com,john doe,john,doe,35.0
3,blackwidow@avenger.com,natasha romanov,natasha,romanov,28.0


In [259]:
df

Unnamed: 0,email,full_name,first,last,age
0,coreymschafer@gmail.com,chris schafer,chris,schafer,51.0
1,janedoe@email.com,mary doe,mary,doe,35.0
2,johndoe@email.com,john doe,john,doe,35.0
3,blackwidow@avenger.com,natasha romanov,natasha,romanov,28.0
4,Missing,Missing,Missing,Missing,
5,,,,,
6,,,,,55.0


In [261]:
df.replace('NA', np.nan, inplace=True)
df.replace('Missing', np.nan, inplace=True)
df

Unnamed: 0,email,full_name,first,last,age
0,coreymschafer@gmail.com,chris schafer,chris,schafer,51.0
1,janedoe@email.com,mary doe,mary,doe,35.0
2,johndoe@email.com,john doe,john,doe,35.0
3,blackwidow@avenger.com,natasha romanov,natasha,romanov,28.0
4,,,,,
5,,,,,
6,,,,,55.0


In [262]:
df.isna()

Unnamed: 0,email,full_name,first,last,age
0,False,False,False,False,False
1,False,False,False,False,False
2,False,False,False,False,False
3,False,False,False,False,False
4,True,True,True,True,True
5,True,True,True,True,True
6,True,True,True,True,False


In [263]:
df.fillna('Missing')

Unnamed: 0,email,full_name,first,last,age
0,coreymschafer@gmail.com,chris schafer,chris,schafer,51.0
1,janedoe@email.com,mary doe,mary,doe,35.0
2,johndoe@email.com,john doe,john,doe,35.0
3,blackwidow@avenger.com,natasha romanov,natasha,romanov,28.0
4,Missing,Missing,Missing,Missing,Missing
5,Missing,Missing,Missing,Missing,Missing
6,Missing,Missing,Missing,Missing,55.0


In [265]:
df['age'].fillna(0)


Unnamed: 0,email,full_name,first,last,age
0,coreymschafer@gmail.com,chris schafer,chris,schafer,51.0
1,janedoe@email.com,mary doe,mary,doe,35.0
2,johndoe@email.com,john doe,john,doe,35.0
3,blackwidow@avenger.com,natasha romanov,natasha,romanov,28.0
4,,,,,
5,,,,,
6,,,,,55.0


In [267]:
df.dtypes

email         object
full_name     object
first         object
last          object
age          float64
dtype: object

In [269]:
df['age']= df['age'].astype(float)
df.dtypes

email         object
full_name     object
first         object
last          object
age          float64
dtype: object

In [270]:
df['age'].mean()

40.8

In [271]:
df

Unnamed: 0,email,full_name,first,last,age
0,coreymschafer@gmail.com,chris schafer,chris,schafer,51.0
1,janedoe@email.com,mary doe,mary,doe,35.0
2,johndoe@email.com,john doe,john,doe,35.0
3,blackwidow@avenger.com,natasha romanov,natasha,romanov,28.0
4,,,,,
5,,,,,
6,,,,,55.0
