In [105]:
# Column-oriented sample data: each key is a column name and each list
# holds that column's values, one entry per person.
people = {
    "first": ["Corey", "Jane", "John"],
    "last": ["Schafer", "Doe", "Doe"],
    "email": [
        "CoreyMSchafer@gmail.com",
        "JaneDoe@email.com",
        "JohnDoe@email.com",
    ],
}

people

{'first': ['Corey', 'Jane', 'John'],
 'last': ['Schafer', 'Doe', 'Doe'],
 'email': ['CoreyMSchafer@gmail.com',
  'JaneDoe@email.com',
  'JohnDoe@email.com']}

In [106]:
people['email']

['CoreyMSchafer@gmail.com', 'JaneDoe@email.com', 'JohnDoe@email.com']

In [107]:
import pandas as pd

## Esto es un data frame

In [108]:
df = pd.DataFrame(people)
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [109]:
df['email']

0    CoreyMSchafer@gmail.com
1          JaneDoe@email.com
2          JohnDoe@email.com
Name: email, dtype: object

## Una Serie es una sola columna de un DataFrame (estructura unidimensional)

In [110]:
type(df['email'])

pandas.core.series.Series

In [111]:
df.email

0    CoreyMSchafer@gmail.com
1          JaneDoe@email.com
2          JohnDoe@email.com
Name: email, dtype: object

In [112]:
df.count()

first    3
last     3
email    3
dtype: int64

## Esto es otro data frame

In [113]:
df[['first','email']]

Unnamed: 0,first,email
0,Corey,CoreyMSchafer@gmail.com
1,Jane,JaneDoe@email.com
2,John,JohnDoe@email.com


In [114]:
df.columns

Index(['first', 'last', 'email'], dtype='object')

## Loc & iLoc

In [115]:
df.iloc[0]

first                      Corey
last                     Schafer
email    CoreyMSchafer@gmail.com
Name: 0, dtype: object

In [116]:
df.iloc[[0,1]]

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com


In [117]:
df.iloc[[1,2]]

Unnamed: 0,first,last,email
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [118]:
df.iloc[[1,2],2]

1    JaneDoe@email.com
2    JohnDoe@email.com
Name: email, dtype: object

In [119]:
# iloc takes integer positions only: rows 1 and 2, column 1 ('last').
# Passing the column as a list keeps the result a DataFrame.
df.iloc[[1, 2], [1]]

Unnamed: 0,last
1,Doe
2,Doe


In [120]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [121]:
df.loc[0]

first                      Corey
last                     Schafer
email    CoreyMSchafer@gmail.com
Name: 0, dtype: object

In [122]:
df.loc[[0,1]]

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com


In [123]:
df.loc[[0,1],'email']

0    CoreyMSchafer@gmail.com
1          JaneDoe@email.com
Name: email, dtype: object

In [124]:
df.loc[[0,1],['email', 'last']]

Unnamed: 0,email,last
0,CoreyMSchafer@gmail.com,Schafer
1,JaneDoe@email.com,Doe


In [125]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


# Part 4 Filters

In [126]:
df['last'] == 'Doe'

0    False
1     True
2     True
Name: last, dtype: bool

In [127]:
filt = (df['last'] == 'Doe')

In [128]:
df[filt]

Unnamed: 0,first,last,email
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [129]:
df.loc[filt]

Unnamed: 0,first,last,email
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [130]:
df.loc[filt, 'email']

1    JaneDoe@email.com
2    JohnDoe@email.com
Name: email, dtype: object

### And (&)

In [131]:
filt = (df['last'] == 'Doe') & (df['first'] == 'John' )

In [132]:
df.loc[filt, 'email']

2    JohnDoe@email.com
Name: email, dtype: object

### Or (|)

In [133]:
filt = (df['last'] == 'Schafer') | (df['first'] == 'John' )

In [134]:
df.loc[filt]

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
2,John,Doe,JohnDoe@email.com


### Negación (~)

In [135]:
# Invert the boolean mask with ~ (element-wise NOT) to keep the rows
# that do NOT satisfy the filter.
df.loc[~filt]

Unnamed: 0,first,last,email
1,Jane,Doe,JaneDoe@email.com


# Part 5 Updating Rows and Columns - Modifying Data Within DataFrames

### Updating Columns

In [136]:
df.columns      

Index(['first', 'last', 'email'], dtype='object')

In [137]:
df.columns = ['first name', 'last name', 'email']
df.columns

Index(['first name', 'last name', 'email'], dtype='object')

In [138]:
df

Unnamed: 0,first name,last name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [139]:
# Upper-case every column label through the vectorised Index.str accessor.
df.columns = df.columns.str.upper()
df

Unnamed: 0,FIRST NAME,LAST NAME,EMAIL
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [140]:
df.columns = df.columns.str.replace(' ', '_')

In [141]:
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [142]:
# Lower-case every column label through the vectorised Index.str accessor.
df.columns = df.columns.str.lower()
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [145]:
# Rename only the listed columns; labels missing from the mapping are kept.
# Reassigning the result (instead of inplace=True) makes the transformation
# explicit and keeps the call chainable.
df = df.rename(columns={'first_name': 'first', 'last_name': 'last'})
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


### Updating Rows

In [146]:
df.loc[2]

first                 John
last                   Doe
email    JohnDoe@email.com
Name: 2, dtype: object

In [148]:
df.loc[2] = ['John', 'Smith', 'JohnSmith@email.com']
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Smith,JohnSmith@email.com


In [149]:
df.loc[2,['last', 'email']]

last                   Smith
email    JohnSmith@email.com
Name: 2, dtype: object

In [151]:
df.loc[2,['last', 'email']] = ['Doe', 'JohnDo@email.com']
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDo@email.com


In [153]:
# Overwrite a single cell: row label 2, column 'email'.
df.loc[2, 'email'] = 'JohnDos@email.com'
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDos@email.com


In [155]:
df.at[2,'email'] = 'JohnDoe@email.com'
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [157]:
filt = df['email'] == 'JohnDoe@email.com'
df[filt]['last']

2    Doe
Name: last, dtype: object

#### Sin usar loc marca una advertencia (SettingWithCopyWarning)

In [158]:
# Intentional demonstration of chained assignment: df[filt] is evaluated
# first and the value is then set on that intermediate result, so pandas
# warns that a value is being set on a copy of a slice.
# Use df.loc[filt, 'last'] = ... to update the original frame.
df[filt]['last'] = 'Smith'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[filt]['last'] = 'Smith'


In [159]:
df.loc[filt,'last'] = 'Smith'
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Smith,JohnDoe@email.com


In [163]:
# str.lower() produces a new Series; because the result is not assigned
# back, df still shows the original mixed-case emails below.
df['email'].str.lower()
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Smith,JohnDoe@email.com


In [164]:
df['email'] = df['email'].str.lower()
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [165]:
df['email'].apply(len)

0    23
1    17
2    17
Name: email, dtype: int64

In [166]:
def update_email(email):
    """Return ``email`` converted to upper case."""
    upper_cased = email.upper()
    return upper_cased

In [167]:
df['email'] = df['email'].apply(update_email)
df

Unnamed: 0,first,last,email
0,Corey,Schafer,COREYMSCHAFER@GMAIL.COM
1,Jane,Doe,JANEDOE@EMAIL.COM
2,John,Smith,JOHNDOE@EMAIL.COM


In [168]:
df['email'] = df['email'].apply(lambda x: x.lower())
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [169]:
df.apply(len)

first    3
last     3
email    3
dtype: int64

In [170]:
len(df['email'])

3

In [171]:
df.apply(len, axis='rows')

first    3
last     3
email    3
dtype: int64

In [172]:
df.apply(len, axis='columns')

0    3
1    3
2    3
dtype: int64

In [173]:
df.apply(pd.Series.min)

first                      Corey
last                         Doe
email    coreymschafer@gmail.com
dtype: object

In [174]:
df.apply(pd.Series.max)

first                 John
last                 Smith
email    johndoe@email.com
dtype: object

In [176]:
df.apply(lambda x: x.min())

first                      Corey
last                         Doe
email    coreymschafer@gmail.com
dtype: object

In [177]:
# Apply len to every individual cell (element-wise), in contrast to
# df.apply(len), which is called once per column or row.
# NOTE(review): DataFrame.applymap is reported as deprecated in newer pandas
# releases in favour of DataFrame.map — confirm against the installed
# version before migrating.
df.applymap(len)

Unnamed: 0,first,last,email
0,5,7,23
1,4,3,17
2,4,5,17


In [178]:
df.applymap(str.lower)

Unnamed: 0,first,last,email
0,corey,schafer,coreymschafer@gmail.com
1,jane,doe,janedoe@email.com
2,john,smith,johndoe@email.com


In [179]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [180]:
# Series.map substitutes values using the dict; entries with no matching
# key ('John') come back as NaN. The result is only displayed here, it is
# not assigned back to df.
df['first'].map({'Corey': 'Chris', 'Jane':'Mary'})

0    Chris
1     Mary
2      NaN
Name: first, dtype: object

In [181]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [182]:
# Series.replace swaps only the values listed in the dict and leaves every
# other value ('John') untouched. The result is only displayed here, it is
# not assigned back to df.
df['first'].replace({'Corey': 'Chris', 'Jane':'Mary'})

0    Chris
1     Mary
2     John
Name: first, dtype: object

In [183]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [184]:
# Persist the substitution by assigning the replaced Series back to the
# 'first' column; names absent from the mapping are kept as they are.
first_name_mapping = {'Corey': 'Chris', 'Jane': 'Mary'}
df['first'] = df['first'].replace(first_name_mapping)
df

Unnamed: 0,first,last,email
0,Chris,Schafer,coreymschafer@gmail.com
1,Mary,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com
