### Here I am gonna to do some practice basic and important pandas function and attributes

In [1]:
import pandas as pd
import numpy as np

## Attributes and underlying data

In [2]:
data = {'Name':['Jai', 'Princi', 'Gaurav', 'Anuj'],
        'Age':[27, 24, 22, 32],
        'Address':['Delhi', 'Kanpur', 'Allahabad', 'Kannauj'],
        'Qualification':['Msc', 'MA', 'MCA', 'Phd']}

df = pd.DataFrame(data=data)

In [3]:
df

Unnamed: 0,Name,Age,Address,Qualification
0,Jai,27,Delhi,Msc
1,Princi,24,Kanpur,MA
2,Gaurav,22,Allahabad,MCA
3,Anuj,32,Kannauj,Phd


### 1) index

In [4]:
df.index

RangeIndex(start=0, stop=4, step=1)

### 2) columns

In [5]:
df.columns

Index(['Name', 'Age', 'Address', 'Qualification'], dtype='object')

### 3) dtypes

In [6]:
df.dtypes

Name             object
Age               int64
Address          object
Qualification    object
dtype: object

### 4) info

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Name           4 non-null      object
 1   Age            4 non-null      int64 
 2   Address        4 non-null      object
 3   Qualification  4 non-null      object
dtypes: int64(1), object(3)
memory usage: 256.0+ bytes


### 5) values

In [8]:
df.values

array([['Jai', 27, 'Delhi', 'Msc'],
       ['Princi', 24, 'Kanpur', 'MA'],
       ['Gaurav', 22, 'Allahabad', 'MCA'],
       ['Anuj', 32, 'Kannauj', 'Phd']], dtype=object)

### 6) axis

In [9]:
df.axes

[RangeIndex(start=0, stop=4, step=1),
 Index(['Name', 'Age', 'Address', 'Qualification'], dtype='object')]

### 7) size

In [10]:
df.size

16

### 8) ndim

In [11]:
df.ndim

2

### 9) shape

In [12]:
df.shape

(4, 4)

### 10) memory_useage

In [13]:
df.memory_usage

<bound method DataFrame.memory_usage of      Name  Age    Address Qualification
0     Jai   27      Delhi           Msc
1  Princi   24     Kanpur            MA
2  Gaurav   22  Allahabad           MCA
3    Anuj   32    Kannauj           Phd>

### 11) empty

In [14]:
df_empty = pd.DataFrame({'A' : []})

df_empty

Unnamed: 0,A


In [15]:
df_empty.empty

True

In [16]:
df2 = pd.DataFrame({'A' : [np.nan]})

df2

Unnamed: 0,A
0,


In [17]:
df2.dropna().empty

True

### 12) select_dtypes

In [18]:
df3 = pd.read_csv("Dataset/nba.csv")

df3.head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0,PG,25,2-Jun,180,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99,SF,25,6-Jun,235,Marquette,6796117.0
2,John Holland,Boston Celtics,30,SG,27,5-Jun,205,Boston University,
3,R.J. Hunter,Boston Celtics,28,SG,22,5-Jun,185,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8,PF,29,10-Jun,231,,5000000.0


In [19]:
df3.dtypes

Name         object
Team         object
Number        int64
Position     object
Age           int64
Height       object
Weight        int64
College      object
Salary      float64
dtype: object

In [20]:
# Let’s use the dataframe.select_dtypes() function to select all columns having int64 data type in the dataframe.

df3.select_dtypes(include=['int64']).head()

Unnamed: 0,Number,Age,Weight
0,0,25,180
1,99,25,235
2,30,27,205
3,28,22,185
4,8,29,231


In [21]:
df3.select_dtypes(exclude=['float64']).head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College
0,Avery Bradley,Boston Celtics,0,PG,25,2-Jun,180,Texas
1,Jae Crowder,Boston Celtics,99,SF,25,6-Jun,235,Marquette
2,John Holland,Boston Celtics,30,SG,27,5-Jun,205,Boston University
3,R.J. Hunter,Boston Celtics,28,SG,22,5-Jun,185,Georgia State
4,Jonas Jerebko,Boston Celtics,8,PF,29,10-Jun,231,


## Conversion

### 13) astype

In [22]:
d = {'col1': [1, 2], 'col2': [3, 4]}

df4 = pd.DataFrame(data=d)

df4

Unnamed: 0,col1,col2
0,1,3
1,2,4


In [23]:
df4.dtypes

col1    int64
col2    int64
dtype: object

In [24]:
# Now change the data types using astype

df4 = df4.astype(dtype='float64')

df4

Unnamed: 0,col1,col2
0,1.0,3.0
1,2.0,4.0


In [25]:
df4.dtypes

col1    float64
col2    float64
dtype: object

In [26]:
df5 = pd.DataFrame(data=d)
df5

Unnamed: 0,col1,col2
0,1,3
1,2,4


In [27]:
# Change specific column data type

df5['col1'] = df5['col1'].astype(dtype='float64')

df5.dtypes

col1    float64
col2      int64
dtype: object

In [28]:
# Another way to change data type for a specific column

df5.astype(dtype={'col2':'int32'}).dtypes

col1    float64
col2      int32
dtype: object

### 14) convert_dtypes

In [29]:
df = pd.DataFrame(
    {
        "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
        "b": pd.Series(["x", "y", "z"], dtype=np.dtype("O")),
        "c": pd.Series([True, False, np.nan], dtype=np.dtype("O")),
        "d": pd.Series(["h", "i", np.nan], dtype=np.dtype("O")),
        "e": pd.Series([10, np.nan, 20], dtype=np.dtype("float")),
        "f": pd.Series([np.nan, 100.5, 200], dtype=np.dtype("float")),
    }
)

df

Unnamed: 0,a,b,c,d,e,f
0,1,x,True,h,10.0,
1,2,y,False,i,,100.5
2,3,z,,,20.0,200.0


In [30]:
df.dtypes

a      int32
b     object
c     object
d     object
e    float64
f    float64
dtype: object

In [31]:
dfn = df.convert_dtypes()
dfn

Unnamed: 0,a,b,c,d,e,f
0,1,x,True,h,10.0,
1,2,y,False,i,,100.5
2,3,z,,,20.0,200.0


In [32]:
dfn.dtypes

a      Int32
b     string
c    boolean
d     string
e      Int64
f    Float64
dtype: object

### 15) infer_objects

### 16) copy()

In [33]:
s = pd.Series([1, 2], index=["a", "b"])
s

a    1
b    2
dtype: int64

In [34]:
# Default deep=True if you want ot shallow copy then make deep=False

s_copy = s.copy()
s_copy

a    1
b    2
dtype: int64

### 17) bool

In [35]:
pd.Series([True]).bool()

True

In [36]:
pd.Series([False]).bool()

False

In [37]:
pd.DataFrame({'col': [True]}).bool()

True

## Indexing, iteration

### 1) head()

In [38]:
data = {'Name':['Jai', 'Princi', 'Gaurav', 'Anuj'],
        'Age':[27, 24, 22, 32],
        'Address':['Delhi', 'Kanpur', 'Allahabad', 'Kannauj'],
        'Qualification':['Msc', 'MA', 'MCA', 'Phd']}

df = pd.DataFrame(data=data)
df.head()

Unnamed: 0,Name,Age,Address,Qualification
0,Jai,27,Delhi,Msc
1,Princi,24,Kanpur,MA
2,Gaurav,22,Allahabad,MCA
3,Anuj,32,Kannauj,Phd


In [39]:
df.head(2) # Default is 5 rows

Unnamed: 0,Name,Age,Address,Qualification
0,Jai,27,Delhi,Msc
1,Princi,24,Kanpur,MA


### 2) at

In [40]:
# Access a single value for a row/column label pair.
# Similar to loc, in that both provide label-based lookups.
# Use at if you only need to get or set a single value in a DataFrame or Series.

df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]], index=[4, 5, 6], columns=['A', 'B', 'C'])
df

Unnamed: 0,A,B,C
4,0,2,3
5,0,4,1
6,10,20,30


In [41]:
df.at[4, 'A']

0

In [42]:
df.at[6, 'C']

30

In [43]:
df.at[5, 'B'] = 100
df

Unnamed: 0,A,B,C
4,0,2,3
5,0,100,1
6,10,20,30


### 3) iat

In [44]:
# Access a single value for a row/column pair by integer position.

# Similar to iloc, in that both provide integer-based lookups.
# Use iat if you only need to get or set a single value in a DataFrame or Series.

df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]], columns=['A', 'B', 'C'])

df

Unnamed: 0,A,B,C
0,0,2,3
1,0,4,1
2,10,20,30


In [45]:
df.iat[0,0]

0

In [46]:
df.iat[2,2]

30

In [47]:
df.iat[0,2] = 5000
df

Unnamed: 0,A,B,C
0,0,2,5000
1,0,4,1
2,10,20,30


### 4) loc

In [48]:
data = {'Name':['Jai', 'Princi', 'Gaurav', 'Anuj'],
        'Age':[27, 24, 22, 32],
        'Address':['Delhi', 'Kanpur', 'Allahabad', 'Kannauj'],
        'Qualification':['Msc', 'MA', 'MCA', 'Phd']}
df = pd.DataFrame(data=data)
df

Unnamed: 0,Name,Age,Address,Qualification
0,Jai,27,Delhi,Msc
1,Princi,24,Kanpur,MA
2,Gaurav,22,Allahabad,MCA
3,Anuj,32,Kannauj,Phd


In [49]:
df.loc[[0]]

Unnamed: 0,Name,Age,Address,Qualification
0,Jai,27,Delhi,Msc


In [50]:
df.loc[0]

Name               Jai
Age                 27
Address          Delhi
Qualification      Msc
Name: 0, dtype: object

In [51]:
df.loc[2, 'Age']

22

In [52]:
df.loc[:, 'Name']

0       Jai
1    Princi
2    Gaurav
3      Anuj
Name: Name, dtype: object

In [53]:
df.loc[[0,1],['Name','Address','Qualification']]

Unnamed: 0,Name,Address,Qualification
0,Jai,Delhi,Msc
1,Princi,Kanpur,MA


In [54]:
df.loc[0, 'Age'] = 32
df

Unnamed: 0,Name,Age,Address,Qualification
0,Jai,32,Delhi,Msc
1,Princi,24,Kanpur,MA
2,Gaurav,22,Allahabad,MCA
3,Anuj,32,Kannauj,Phd


In [55]:
df.loc[df['Age'] > 30]

Unnamed: 0,Name,Age,Address,Qualification
0,Jai,32,Delhi,Msc
3,Anuj,32,Kannauj,Phd


In [56]:
df = pd.DataFrame([[1, 2], [4, 5], [7, 8]],
     index=['cobra', 'viper', 'sidewinder'],
     columns=['max_speed', 'shield'])

df

Unnamed: 0,max_speed,shield
cobra,1,2
viper,4,5
sidewinder,7,8


In [57]:
df.loc['cobra':'viper', 'max_speed']

cobra    1
viper    4
Name: max_speed, dtype: int64

In [58]:
df.loc[[False, False, True]]

Unnamed: 0,max_speed,shield
sidewinder,7,8


In [59]:
df.loc[df['shield'] > 6, ['max_speed']]

Unnamed: 0,max_speed
sidewinder,7


In [60]:
df.loc[['viper', 'sidewinder'], ['shield']] = 50
df

Unnamed: 0,max_speed,shield
cobra,1,2
viper,4,50
sidewinder,7,50


In [61]:
# Set value for an entire row

df.loc['cobra'] = 10
df

Unnamed: 0,max_speed,shield
cobra,10,10
viper,4,50
sidewinder,7,50


In [62]:
# Set value for an entire column
df.loc[:, 'max_speed'] = 30
df

Unnamed: 0,max_speed,shield
cobra,30,10
viper,30,50
sidewinder,30,50


In [63]:
# Set value for rows matching callable condition
df.loc[df['shield'] > 35] = 0
df

Unnamed: 0,max_speed,shield
cobra,30,10
viper,0,0
sidewinder,0,0


### 5) iloc --> Integer location

In [64]:
mydict = [{'a': 1, 'b': 2, 'c': 3, 'd': 4},
          {'a': 100, 'b': 200, 'c': 300, 'd': 400},
          {'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000 }]
df5 = pd.DataFrame(mydict)
df5

Unnamed: 0,a,b,c,d
0,1,2,3,4
1,100,200,300,400
2,1000,2000,3000,4000


In [65]:
df5.iloc[[0]]

Unnamed: 0,a,b,c,d
0,1,2,3,4


In [66]:
df5.iloc[0]

a    1
b    2
c    3
d    4
Name: 0, dtype: int64

In [67]:
df5.iloc[1, 3]

400

In [68]:
df5.iloc[[0, 2], [1, 3]]

Unnamed: 0,b,d
0,2,4
2,2000,4000


In [69]:
df5.iloc[1:3, 0:3]

Unnamed: 0,a,b,c
1,100,200,300
2,1000,2000,3000


## Combining / comparing / joining / merging

### 1) assign

In [70]:
df = pd.DataFrame(data={'temp_c': [17.0, 25.0]}, index=['Portland', 'Berkeley'])
df

Unnamed: 0,temp_c
Portland,17.0
Berkeley,25.0


In [71]:
# Assign new columns to a DataFrame.

df = df.assign(temp_f = lambda x: x.temp_c * 9 / 5 + 32)
df

# df.assign(temp_f = df['temp_c'] * 9 / 5 + 32) # this is another way

Unnamed: 0,temp_c,temp_f
Portland,17.0,62.6
Berkeley,25.0,77.0


In [72]:
df = df.assign(temp_f = lambda x: x['temp_c'] * 9 / 5 + 32, temp_k = lambda x: (x['temp_f'] +  459.67) * 5 / 9)

In [73]:
df

Unnamed: 0,temp_c,temp_f,temp_k
Portland,17.0,62.6,290.15
Berkeley,25.0,77.0,298.15


In [74]:
df = df.assign(name = lambda a:['alamin', 'tania'])
df

Unnamed: 0,temp_c,temp_f,temp_k,name
Portland,17.0,62.6,290.15,alamin
Berkeley,25.0,77.0,298.15,tania


### 2) compare

In [75]:
# Compare to another DataFrame and show the differences

df = pd.DataFrame(
    {
        "col1": ["a", "a", "b", "b", "a"],
        "col2": [1.0, 2.0, 3.0, np.nan, 5.0],
        "col3": [1.0, 2.0, 3.0, 4.0, 5.0]
    },  
)

df

Unnamed: 0,col1,col2,col3
0,a,1.0,1.0
1,a,2.0,2.0
2,b,3.0,3.0
3,b,,4.0
4,a,5.0,5.0


In [76]:
df2 = df.copy()

df2.loc[0, 'col1'] = 'c'

df2.loc[2, 'col3'] = 4.0

df2

Unnamed: 0,col1,col2,col3
0,c,1.0,1.0
1,a,2.0,2.0
2,b,3.0,4.0
3,b,,4.0
4,a,5.0,5.0


In [77]:
# Align the differences on columns
df.compare(other=df2)

Unnamed: 0_level_0,col1,col1,col3,col3
Unnamed: 0_level_1,self,other,self,other
0,a,c,,
2,,,3.0,4.0


In [78]:
# Stack the differences on rows
df.compare(df2, align_axis=0)

Unnamed: 0,Unnamed: 1,col1,col3
0,self,a,
0,other,c,
2,self,,3.0
2,other,,4.0


In [79]:
# Keep the equal values

df.compare(df2, keep_equal=True)

Unnamed: 0_level_0,col1,col1,col3,col3
Unnamed: 0_level_1,self,other,self,other
0,a,c,1.0,1.0
2,b,b,3.0,4.0


In [80]:
# Keep all original rows and columns

df.compare(df2, keep_shape=True)

Unnamed: 0_level_0,col1,col1,col2,col2,col3,col3
Unnamed: 0_level_1,self,other,self,other,self,other
0,a,c,,,,
1,,,,,,
2,,,,,3.0,4.0
3,,,,,,
4,,,,,,


In [81]:
# Keep all original rows and columns and also all original values

df.compare(df2, keep_shape=True, keep_equal=True)

Unnamed: 0_level_0,col1,col1,col2,col2,col3,col3
Unnamed: 0_level_1,self,other,self,other,self,other
0,a,c,1.0,1.0,1.0,1.0
1,a,a,2.0,2.0,2.0,2.0
2,b,b,3.0,3.0,3.0,4.0
3,b,b,,,4.0,4.0
4,a,a,5.0,5.0,5.0,5.0


### 3) join()

In [82]:
# join(other, on=None, how='left', lsuffix='', rsuffix='', sort=False)

# Join columns of another DataFrame.

# Join columns with other DataFrame either on index or on a key column. 
# Efficiently join multiple DataFrame objects by index at once by passing a list.

data1 = {
  "name": ["Sally", "Mary", "John"],
  "age": [50, 40, 30]
}

data2 = {
  "qualified": [True, False, False]
}

df1 = pd.DataFrame(data1)
df2 = pd.DataFrame(data2)

In [83]:
df1

Unnamed: 0,name,age
0,Sally,50
1,Mary,40
2,John,30


In [84]:
df2

Unnamed: 0,qualified
0,True
1,False
2,False


In [85]:
newdf = df1.join(other=df2)
newdf

Unnamed: 0,name,age,qualified
0,Sally,50,True
1,Mary,40,False
2,John,30,False


In [86]:
df = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3', 'K4', 'K5'],
                   'A': ['A0', 'A1', 'A2', 'A3', 'A4', 'A5']})
df

Unnamed: 0,key,A
0,K0,A0
1,K1,A1
2,K2,A2
3,K3,A3
4,K4,A4
5,K5,A5


In [87]:
other = pd.DataFrame({'key': ['K0', 'K1', 'K2'],
                      'B': ['B0', 'B1', 'B2']})
other

Unnamed: 0,key,B
0,K0,B0
1,K1,B1
2,K2,B2


In [88]:
# Join DataFrames using their indexes.

df.join(other=other, lsuffix='_caller', rsuffix='_other')

Unnamed: 0,key_caller,A,key_other,B
0,K0,A0,K0,B0
1,K1,A1,K1,B1
2,K2,A2,K2,B2
3,K3,A3,,
4,K4,A4,,
5,K5,A5,,


In [89]:
df.join(other=other.set_index('key'), on='key')

Unnamed: 0,key,A,B
0,K0,A0,B0
1,K1,A1,B1
2,K2,A2,B2
3,K3,A3,
4,K4,A4,
5,K5,A5,


In [90]:
df1 = pd.DataFrame({
    "city": ["new york","chicago","orlando"],
    "temperature": [21,14,35],
})
df1.set_index('city',inplace=True)
df1

Unnamed: 0_level_0,temperature
city,Unnamed: 1_level_1
new york,21
chicago,14
orlando,35


In [91]:
df2 = pd.DataFrame({
    "city": ["chicago","new york","orlando"],
    "humidity": [65,68,75],
})
df2.set_index('city',inplace=True)
df2

Unnamed: 0_level_0,humidity
city,Unnamed: 1_level_1
chicago,65
new york,68
orlando,75


In [92]:
df1.join(df2,lsuffix='_l', rsuffix='_r')

Unnamed: 0_level_0,temperature,humidity
city,Unnamed: 1_level_1,Unnamed: 2_level_1
new york,21,68
chicago,14,65
orlando,35,75


### 4) merge()

In [93]:
df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'],
                    'value': [1, 2, 3, 5]})

df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'],
                    'value': [5, 6, 7, 8]})

In [94]:
df1

Unnamed: 0,lkey,value
0,foo,1
1,bar,2
2,baz,3
3,foo,5


In [95]:
df2

Unnamed: 0,rkey,value
0,foo,5
1,bar,6
2,baz,7
3,foo,8


In [96]:
# Merge df1 and df2 on the lkey and rkey columns. The value columns have the default suffixes, _x and _y, appended.

df1.merge(right=df2, left_on='lkey', right_on='rkey')

Unnamed: 0,lkey,value_x,rkey,value_y
0,foo,1,foo,5
1,foo,1,foo,8
2,foo,5,foo,5
3,foo,5,foo,8
4,bar,2,bar,6
5,baz,3,baz,7


In [97]:
df1.merge(right=df2, left_on='lkey', right_on='rkey', suffixes=('_left', '_right'))

Unnamed: 0,lkey,value_left,rkey,value_right
0,foo,1,foo,5
1,foo,1,foo,8
2,foo,5,foo,5
3,foo,5,foo,8
4,bar,2,bar,6
5,baz,3,baz,7


In [98]:
df1 = pd.DataFrame({'a': ['foo', 'bar'], 'b': [1, 2]})
df2 = pd.DataFrame({'a': ['foo', 'baz'], 'c': [3, 4]})
df1

Unnamed: 0,a,b
0,foo,1
1,bar,2


In [99]:
df2

Unnamed: 0,a,c
0,foo,3
1,baz,4


In [100]:
df1.merge(right=df2, how='inner', on='a')

Unnamed: 0,a,b,c
0,foo,1,3


In [101]:
df1.merge(df2, how='left', on='a')

Unnamed: 0,a,b,c
0,foo,1,3.0
1,bar,2,


In [102]:
df1.merge(df2, how='right', on='a')

Unnamed: 0,a,b,c
0,foo,1.0,3
1,baz,,4


In [103]:
df1 = pd.DataFrame({'left': ['foo', 'bar']})
df2 = pd.DataFrame({'right': [7, 8]})
df1

Unnamed: 0,left
0,foo
1,bar


In [104]:
df2

Unnamed: 0,right
0,7
1,8


In [105]:
df1.merge(right=df2, how='cross')

Unnamed: 0,left,right
0,foo,7
1,foo,8
2,bar,7
3,bar,8


### 5) update()

In [106]:
# DataFrame.update(other, join='left', overwrite=True, filter_func=None, errors='ignore')

# Modify in place using non-NA values from another DataFrame.

df = pd.DataFrame({'A': [1, 2, 3],
                   'B': [400, 500, 600]})
new_df = pd.DataFrame({'B': [4, 5, 6],
                       'C': [7, 8, 9]})

In [107]:
df

Unnamed: 0,A,B
0,1,400
1,2,500
2,3,600


In [108]:
new_df

Unnamed: 0,B,C
0,4,7
1,5,8
2,6,9


In [109]:
df.update(other=new_df)

In [110]:
df
# The DataFrame’s length does not increase as a result of the update, only values at matching index/column labels are updated.

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


In [111]:
df = pd.DataFrame({'A': ['a', 'b', 'c'],
                   'B': ['x', 'y', 'z']})

new_df = pd.DataFrame({'B': ['d', 'e', 'f', 'g', 'h', 'i']})

df

Unnamed: 0,A,B
0,a,x
1,b,y
2,c,z


In [112]:
new_df

Unnamed: 0,B
0,d
1,e
2,f
3,g
4,h
5,i


In [113]:
df.update(new_df)

In [114]:
df

Unnamed: 0,A,B
0,a,d
1,b,e
2,c,f


In [115]:
# For Series, its name attribute must be set.

df = pd.DataFrame({'A': ['a', 'b', 'c'],
                   'B': ['x', 'y', 'z']})

new_column = pd.Series(['d', 'e'], name='B', index=[0, 2])

df

Unnamed: 0,A,B
0,a,x
1,b,y
2,c,z


In [116]:
new_column

0    d
2    e
Name: B, dtype: object

In [117]:
df.update(new_column)
df

Unnamed: 0,A,B
0,a,d
1,b,y
2,c,e


In [118]:
df = pd.DataFrame({'A': ['a', 'b', 'c'],
                   'B': ['x', 'y', 'z']})

new_df = pd.DataFrame({'B': ['d', 'e']}, index=[1, 2])

df

Unnamed: 0,A,B
0,a,x
1,b,y
2,c,z


In [119]:
new_df

Unnamed: 0,B
1,d
2,e


In [120]:
df.update(new_df)
df

Unnamed: 0,A,B
0,a,x
1,b,d
2,c,e


In [121]:
# If other contains NaNs the corresponding values are not updated in the original dataframe.

df = pd.DataFrame({'A': [1, 2, 3],
                   'B': [400, 500, 600]})
new_df = pd.DataFrame({'B': [4, np.nan, 6]})

df

Unnamed: 0,A,B
0,1,400
1,2,500
2,3,600


In [122]:
new_df

Unnamed: 0,B
0,4.0
1,
2,6.0


In [123]:
df.update(new_df)
df

Unnamed: 0,A,B
0,1,4.0
1,2,500.0
2,3,6.0


## Missing data handling