In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
%matplotlib inline

In [12]:
df = pd.DataFrame({'A':[1,2,np.nan],'B':[5,np.nan,np.nan],'C':[1,2,3]})
df['States']="CA NV AZ".split()
df.set_index('States',inplace=True)
df

Unnamed: 0_level_0,A,B,C
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CA,1.0,5.0,1
NV,2.0,,2
AZ,,,3


In [3]:
df

Unnamed: 0_level_0,A,B,C
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CA,1.0,5.0,1
NV,2.0,,2
AZ,,,3


In [4]:
#Require that many non-NA values.
df.dropna(thresh = 3,axis=1,inplace=True)

In [5]:
print("\nDropping any rows with a NaN value\n",'-'*35, sep='')
print(df.dropna(axis=0))



Dropping any rows with a NaN value
-----------------------------------
        C
States   
CA      1
NV      2
AZ      3


In [6]:
print("\nDropping any column with a NaN value\n",'-'*35, sep='')
print(df.dropna(axis=1))



Dropping any column with a NaN value
-----------------------------------
        C
States   
CA      1
NV      2
AZ      3


In [7]:
print("\nDropping a row with a minimum 2 NaN value using 'thresh' parameter\n",'-'*68, sep='')
print(df.dropna(axis=0, thresh=4))


Dropping a row with a minimum 2 NaN value using 'thresh' parameter
--------------------------------------------------------------------
Empty DataFrame
Columns: [C]
Index: []


In [8]:
df

Unnamed: 0_level_0,C
States,Unnamed: 1_level_1
CA,1
NV,2
AZ,3


In [9]:
df.fillna(2)

Unnamed: 0_level_0,C
States,Unnamed: 1_level_1
CA,1
NV,2
AZ,3


In [10]:
print("\nFilling values with a default value\n",'-'*35, sep='')
print(df.fillna(value='FILL VALUE'))



Filling values with a default value
-----------------------------------
        C
States   
CA      1
NV      2
AZ      3


In [13]:
df.fillna(value=df['A'].mean())

Unnamed: 0_level_0,A,B,C
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CA,1.0,5.0,1
NV,2.0,1.5,2
AZ,1.5,1.5,3


In [14]:
print("\nFilling values with a computed value (mean of column A here)\n",'-'*60, sep='')
print(df.fillna(value=df['A'].mean()))


Filling values with a computed value (mean of column A here)
------------------------------------------------------------
          A    B  C
States             
CA      1.0  5.0  1
NV      2.0  1.5  2
AZ      1.5  1.5  3


In [15]:
# Create dataframe
data = {'Company':['GOOG','GOOG','MSFT','MSFT','FB','FB'],
       'Person':['Sam','Charlie','Amy','Vanessa','Carl','Sarah'],
       'Sales':[200,120,340,124,243,350]}
df = pd.DataFrame(data)
df

Unnamed: 0,Company,Person,Sales
0,GOOG,Sam,200
1,GOOG,Charlie,120
2,MSFT,Amy,340
3,MSFT,Vanessa,124
4,FB,Carl,243
5,FB,Sarah,350


In [16]:
g = df.groupby('Company')
g

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001C5A74975B0>

In [17]:
byComp = df.groupby('Company')
print("\nGrouping by 'Company' column and listing mean sales\n",'-'*55, sep='')
print(byComp.mean())



Grouping by 'Company' column and listing mean sales
-------------------------------------------------------
         Sales
Company       
FB       296.5
GOOG     160.0
MSFT     232.0


In [18]:
print("\nGrouping by 'Company' column and listing sum of sales\n",'-'*55, sep='')
print(byComp.sum())



Grouping by 'Company' column and listing sum of sales
-------------------------------------------------------
         Sales
Company       
FB         593
GOOG       320
MSFT       464


In [19]:
print("\nAll in one line of command (Stats for 'FB')\n",'-'*65, sep='')
print(pd.DataFrame(df.groupby('Company').describe().loc['FB']).transpose())



All in one line of command (Stats for 'FB')
-----------------------------------------------------------------
   Sales                                                       
   count   mean        std    min     25%    50%     75%    max
FB   2.0  296.5  75.660426  243.0  269.75  296.5  323.25  350.0


In [20]:
df.groupby("Company").describe().iloc[1:3,1:3]

Unnamed: 0_level_0,Sales,Sales
Unnamed: 0_level_1,mean,std
Company,Unnamed: 1_level_2,Unnamed: 2_level_2
GOOG,160.0,56.568542
MSFT,232.0,152.735065


In [21]:
df.groupby('Company').describe().loc['GOOG']['Sales']['std']


56.568542494923804

In [22]:
df.groupby('Company').describe()['Sales'].loc[['GOOG','MSFT'],['mean','std']]

Unnamed: 0_level_0,mean,std
Company,Unnamed: 1_level_1,Unnamed: 2_level_1
GOOG,160.0,56.568542
MSFT,232.0,152.735065


In [23]:
d = (df.groupby('Company').describe())
df1 = d.T
df1

Unnamed: 0,Company,FB,GOOG,MSFT
Sales,count,2.0,2.0,2.0
Sales,mean,296.5,160.0,232.0
Sales,std,75.660426,56.568542,152.735065
Sales,min,243.0,120.0,124.0
Sales,25%,269.75,140.0,178.0
Sales,50%,296.5,160.0,232.0
Sales,75%,323.25,180.0,286.0
Sales,max,350.0,200.0,340.0


In [24]:
df1.loc['Sales'].loc['mean']

Company
FB      296.5
GOOG    160.0
MSFT    232.0
Name: mean, dtype: float64

In [25]:
(pd.DataFrame(df.groupby('Company').describe().loc['FB'])).transpose()

Unnamed: 0_level_0,Sales,Sales,Sales,Sales,Sales,Sales,Sales,Sales
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
FB,2.0,296.5,75.660426,243.0,269.75,296.5,323.25,350.0


In [26]:
(pd.DataFrame(df.groupby('Company').describe().loc['FB'])).transpose()

Unnamed: 0_level_0,Sales,Sales,Sales,Sales,Sales,Sales,Sales,Sales
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
FB,2.0,296.5,75.660426,243.0,269.75,296.5,323.25,350.0


In [27]:
print("\nSame type of extraction with little different command\n",'-'*68, sep='')
print(df.groupby('Company').describe().loc[['GOOG', 'MSFT']])


Same type of extraction with little different command
--------------------------------------------------------------------
        Sales                                                      
        count   mean         std    min    25%    50%    75%    max
Company                                                            
GOOG      2.0  160.0   56.568542  120.0  140.0  160.0  180.0  200.0
MSFT      2.0  232.0  152.735065  124.0  178.0  232.0  286.0  340.0


In [28]:
# Merging two data frames
# Creating data frames
df1 = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'],
                        'B': ['B0', 'B1', 'B2', 'B3'],
                        'C': ['C0', 'C1', 'C2', 'C3'],
                        'D': ['D0', 'D1', 'D2', 'D3']},
                        index=[0, 1, 2, 3])


In [29]:
df1

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3


In [30]:
df2 = pd.DataFrame({'AD': ['A4', 'A5', 'A6', 'A7'],
                        'B': ['B4', 'B5', 'B6', 'B7'],
                        'C': ['C4', 'C5', 'C6', 'C7'],
                        'D': ['D4', 'D5', 'D6', 'D7']},
                         index=[0, 1, 2, 3])


In [31]:
df2

Unnamed: 0,AD,B,C,D
0,A4,B4,C4,D4
1,A5,B5,C5,D5
2,A6,B6,C6,D6
3,A7,B7,C7,D7


In [32]:
df3 = pd.DataFrame({'A': ['A8', 'A9', 'A10', 'A11'],
                        'B': ['B8', 'B9', 'B10', 'B11'],
                        'C': ['C8', 'C9', 'C10', 'C11'],
                        'D': ['D8', 'D9', 'D10', 'D11']},
                        index=[0, 1, 2, 3])


In [33]:
df3

Unnamed: 0,A,B,C,D
0,A8,B8,C8,D8
1,A9,B9,C9,D9
2,A10,B10,C10,D10
3,A11,B11,C11,D11


In [34]:
print("\nThe DataFrame number 1\n",'-'*30, sep='')
print(df1)



The DataFrame number 1
------------------------------
    A   B   C   D
0  A0  B0  C0  D0
1  A1  B1  C1  D1
2  A2  B2  C2  D2
3  A3  B3  C3  D3


In [35]:
print("\nThe DataFrame number 2\n",'-'*30, sep='')
print(df2)



The DataFrame number 2
------------------------------
   AD   B   C   D
0  A4  B4  C4  D4
1  A5  B5  C5  D5
2  A6  B6  C6  D6
3  A7  B7  C7  D7


In [36]:
print("\nThe DataFrame number 3\n",'-'*30, sep='')
print(df3)


The DataFrame number 3
------------------------------
     A    B    C    D
0   A8   B8   C8   D8
1   A9   B9   C9   D9
2  A10  B10  C10  D10
3  A11  B11  C11  D11


In [37]:
#concatenation
df_cat1 = pd.concat([df1,df2,df3], axis=1)
print("\nAfter concatenation along row\n",'-'*30, sep='')
df_cat1


After concatenation along row
------------------------------


Unnamed: 0,A,B,C,D,AD,B.1,C.1,D.1,A.1,B.2,C.2,D.2
0,A0,B0,C0,D0,A4,B4,C4,D4,A8,B8,C8,D8
1,A1,B1,C1,D1,A5,B5,C5,D5,A9,B9,C9,D9
2,A2,B2,C2,D2,A6,B6,C6,D6,A10,B10,C10,D10
3,A3,B3,C3,D3,A7,B7,C7,D7,A11,B11,C11,D11


In [38]:
df_cat1.iloc[3]

A      A3
B      B3
C      C3
D      D3
AD     A7
B      B7
C      C7
D      D7
A     A11
B     B11
C     C11
D     D11
Name: 3, dtype: object

In [40]:
df_cat2 = pd.concat([df1,df2,df3], axis=1)
print("\nAfter concatenation along column\n",'-'*60, sep='')
print(df_cat2)



After concatenation along column
------------------------------------------------------------
    A   B   C   D  AD   B   C   D    A    B    C    D
0  A0  B0  C0  D0  A4  B4  C4  D4   A8   B8   C8   D8
1  A1  B1  C1  D1  A5  B5  C5  D5   A9   B9   C9   D9
2  A2  B2  C2  D2  A6  B6  C6  D6  A10  B10  C10  D10
3  A3  B3  C3  D3  A7  B7  C7  D7  A11  B11  C11  D11


In [41]:
df_cat1.fillna(value=0, inplace=True)
print("\nAfter filling missing values with zero\n",'-'*60, sep='')
print(df_cat1)


After filling missing values with zero
------------------------------------------------------------
    A   B   C   D  AD   B   C   D    A    B    C    D
0  A0  B0  C0  D0  A4  B4  C4  D4   A8   B8   C8   D8
1  A1  B1  C1  D1  A5  B5  C5  D5   A9   B9   C9   D9
2  A2  B2  C2  D2  A6  B6  C6  D6  A10  B10  C10  D10
3  A3  B3  C3  D3  A7  B7  C7  D7  A11  B11  C11  D11


In [42]:
# merging by a common key

In [43]:
df1 = pd.DataFrame({'key1': ['K0', 'K8', 'K2', 'K3'],
                     'A': ['A0', 'A1', 'A2', 'A3'],
                    'C': ['A0', 'A1', 'A2', 'A3'],
                     'B': ['B0', 'B1', 'B2', 'B3']})
   
df2 = pd.DataFrame({'key2': ['K0', 'K1', 'K2', 'K3'],
                          'C': ['C0', 'C1', 'C2', 'C3'],
                          'D': ['D0', 'D1', 'D2', 'D3']})


In [44]:
df1

Unnamed: 0,key1,A,C,B
0,K0,A0,A0,B0
1,K8,A1,A1,B1
2,K2,A2,A2,B2
3,K3,A3,A3,B3


In [45]:
df2

Unnamed: 0,key2,C,D
0,K0,C0,D0
1,K1,C1,D1
2,K2,C2,D2
3,K3,C3,D3


In [46]:
pd.merge(df1,df2,how='left',left_on = 'key1',right_on='key2')

Unnamed: 0,key1,A,C_x,B,key2,C_y,D
0,K0,A0,A0,B0,K0,C0,D0
1,K8,A1,A1,B1,,,
2,K2,A2,A2,B2,K2,C2,D2
3,K3,A3,A3,B3,K3,C3,D3


In [47]:
merge1= pd.merge(left,right,how='inner',on='key')
print("\nAfter simple merging with 'inner' method\n",'-'*50, sep='')
print(merge1)

NameError: name 'left' is not defined

In [48]:
left = pd.DataFrame({'key1': ['K0', 'K0', 'K1', 'K2'],
                     'key2': ['K0', 'K1', 'K0', 'K1'],
                        'A': ['A0', 'A1', 'A2', 'A3'],
                        'B': ['B0', 'B1', 'B2', 'B3']})
    
right = pd.DataFrame({'key1': ['K0', 'K1', 'K1', 'K2'],
                               'key2': ['K0', 'K0', 'K0', 'K0'],
                                  'C': ['C0', 'C1', 'C2', 'C3'],
                                  'D': ['D0', 'D1', 'D2', 'D3']})

In [49]:
left

Unnamed: 0,key1,key2,A,B
0,K0,K0,A0,B0
1,K0,K1,A1,B1
2,K1,K0,A2,B2
3,K2,K1,A3,B3


In [50]:
right

Unnamed: 0,key1,key2,C,D
0,K0,K0,C0,D0
1,K1,K0,C1,D1
2,K1,K0,C2,D2
3,K2,K0,C3,D3


In [51]:
pd.merge(left, right, on=['key1', 'key2'])

Unnamed: 0,key1,key2,A,B,C,D
0,K0,K0,A0,B0,C0,D0
1,K1,K0,A2,B2,C1,D1
2,K1,K0,A2,B2,C2,D2


In [52]:
pd.merge(left, right, how='left',on=['key1', 'key2'])

Unnamed: 0,key1,key2,A,B,C,D
0,K0,K0,A0,B0,C0,D0
1,K0,K1,A1,B1,,
2,K1,K0,A2,B2,C1,D1
3,K1,K0,A2,B2,C2,D2
4,K2,K1,A3,B3,,


In [53]:
pd.merge(left, right, how='right',on=['key1', 'key2'])

Unnamed: 0,key1,key2,A,B,C,D
0,K0,K0,A0,B0,C0,D0
1,K1,K0,A2,B2,C1,D1
2,K1,K0,A2,B2,C2,D2
3,K2,K0,,,C3,D3


In [54]:
#join operators
left = pd.DataFrame({'A': ['A0', 'A1', 'A2'],
                     'B': ['B0', 'B1', 'B2']},
                      index=['K0', 'K1', 'K2']) 

right = pd.DataFrame({'C': ['C0', 'C2', 'C3'],
                    'D': ['D0', 'D2', 'D3']},
                      index=['K0', 'K2', 'K3'])

In [55]:
left

Unnamed: 0,A,B
K0,A0,B0
K1,A1,B1
K2,A2,B2


In [56]:
right

Unnamed: 0,C,D
K0,C0,D0
K2,C2,D2
K3,C3,D3


In [57]:
left.join(right)

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K1,A1,B1,,
K2,A2,B2,C2,D2


In [58]:
left.join(right, how='outer')

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K1,A1,B1,,
K2,A2,B2,C2,D2
K3,,,C3,D3


In [59]:
d1 = pd.DataFrame({'key1': ['K0', 'K1', 'K2', 'K3'],
                     'A': ['A0', 'A1', 'A2', 'A3'],
                     'B': ['B0', 'B1', 'B2', 'B3']})
   
d2 = pd.DataFrame({'key2': ['K0', 'K1', 'K2', 'K3'],
                          'C': ['C0', 'C1', 'C2', 'C3'],
                          'D': ['D0', 'D1', 'D2', 'D3']})
d3 = pd.DataFrame({'key3': ['K0', 'K1', 'K2', 'K3'],
                     'A': ['A0', 'A1', 'A2', 'A3'],
                     'B': ['B0', 'B1', 'B2', 'B3']})
df_cat1 = pd.concat([d1,d2,d3], axis=1)  
df_cat1 = pd.concat([d1,d2,d3])
merge1= pd.merge(d1,d2,how='outer',left_on='key1',right_on='key2')



In [60]:
merge1

Unnamed: 0,key1,A,B,key2,C,D
0,K0,A0,B0,K0,C0,D0
1,K1,A1,B1,K1,C1,D1
2,K2,A2,B2,K2,C2,D2
3,K3,A3,B3,K3,C3,D3


In [61]:
df_cat1 = pd.concat([d1,d2,d3])

In [62]:
df_cat1 = pd.concat([d1,d2,d3], axis=1)  

In [63]:
# use of apply functions

In [64]:
# Define a function
def testfunc(x):
    if (x> 500):
        return (10*np.log10(x))
    else:
        return (x/10)

In [65]:
df = pd.DataFrame({'col1':[1,2,3,4,5,6,7,8,9,10],
                   'col2':[444,555,666,444,333,222,666,777,666,555],
                   'col3':'aaa bb c dd eeee fff gg h iii j'.split()})
df.columns

Index(['col1', 'col2', 'col3'], dtype='object')

In [66]:
df['col20'] = df['col2'].apply(testfunc)
df

Unnamed: 0,col1,col2,col3,col20
0,1,444,aaa,44.4
1,2,555,bb,27.44293
2,3,666,c,28.234742
3,4,444,dd,44.4
4,5,333,eeee,33.3
5,6,222,fff,22.2
6,7,666,gg,28.234742
7,8,777,h,28.90421
8,9,666,iii,28.234742
9,10,555,j,27.44293


In [67]:
df['FuncApplied'] = df['col2'].apply(lambda x : np.log(x))
print(df)

   col1  col2  col3      col20  FuncApplied
0     1   444   aaa  44.400000     6.095825
1     2   555    bb  27.442930     6.318968
2     3   666     c  28.234742     6.501290
3     4   444    dd  44.400000     6.095825
4     5   333  eeee  33.300000     5.808142
5     6   222   fff  22.200000     5.402677
6     7   666    gg  28.234742     6.501290
7     8   777     h  28.904210     6.655440
8     9   666   iii  28.234742     6.501290
9    10   555     j  27.442930     6.318968


In [68]:
df['col3length']= df['col3'].apply(len)
print(df)

   col1  col2  col3      col20  FuncApplied  col3length
0     1   444   aaa  44.400000     6.095825           3
1     2   555    bb  27.442930     6.318968           2
2     3   666     c  28.234742     6.501290           1
3     4   444    dd  44.400000     6.095825           2
4     5   333  eeee  33.300000     5.808142           4
5     6   222   fff  22.200000     5.402677           3
6     7   666    gg  28.234742     6.501290           2
7     8   777     h  28.904210     6.655440           1
8     9   666   iii  28.234742     6.501290           3
9    10   555     j  27.442930     6.318968           1


In [69]:
df['test']= df['FuncApplied'].apply(lambda x: np.sqrt(x))
df

Unnamed: 0,col1,col2,col3,col20,FuncApplied,col3length,test
0,1,444,aaa,44.4,6.095825,3,2.468972
1,2,555,bb,27.44293,6.318968,2,2.513756
2,3,666,c,28.234742,6.50129,1,2.549763
3,4,444,dd,44.4,6.095825,2,2.468972
4,5,333,eeee,33.3,5.808142,4,2.410009
5,6,222,fff,22.2,5.402677,3,2.324366
6,7,666,gg,28.234742,6.50129,2,2.549763
7,8,777,h,28.90421,6.65544,1,2.579814
8,9,666,iii,28.234742,6.50129,3,2.549763
9,10,555,j,27.44293,6.318968,1,2.513756


In [70]:
print("\nSum of the column 'FuncApplied' is: ",df['FuncApplied'].sum())



Sum of the column 'FuncApplied' is:  62.19971458619886


In [71]:
print("Mean of the column 'FuncApplied' is: ",df['FuncApplied'].mean())


Mean of the column 'FuncApplied' is:  6.219971458619886


In [72]:
print("Std dev of the column 'FuncApplied' is: ",df['FuncApplied'].std())


Std dev of the column 'FuncApplied' is:  0.3822522801574853


In [73]:
print("Min and max of the column 'FuncApplied' are: ",df['FuncApplied'].min(),"and",df['FuncApplied'].max())

Min and max of the column 'FuncApplied' are:  5.402677381872279 and 6.655440350367647


In [74]:
### Deletion, sorting, list of column and row names

In [75]:
print("\nName of columns\n",'-'*20, sep='')
print(df.columns)



Name of columns
--------------------
Index(['col1', 'col2', 'col3', 'col20', 'FuncApplied', 'col3length', 'test'], dtype='object')


In [76]:
l = list(df.columns)
print("\nColumn names in a list of strings for later manipulation:",l)


Column names in a list of strings for later manipulation: ['col1', 'col2', 'col3', 'col20', 'FuncApplied', 'col3length', 'test']


In [77]:
print("\nDeleting last column by 'del' command\n",'-'*50, sep='')
del df['col3length']
print(df)
df['col3length']= df['col3'].apply(len)


Deleting last column by 'del' command
--------------------------------------------------
   col1  col2  col3      col20  FuncApplied      test
0     1   444   aaa  44.400000     6.095825  2.468972
1     2   555    bb  27.442930     6.318968  2.513756
2     3   666     c  28.234742     6.501290  2.549763
3     4   444    dd  44.400000     6.095825  2.468972
4     5   333  eeee  33.300000     5.808142  2.410009
5     6   222   fff  22.200000     5.402677  2.324366
6     7   666    gg  28.234742     6.501290  2.549763
7     8   777     h  28.904210     6.655440  2.579814
8     9   666   iii  28.234742     6.501290  2.549763
9    10   555     j  27.442930     6.318968  2.513756


In [78]:
df.sort_values(by='col2') #inplace=False by default

Unnamed: 0,col1,col2,col3,col20,FuncApplied,test,col3length
5,6,222,fff,22.2,5.402677,2.324366,3
4,5,333,eeee,33.3,5.808142,2.410009,4
0,1,444,aaa,44.4,6.095825,2.468972,3
3,4,444,dd,44.4,6.095825,2.468972,2
1,2,555,bb,27.44293,6.318968,2.513756,2
9,10,555,j,27.44293,6.318968,2.513756,1
2,3,666,c,28.234742,6.50129,2.549763,1
6,7,666,gg,28.234742,6.50129,2.549763,2
8,9,666,iii,28.234742,6.50129,2.549763,3
7,8,777,h,28.90421,6.65544,2.579814,1


In [79]:
df.sort_values(by='FuncApplied',ascending=False) #inplace=False by default

Unnamed: 0,col1,col2,col3,col20,FuncApplied,test,col3length
7,8,777,h,28.90421,6.65544,2.579814,1
2,3,666,c,28.234742,6.50129,2.549763,1
6,7,666,gg,28.234742,6.50129,2.549763,2
8,9,666,iii,28.234742,6.50129,2.549763,3
1,2,555,bb,27.44293,6.318968,2.513756,2
9,10,555,j,27.44293,6.318968,2.513756,1
0,1,444,aaa,44.4,6.095825,2.468972,3
3,4,444,dd,44.4,6.095825,2.468972,2
4,5,333,eeee,33.3,5.808142,2.410009,4
5,6,222,fff,22.2,5.402677,2.324366,3


In [80]:
df = pd.DataFrame({'col1':[1,2,3,np.nan],
                   'col2':[None,555,666,444],
                   'col3':['abc','def','ghi','xyz']})
df.head()

Unnamed: 0,col1,col2,col3
0,1.0,,abc
1,2.0,555.0,def
2,3.0,666.0,ghi
3,,444.0,xyz


In [81]:
df.isnull()

Unnamed: 0,col1,col2,col3
0,False,True,False
1,False,False,False
2,False,False,False
3,True,False,False


In [82]:
df.fillna('FILL')

Unnamed: 0,col1,col2,col3
0,1,FILL,abc
1,2,555,def
2,3,666,ghi
3,FILL,444,xyz


In [83]:
df1


Unnamed: 0,key1,A,C,B
0,K0,A0,A0,B0
1,K8,A1,A1,B1
2,K2,A2,A2,B2
3,K3,A3,A3,B3


In [84]:
df2

Unnamed: 0,key2,C,D
0,K0,C0,D0
1,K1,C1,D1
2,K2,C2,D2
3,K3,C3,D3


In [85]:
df3

Unnamed: 0,A,B,C,D
0,A8,B8,C8,D8
1,A9,B9,C9,D9
2,A10,B10,C10,D10
3,A11,B11,C11,D11


In [86]:
pd.merge(df1, df2, how='inner')

Unnamed: 0,key1,A,C,B,key2,D


In [87]:
pd.merge(df1, df2, how='outer')

Unnamed: 0,key1,A,C,B,key2,D
0,K0,A0,A0,B0,,
1,K8,A1,A1,B1,,
2,K2,A2,A2,B2,,
3,K3,A3,A3,B3,,
4,,,C0,,K0,D0
5,,,C1,,K1,D1
6,,,C2,,K2,D2
7,,,C3,,K3,D3


In [88]:
pd.merge(df1, df2, how='left')

Unnamed: 0,key1,A,C,B,key2,D
0,K0,A0,A0,B0,,
1,K8,A1,A1,B1,,
2,K2,A2,A2,B2,,
3,K3,A3,A3,B3,,


In [89]:
pd.merge(df1, df2, how='right')

Unnamed: 0,key1,A,C,B,key2,D
0,,,C0,,K0,D0
1,,,C1,,K1,D1
2,,,C2,,K2,D2
3,,,C3,,K3,D3


In [90]:
pd.read_csv('https://raw.githubusercontent.com/PramodShenoy/911-Calls/master/911.csv')

Unnamed: 0,lat,lng,desc,zip,title,timeStamp,twp,addr,e
0,40.297876,-75.581294,REINDEER CT & DEAD END; NEW HANOVER; Station ...,19525.0,EMS: BACK PAINS/INJURY,2015-12-10 17:40:00,NEW HANOVER,REINDEER CT & DEAD END,1
1,40.258061,-75.264680,BRIAR PATH & WHITEMARSH LN; HATFIELD TOWNSHIP...,19446.0,EMS: DIABETIC EMERGENCY,2015-12-10 17:40:00,HATFIELD TOWNSHIP,BRIAR PATH & WHITEMARSH LN,1
2,40.121182,-75.351975,HAWS AVE; NORRISTOWN; 2015-12-10 @ 14:39:21-St...,19401.0,Fire: GAS-ODOR/LEAK,2015-12-10 17:40:00,NORRISTOWN,HAWS AVE,1
3,40.116153,-75.343513,AIRY ST & SWEDE ST; NORRISTOWN; Station 308A;...,19401.0,EMS: CARDIAC EMERGENCY,2015-12-10 17:40:01,NORRISTOWN,AIRY ST & SWEDE ST,1
4,40.251492,-75.603350,CHERRYWOOD CT & DEAD END; LOWER POTTSGROVE; S...,,EMS: DIZZINESS,2015-12-10 17:40:01,LOWER POTTSGROVE,CHERRYWOOD CT & DEAD END,1
...,...,...,...,...,...,...,...,...,...
99487,40.132869,-75.333515,MARKLEY ST & W LOGAN ST; NORRISTOWN; 2016-08-2...,19401.0,Traffic: VEHICLE ACCIDENT -,2016-08-24 11:06:00,NORRISTOWN,MARKLEY ST & W LOGAN ST,1
99488,40.006974,-75.289080,LANCASTER AVE & RITTENHOUSE PL; LOWER MERION; ...,19003.0,Traffic: VEHICLE ACCIDENT -,2016-08-24 11:07:02,LOWER MERION,LANCASTER AVE & RITTENHOUSE PL,1
99489,40.115429,-75.334679,CHESTNUT ST & WALNUT ST; NORRISTOWN; Station ...,19401.0,EMS: FALL VICTIM,2016-08-24 11:12:00,NORRISTOWN,CHESTNUT ST & WALNUT ST,1
99490,40.186431,-75.192555,WELSH RD & WEBSTER LN; HORSHAM; Station 352; ...,19002.0,EMS: NAUSEA/VOMITING,2016-08-24 11:17:01,HORSHAM,WELSH RD & WEBSTER LN,1
