In [1]:
import pandas as pd
import numpy as np

In [2]:
def make_df(cols, ind):
    """Build a small labelled DataFrame for the concat/join/merge demos.

    Every cell holds the string form of its column label followed by the
    string form of its row label (column 'A', row 0 -> 'A0').

    Parameters
    ----------
    cols : iterable
        Column labels; iterating a string yields one column per character.
    ind : iterable
        Row labels, also passed to pandas as the index of the result.

    Returns
    -------
    pandas.DataFrame
    """
    table = {}
    for col in cols:
        table[col] = [f"{col!s}{row!s}" for row in ind]
    return pd.DataFrame(table, index=ind)

In [3]:
# Build df1 and df2 with two separate make_df calls. Multiplying a one-element
# list by 2 would bind both names to the *same* DataFrame object, so an
# in-place change made through one name would silently show up in the other.
df1, df2 = (make_df('ABCD', range(4)) for _ in range(2))
# Rows labelled 2..9, so only labels 2 and 3 overlap with df1/df2.
df3 = make_df('ABCD', range(2, 10))
# Shares the 'B' column and the 0..3 row labels with df1.
df4 = make_df('BF', range(4))

In [4]:
# Stack df1 on top of df2 row-wise. The keywords are spelled out as a tour of
# the pd.concat signature; only ignore_index differs from its default.
# `copy` is deliberately not passed: the keyword is deprecated as of pandas 3.0
# (Copy-on-Write), and omitting it leaves the displayed result unchanged.
pd.concat(
    [df1, df2],
    axis=0,                  # concatenate along the rows
    join='outer',            # keep the union of the columns
    ignore_index=True,       # discard both indexes and renumber 0..n-1
    keys=None,
    levels=None,
    names=None,
    verify_integrity=False,  # do not raise on duplicate index labels
    sort=False,              # do not sort the non-concatenation axis
)

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
4,A0,B0,C0,D0
5,A1,B1,C1,D1
6,A2,B2,C2,D2
7,A3,B3,C3,D3


In [5]:
# Index-on-index left join. Both frames have a 'B' column, so df1's copy is
# renamed with lsuffix (-> 'Bx') while df4's keeps its name (empty rsuffix).
df1.join(
    df4,
    on=None,
    how='left',
    lsuffix='x',
    rsuffix='',
    sort=False,
)

Unnamed: 0,A,Bx,C,D,B,F
0,A0,B0,C0,D0,B0,F0
1,A1,B1,C1,D1,B1,F1
2,A2,B2,C2,D2,B2,F2
3,A3,B3,C3,D3,B3,F3


In [6]:
# Inner merge of df1 and df4. No key is named (on/left_on/right_on are None and
# both index flags are False), so pandas joins on the column the two frames
# have in common, 'B'. The keywords are a tour of the pd.merge signature.
# `copy` is deliberately not passed: the keyword is deprecated as of pandas 3.0
# (Copy-on-Write), and omitting it leaves the displayed result unchanged.
pd.merge(
    df1,
    df4,
    how='inner',
    on=None,
    left_on=None,
    right_on=None,
    left_index=False,
    right_index=False,
    sort=False,
    suffixes=('_x', '_y'),  # appended only to overlapping non-key columns
    indicator=False,
    validate=None,
)

Unnamed: 0,A,B,C,D,F
0,A0,B0,C0,D0,F0
1,A1,B1,C1,D1,F1
2,A2,B2,C2,D2,F2
3,A3,B3,C3,D3,F3


In [7]:
# Two small tables whose only shared column is 'Name'; Alice and Bob appear in
# both, Charlie only on the left, David only on the right.
df1 = pd.DataFrame(
    {
        'Name': ['Alice', 'Bob', 'Charlie'],
        'Department': ['HR', 'Engineering', 'Sales'],
    }
)
df2 = pd.DataFrame(
    {
        'Name': ['Alice', 'Bob', 'David'],
        'Salary': [5000, 6000, 5500],
    }
)

# An outer merge keeps every name from either table; the side with no match
# is filled with NaN.
df1.merge(df2, how='outer')

Unnamed: 0,Name,Department,Salary
0,Alice,HR,5000.0
1,Bob,Engineering,6000.0
2,Charlie,Sales,
3,David,,5500.0


In [8]:
# The same four people appear in both tables, but the key column is called
# 'employee' in df1 and 'names' in df2.
df1 = pd.DataFrame(
    {
        'employee': ['Bob', 'Jake', 'Lisa', 'Sue'],
        'group': ['Accounting', 'Engineering', 'Engineering', 'HR'],
    }
)
df2 = pd.DataFrame(
    {
        'names': ['Lisa', 'Bob', 'Jake', 'Sue'],
        'hire_date': [2004, 2008, 2012, 2014],
    }
)

# Show both frames as a tuple (plain-text repr).
(df1, df2)

(  employee        group
 0      Bob   Accounting
 1     Jake  Engineering
 2     Lisa  Engineering
 3      Sue           HR,
   names  hire_date
 0  Lisa       2004
 1   Bob       2008
 2  Jake       2012
 3   Sue       2014)

In [9]:
# When both frames share a column name, pd.merge() uses it as the join key
# automatically. Here the key is called 'employee' in df1 but 'names' in df2,
# so each side's key has to be named explicitly with left_on / right_on.

# one-to-one join: every employee matches exactly one row of df2
df3 = df1.merge(df2, left_on='employee', right_on='names')
df3

Unnamed: 0,employee,group,names,hire_date
0,Bob,Accounting,Bob,2008
1,Jake,Engineering,Jake,2012
2,Lisa,Engineering,Lisa,2004
3,Sue,HR,Sue,2014


In [10]:
# Get rid of the redundant key column. drop() returns a new frame and the
# result is not assigned here, so df3 itself still carries 'names'.
df3.drop(columns='names')

Unnamed: 0,employee,group,hire_date
0,Bob,Accounting,2008
1,Jake,Engineering,2012
2,Lisa,Engineering,2004
3,Sue,HR,2014


In [11]:
# One supervisor per group: each 'group' value occurs exactly once.
df4 = pd.DataFrame(
    {
        'group': ['Accounting', 'Engineering', 'HR'],
        'supervisor': ['Carly', 'Guido', 'Steve'],
    }
)
df4

Unnamed: 0,group,supervisor
0,Accounting,Carly
1,Engineering,Guido
2,HR,Steve


In [12]:
# many-to-one join on the shared 'group' column: 'Engineering' occurs twice in
# df3 and once in df4, so its supervisor is repeated for both employees.
df5 = df3.merge(df4)
df5

Unnamed: 0,employee,group,names,hire_date,supervisor
0,Bob,Accounting,Bob,2008,Carly
1,Jake,Engineering,Jake,2012,Guido
2,Lisa,Engineering,Lisa,2004,Guido
3,Sue,HR,Sue,2014,Steve


In [13]:
# many-to-many join input: 'group' values repeat in df1 (two Engineering rows)
# and in df6 (two skills per group).
df6 = pd.DataFrame(
    {
        'group': ['Accounting', 'Accounting',
                  'Engineering', 'Engineering', 'HR', 'HR'],
        'skills': ['math', 'spreadsheets', 'coding', 'linux',
                   'spreadsheets', 'organization'],
    }
)
# display() returns None, so the cell shows nothing beyond the two frames.
display(df1, df6)

Unnamed: 0,employee,group
0,Bob,Accounting
1,Jake,Engineering
2,Lisa,Engineering
3,Sue,HR


Unnamed: 0,group,skills
0,Accounting,math
1,Accounting,spreadsheets
2,Engineering,coding
3,Engineering,linux
4,HR,spreadsheets
5,HR,organization


In [14]:
# Merge on the shared 'group' column; every employee is paired with every
# skill listed for their group.
df7 = df1.merge(df6)
df7

Unnamed: 0,employee,group,skills
0,Bob,Accounting,math
1,Bob,Accounting,spreadsheets
2,Jake,Engineering,coding
3,Jake,Engineering,linux
4,Lisa,Engineering,coding
5,Lisa,Engineering,linux
6,Sue,HR,spreadsheets
7,Sue,HR,organization


In [15]:
# Within each group, count how often each (employee, skills) row occurs.
df7.groupby(by='group').value_counts()

group        employee  skills      
Accounting   Bob       math            1
                       spreadsheets    1
Engineering  Jake      coding          1
                       linux           1
             Lisa      coding          1
                       linux           1
HR           Sue       organization    1
                       spreadsheets    1
Name: count, dtype: int64

In [16]:
# How many merged rows mention each skill, most frequent first.
df7.loc[:, 'skills'].value_counts()

skills
spreadsheets    2
coding          2
linux           2
math            1
organization    1
Name: count, dtype: int64

In [17]:
# Move 'employee' into the index and label that index 'names'.
# NOTE(review): df7 is rebound to a frame without an 'employee' column, so
# running this cell a second time raises KeyError until df7 is rebuilt.
df7 = df7.set_index('employee').rename_axis('names')

# df8 gets one row per df7 row: the same (repeated) employee names plus a
# running counter.
employee = df7.index.values
df8 = pd.DataFrame(
    {
        'employee': employee,
        'somerandomvalues': np.arange(len(employee)),
    }
)

In [18]:
# Render both frames with one display() call. display() returns None, so the
# cell shows nothing else; chaining two calls into a tuple would need an extra
# statement just to hide the resulting (None, None).
display(df7, df8)

Unnamed: 0_level_0,group,skills
names,Unnamed: 1_level_1,Unnamed: 2_level_1
Bob,Accounting,math
Bob,Accounting,spreadsheets
Jake,Engineering,coding
Jake,Engineering,linux
Lisa,Engineering,coding
Lisa,Engineering,linux
Sue,HR,spreadsheets
Sue,HR,organization


Unnamed: 0,employee,somerandomvalues
0,Bob,0
1,Bob,1
2,Jake,2
3,Jake,3
4,Lisa,4
5,Lisa,5
6,Sue,6
7,Sue,7


In [19]:
# Join df7's index against df8's 'employee' column, then make 'employee' the
# index of the result. Employee names repeat on both sides, so every df7 row
# is paired with every df8 row for the same employee.
df9 = (
    pd.merge(df7, df8, left_index=True, right_on='employee')
    .set_index('employee')
)
df9

Unnamed: 0_level_0,group,skills,somerandomvalues
employee,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Bob,Accounting,math,0
Bob,Accounting,math,1
Bob,Accounting,spreadsheets,0
Bob,Accounting,spreadsheets,1
Jake,Engineering,coding,2
Jake,Engineering,coding,3
Jake,Engineering,linux,2
Jake,Engineering,linux,3
Lisa,Engineering,coding,4
Lisa,Engineering,coding,5


In [25]:
# Only 'Mary' appears in both tables; column order follows the dict key order.
df10 = pd.DataFrame(
    {
        'name': ['Peter', 'Paul', 'Mary'],
        'food': ['fish', 'beans', 'bread'],
    }
)
df11 = pd.DataFrame(
    {
        'name': ['Mary', 'Joseph'],
        'drink': ['wine', 'beer'],
    }
)

In [32]:
# how='inner' is the default, so only names present in both frames are kept
display(df10, df11, df10.merge(df11))

Unnamed: 0,name,food
0,Peter,fish
1,Paul,beans
2,Mary,bread


Unnamed: 0,name,drink
0,Mary,wine
1,Joseph,beer


Unnamed: 0,name,food,drink
0,Mary,bread,wine


In [33]:
# how='outer' keeps every name from either frame; missing values become NaN
display(df10.merge(df11, how='outer'))

Unnamed: 0,name,food,drink
0,Peter,fish,
1,Paul,beans,
2,Mary,bread,wine
3,Joseph,,beer


In [34]:
# how='left' keeps every row of df10 and adds 'drink' only where a name matches
df10.merge(df11, how='left')

Unnamed: 0,name,food,drink
0,Peter,fish,
1,Paul,beans,
2,Mary,bread,wine


In [37]:
# Both frames carry a 'rank' column that is not part of the join key.
df8 = pd.DataFrame(
    {
        'name': ['Bob', 'Jake', 'Lisa', 'Sue'],
        'rank': [1, 2, 3, 4],
    }
)
df9 = pd.DataFrame(
    {
        'name': ['Bob', 'Jake', 'Lisa', 'Sue'],
        'rank': [3, 1, 4, 2],
    }
)

# In case of conflicting column names, merge keeps both and appends the default
# suffixes: 'rank_x' comes from the left frame, 'rank_y' from the right.
display(df8, df9, df8.merge(df9, on="name"))

Unnamed: 0,name,rank
0,Bob,1
1,Jake,2
2,Lisa,3
3,Sue,4


Unnamed: 0,name,rank
0,Bob,3
1,Jake,1
2,Lisa,4
3,Sue,2


Unnamed: 0,name,rank_x,rank_y
0,Bob,1,3
1,Jake,2,1
2,Lisa,3,4
3,Sue,4,2


In [21]:
def add(*args):
    """Return the sum of all positional arguments (0 when called with none)."""
    return sum(args)


def prod(*x):
    """Return the product of all positional arguments, computed by np.prod."""
    return np.prod(x)

In [22]:
# 1 * 2 * 6
prod(1, 2, 6)

12

In [24]:
# Nested calls evaluate inside-out: (7 * 12 * 5) + (17 + 7)
add(
    prod(
        add(2, 5),     # 7
        add(5, 7),     # 12
        5,
    ),                 # 420
    add(
        add(4, 6, 7),  # 17
        7,
    ),                 # 24
)

444

If $\lim_{x \to a} f(x) = L$ and $\lim_{x \to a} g(x) = M$, then $\lim_{x \to a} [f(x) + g(x)] = L + M$.