In [2]:
import numpy as np
import pandas as pd

# Two short string Series, each carrying its own default 0..1 index.
s1 = pd.Series(data=['a', 'b'])
s2 = pd.Series(data=['c', 'd'])

# Stack them end to end. ignore_index=True discards the two original indexes
# and relabels the combined result 0..3.
print(
    pd.concat(objs=[s1, s2], ignore_index=True),
    end="\n\n",
)


0    a
1    b
2    c
3    d
dtype: object



In [3]:
# Place the two Series side by side instead of stacking them; with
# ignore_index=True the resulting column labels are reset to 0 and 1.
print(
    pd.concat([s1, s2], axis="columns", ignore_index=True),
    end="\n\n",
)

   0  1
0  a  c
1  b  d



In [4]:
# Two frames with the same column layout (letter, number).
df1 = pd.DataFrame({'letter': ['a', 'b'], 'number': [1, 2]})
df2 = pd.DataFrame({'letter': ['c', 'd'], 'number': [3, 4]})

# Row-wise concatenation with a freshly numbered 0..3 index.
print(pd.concat(objs=[df1, df2], ignore_index=True))


  letter  number
0      a       1
1      b       2
2      c       3
3      d       4


In [6]:
# Column-wise concatenation: rows are aligned on the shared 0..1 index and the
# column labels of both frames are kept, so 'letter' and 'number' appear twice.
print(pd.concat(objs=[df1, df2], axis="columns"))

  letter  number letter  number
0      a       1      c       3
1      b       2      d       4


In [7]:
# df3 has an extra 'animal' column that df1 lacks.
df3 = pd.DataFrame(
    {
        'letter': ['c', 'd'],
        'number': [3, 4],
        'animal': ['cat', 'dog'],
    }
)

# Concatenating frames with different columns keeps the union of the columns;
# the rows that came from df1 get NaN under 'animal'.
print(pd.concat(objs=[df1, df3], ignore_index=True))


  letter  number animal
0      a       1    NaN
1      b       2    NaN
2      c       3    cat
3      d       4    dog


In [9]:
# Two frames sharing the key column 'a'; only the key 'foo' occurs in both.
df4 = pd.DataFrame([['foo', 1], ['bar', 2]], columns=['a', 'b'])
df5 = pd.DataFrame([['foo', 3], ['baz', 4]], columns=['a', 'c'])
print(df4, df5, sep="\n")

# Inner join on 'a': keeps only the keys present in both frames.
df4.merge(df5, how='inner', on='a')


     a  b
0  foo  1
1  bar  2
     a  c
0  foo  3
1  baz  4


Unnamed: 0,a,b,c
0,foo,1,3


In [10]:
# Left join on 'a': every row of df4 is kept; keys missing from df5 get NaN in 'c'.
df4.merge(df5, how='left', on='a')


Unnamed: 0,a,b,c
0,foo,1,3.0
1,bar,2,


In [11]:
# Right join on 'a': every row of df5 is kept; keys missing from df4 get NaN in 'b'.
df4.merge(df5, how='right', on='a')

Unnamed: 0,a,b,c
0,foo,1.0,3
1,baz,,4


In [12]:
# Outer join on 'a': union of the keys from both frames, NaN where a side has no match.
df4.merge(df5, how='outer', on='a')

Unnamed: 0,a,b,c
0,bar,2.0,
1,baz,,4.0
2,foo,1.0,3.0


In [13]:
# Cross join: every row of df4 paired with every row of df5 (2 x 2 = 4 rows).
# No key is used, so the shared column 'a' is disambiguated as a_x / a_y.
df4.merge(df5, how='cross')

Unnamed: 0,a_x,b,a_y,c
0,foo,1,foo,3
1,foo,1,baz,4
2,bar,2,foo,3
3,bar,2,baz,4


In [14]:
# Columns are built from Series of different lengths; the DataFrame aligns them
# on their index, so the shorter column is padded with NaN.
df6 = pd.DataFrame(
    data={
        'Rajasthan': pd.Series(data=['Jaipur', 'Jodhpur', 'Sikar']),
        'Punjab': pd.Series(data=['Chandigarh', 'Patiala', 'Amritsar', 'Firozpur']),
    }
)
print(df6, end='\n\n')

# Shares the 'Rajasthan' column with df6, but with only two non-missing values.
df7 = pd.DataFrame(
    data={
        'Rajasthan': pd.Series(data=['Jaipur', 'Jodhpur']),
        'Gujrat': pd.Series(data=['Gandhinagar', 'Ahmedabad', 'Surat', 'Rajkot']),
    }
)
print(df7, end='\n\n')


  Rajasthan      Punjab
0    Jaipur  Chandigarh
1   Jodhpur     Patiala
2     Sikar    Amritsar
3       NaN    Firozpur

  Rajasthan       Gujrat
0    Jaipur  Gandhinagar
1   Jodhpur    Ahmedabad
2       NaN        Surat
3       NaN       Rajkot



In [15]:
# Inner join on 'Rajasthan', the only column the two frames have in common.
# Rows whose key is NaN on both sides are matched with each other, which is why
# the Firozpur row appears once for each NaN-keyed row of df7.
df6.merge(df7, how='inner', on='Rajasthan')

Unnamed: 0,Rajasthan,Punjab,Gujrat
0,Jaipur,Chandigarh,Gandhinagar
1,Jodhpur,Patiala,Ahmedabad
2,,Firozpur,Surat
3,,Firozpur,Rajkot


In [16]:
# Left join on the shared 'Rajasthan' column: all df6 rows are kept, 'Sikar' has
# no partner in df7, and the NaN-keyed row pairs with both NaN-keyed df7 rows.
df6.merge(df7, how='left', on='Rajasthan')

Unnamed: 0,Rajasthan,Punjab,Gujrat
0,Jaipur,Chandigarh,Gandhinagar
1,Jodhpur,Patiala,Ahmedabad
2,Sikar,Amritsar,
3,,Firozpur,Surat
4,,Firozpur,Rajkot


In [17]:
# Right join on the shared 'Rajasthan' column: all df7 rows are kept, including
# the two NaN-keyed ones, which both pick up df6's NaN-keyed Firozpur row.
df6.merge(df7, how='right', on='Rajasthan')

Unnamed: 0,Rajasthan,Punjab,Gujrat
0,Jaipur,Chandigarh,Gandhinagar
1,Jodhpur,Patiala,Ahmedabad
2,,Firozpur,Surat
3,,Firozpur,Rajkot


In [18]:


# Two frames keyed by string index labels; only K0 and K2 occur in both.
left = pd.DataFrame(
    data={"A": ["A0", "A1", "A2"], "B": ["B0", "B1", "B2"]},
    index=["K0", "K1", "K2"],
)
right = pd.DataFrame(
    data={"C": ["C0", "C2", "C3"], "D": ["D0", "D2", "D3"]},
    index=["K0", "K2", "K3"],
)

# Show both frames, each followed by a blank line.
print(left, right, sep='\n\n', end='\n\n')


     A   B
K0  A0  B0
K1  A1  B1
K2  A2  B2

     C   D
K0  C0  D0
K2  C2  D2
K3  C3  D3



In [19]:
# Index-on-index join; "left" is join's default mode, so every label of `left`
# is kept and K1 (absent from `right`) gets NaN in C and D.
left.join(right, how="left")

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K1,A1,B1,,
K2,A2,B2,C2,D2


In [20]:

# Outer join on the index: union of the labels of both frames (K0..K3).
left.join(other=right, how="outer")


Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K1,A1,B1,,
K2,A2,B2,C2,D2
K3,,,C3,D3


In [21]:
# Inner join on the index: only labels present in both frames (K0 and K2).
left.join(other=right, how="inner")

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K2,A2,B2,C2,D2


In [22]:
# 4x3 numeric frame whose last row is entirely missing; the NaN values make
# every column floating point.
dFrame1 = pd.DataFrame(
    data=[
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
        [np.nan] * 3,
    ],
    columns=['A', 'B', 'C'],
)
print("Original DataFrame is\n" + str(dFrame1))


Original DataFrame is
     A    B    C
0  1.0  2.0  3.0
1  4.0  5.0  6.0
2  7.0  8.0  9.0
3  NaN  NaN  NaN


In [23]:
# Aggregate with a custom callable: it is applied to each column, and the
# all-NaN last row does not affect the totals (12, 15, 18).
print(dFrame1.aggregate(lambda column: np.sum(column)))

A    12.0
B    15.0
C    18.0
dtype: float64


In [27]:
# Same column totals, this time naming the reduction by string.
print(dFrame1.aggregate(func='sum'))

A    12.0
B    15.0
C    18.0
dtype: float64


In [29]:
# Apply several reductions at once: each name in the list becomes one row of
# the result, with one column per column of dFrame1.
# The label names all three requested statistics (sum, min and max) so it
# matches what is actually computed and printed.
print("\nThe sum, min and max are\n", dFrame1.agg(['sum', 'min', 'max']), sep="")


The sum and The min is
        A     B     C
sum  12.0  15.0  18.0
min   1.0   2.0   3.0
max   7.0   8.0   9.0


In [30]:
# Per-column reductions: 'A' gets sum and min, 'B' gets min and max. The result
# has one row per reduction name, with NaN where a column did not request it;
# column 'C' is not listed and therefore does not appear.
print(
    dFrame1.aggregate(
        {
            'A': ['sum', 'min'],
            'B': ['min', 'max'],
        }
    )
)

        A    B
sum  12.0  NaN
min   1.0  2.0
max   NaN  8.0


In [31]:
# Named aggregation: each keyword is an output row label mapped to a
# (column, reduction) pair, so only one cell per row is populated.
print(
    dFrame1.agg(
        x=('A', 'max'),
        y=('B', 'min'),
        z=('C', 'mean'),
    )
)

     A    B    C
x  7.0  NaN  NaN
y  NaN  2.0  NaN
z  NaN  NaN  6.0


In [32]:
# Reduce across the columns, giving one mean per row; the all-NaN row yields NaN.
print(dFrame1.aggregate("mean", axis=1))

0    2.0
1    5.0
2    8.0
3    NaN
dtype: float64


In [33]:
# Small integer frame used by the transform examples: A = 0..2, B = 1..3.
dFrame2 = pd.DataFrame(
    data={
        'A': range(3),
        'B': range(1, 4),
    }
)
print(dFrame2)


   A  B
0  0  1
1  1  2
2  2  3


In [34]:
# transform keeps the frame's shape: every value is incremented by one.
print(dFrame2.transform(func=lambda values: values + 1))

   A  B
0  1  2
1  2  3
2  3  4


In [35]:
# Integer Series 0, 1, 2.
s = pd.Series(data=range(3))
print(s, end='\n\n')

# A list of functions produces one output column per function, labelled with
# the function's name (sqrt, exp).
print(s.transform(func=[np.sqrt, np.exp]))


0    0
1    1
2    2
dtype: int64

       sqrt       exp
0  0.000000  1.000000
1  1.000000  2.718282
2  1.414214  7.389056


In [36]:
# Per-column transforms: absolute value for 'A', increment by one for 'B'.
print(
    dFrame2.transform(
        {
            "A": np.abs,
            "B": lambda values: values + 1,
        }
    )
)

   A  B
0  0  2
1  1  3
2  2  4


In [37]:
# NOTE: this rebinds dFrame1, replacing the numeric frame used by the earlier
# aggregation cells with an animal / top-speed table for the groupby examples.
dFrame1 = pd.DataFrame(
    data={
        'Animal': ['Falcon', 'Falcon', 'Parrot', 'Parrot'],
        'Max Speed': [380.0, 370.0, 24.0, 26.0],
    }
)

# Show the frame followed by its Python type.
print(dFrame1, type(dFrame1), sep="\n")


   Animal  Max Speed
0  Falcon      380.0
1  Falcon      370.0
2  Parrot       24.0
3  Parrot       26.0
<class 'pandas.core.frame.DataFrame'>


In [38]:

# NOTE: despite its name, dFrame2 is rebound here to a DataFrameGroupBy object
# (not a DataFrame), as the printed type below shows. Printing the object itself
# only shows its repr, not the grouped data.
dFrame2 = dFrame1.groupby(by=['Animal'])
print(dFrame2, type(dFrame2), sep="\n")


<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001FC3A357FE0>
<class 'pandas.core.groupby.generic.DataFrameGroupBy'>


In [39]:
# Fetch the rows of a single group. The grouping keys were passed as a
# one-element list, so the group label is given as a one-element tuple.
dFrame2.get_group(name=("Falcon",))

Unnamed: 0,Animal,Max Speed
0,Falcon,380.0
1,Falcon,370.0


In [40]:
# .groups maps each group label to the row labels that belong to it.
print("The groups are:", dFrame2.groups)
# Number of distinct groups.
print("Total groups are:", dFrame2.ngroups)


The groups are: {'Falcon': [0, 1], 'Parrot': [2, 3]}
Total groups are: 2


In [41]:
# First entry of each non-key column within every group
# (Falcon -> 380.0, Parrot -> 24.0); numeric_only=False is the default.
dFrame2.first(numeric_only=False)

Unnamed: 0_level_0,Max Speed
Animal,Unnamed: 1_level_1
Falcon,380.0
Parrot,24.0


In [42]:
# Last entry of each non-key column within every group
# (Falcon -> 370.0, Parrot -> 26.0); numeric_only=False is the default.
dFrame2.last(numeric_only=False)

Unnamed: 0_level_0,Max Speed
Animal,Unnamed: 1_level_1
Falcon,370.0
Parrot,26.0


In [43]:
# Boolean-mask row selection: keep rows whose 'Animal' is one of the listed
# values. Every row qualifies here, so the whole frame comes back.
dFrame1.loc[dFrame1['Animal'].isin(['Falcon', 'Parrot'])]

Unnamed: 0,Animal,Max Speed
0,Falcon,380.0
1,Falcon,370.0
2,Parrot,24.0
3,Parrot,26.0


In [45]:
# Mean of 'Max Speed' within each animal group, followed by a blank line.
print(dFrame2.mean(), end="\n\n")

# Several reductions per group: the result gets a two-level column header
# (original column on top, reduction name underneath).
print(dFrame2.agg(["sum", "max", "min"]))



        Max Speed
Animal           
Falcon      375.0
Parrot       25.0

       Max Speed              
             sum    max    min
Animal                        
Falcon     750.0  380.0  370.0
Parrot      50.0   26.0   24.0
