In [1]:
import numpy as np
import pandas as pd

from pandas import Series, DataFrame

### 1. Creating a Series

In [2]:
# Eight consecutive integers (0-7), labelled 'row1' .. 'row8'.
mySeries = Series(np.arange(8), index=[f'row{i}' for i in range(1, 9)])

In [3]:
# Display the Series to check its values, row labels and dtype.
mySeries

row1    0
row2    1
row3    2
row4    3
row5    4
row6    5
row7    6
row8    7
dtype: int32

In [4]:
# Label-based lookup of a single element.
mySeries.loc['row7']

6

In [5]:
# Select the first and last elements by integer position.
# The index holds string labels, so plain `mySeries[[0, 7]]` only works through
# the deprecated "treat integer keys as positions" fallback of
# Series.__getitem__; `.iloc` makes the positional intent explicit.
mySeries.iloc[[0, 7]]

row1    0
row8    7
dtype: int32

### 2. Creating a DataFrame

In [6]:
# Seed NumPy's global random generator so the np.random.rand calls in the
# following cells produce the same values on every run.
np.random.seed(25)

In [7]:
# 6x6 frame of uniform random floats in [0, 1) with default integer labels.
df = DataFrame(np.random.rand(6, 6))

In [8]:
# Display the random frame; rows and columns carry the default 0-5 labels.
df

Unnamed: 0,0,1,2,3,4,5
0,0.870124,0.582277,0.278839,0.185911,0.4111,0.117376
1,0.684969,0.437611,0.556229,0.36708,0.402366,0.113041
2,0.447031,0.585445,0.161985,0.520719,0.326051,0.699186
3,0.366395,0.836375,0.481343,0.516502,0.383048,0.997541
4,0.514244,0.559053,0.03445,0.71993,0.421004,0.436935
5,0.281701,0.900274,0.669612,0.456069,0.289804,0.525819


In [9]:
# Another 6x6 frame of uniform random floats, this time with explicit
# 'row1'..'row6' row labels and 'column1'..'column6' column labels.
df = DataFrame(
    np.random.rand(6, 6),
    index=[f'row{i}' for i in range(1, 7)],
    columns=[f'column{i}' for i in range(1, 7)],
)

In [10]:
# Display the frame with its named rows and columns.
df

Unnamed: 0,column1,column2,column3,column4,column5,column6
row1,0.559242,0.745284,0.828346,0.823694,0.07714,0.644862
row2,0.309258,0.524254,0.958092,0.883201,0.295432,0.512376
row3,0.088702,0.641717,0.132421,0.766486,0.076742,0.331044
row4,0.679852,0.509213,0.655146,0.60212,0.719055,0.415219
row5,0.396542,0.825139,0.712552,0.097937,0.842154,0.440821
row6,0.373989,0.913676,0.547778,0.251937,0.027474,0.206257


In [12]:
# Pick two columns by label, keeping every row.
df.loc[:, ['column1', 'column4']]

Unnamed: 0,column1,column4
row1,0.559242,0.823694
row2,0.309258,0.883201
row3,0.088702,0.766486
row4,0.679852,0.60212
row5,0.396542,0.097937
row6,0.373989,0.251937


In [13]:
# Pick two rows by label, keeping every column.
df.loc[['row2', 'row3'], :]

Unnamed: 0,column1,column2,column3,column4,column5,column6
row2,0.309258,0.524254,0.958092,0.883201,0.295432,0.512376
row3,0.088702,0.641717,0.132421,0.766486,0.076742,0.331044


In [14]:
# Label-based row slice; unlike positional slices, both endpoints are included.
# `.loc` is used instead of bare `df['row1':'row3']` because `df[...]` normally
# selects columns, and only switches to row selection when handed a slice.
df.loc['row1':'row3']

Unnamed: 0,column1,column2,column3,column4,column5,column6
row1,0.559242,0.745284,0.828346,0.823694,0.07714,0.644862
row2,0.309258,0.524254,0.958092,0.883201,0.295432,0.512376
row3,0.088702,0.641717,0.132421,0.766486,0.076742,0.331044


In [15]:
# Element-wise comparison: True wherever a cell is strictly greater than 0.5.
df.gt(0.5)

Unnamed: 0,column1,column2,column3,column4,column5,column6
row1,True,True,True,True,False,True
row2,False,True,True,True,False,True
row3,False,True,False,True,False,False
row4,True,True,True,True,True,False
row5,False,True,True,False,True,False
row6,False,True,True,False,False,False


In [16]:
# Boolean mask: True for elements strictly greater than 5.
mySeries.gt(5)

row1    False
row2    False
row3    False
row4    False
row5    False
row6    False
row7     True
row8     True
dtype: bool

In [18]:
# Keep only the elements whose value is strictly greater than 5.
mySeries.loc[mySeries.gt(5)]

row7    6
row8    7
dtype: int32

### 3. Removing Duplicates

In [19]:
# Small frame with fully repeated rows, used to demonstrate duplicate handling.
df = DataFrame({
    'Column 1': [1, 1, 2, 2, 3, 3, 3],
    'Column 2': list('aabbccc'),
    'Column 3': list('AABBCCC'),
})

In [20]:
# Display the frame; every row after the first of each group is a repeat.
df

Unnamed: 0,Column 1,Column 2,Column 3
0,1,a,A
1,1,a,A
2,2,b,B
3,2,b,B
4,3,c,C
5,3,c,C
6,3,c,C


In [21]:
# Flag each row that repeats an earlier row; the first occurrence stays False.
df.duplicated(keep='first')

0    False
1     True
2    False
3     True
4    False
5     True
6     True
dtype: bool

In [22]:
# Keep the first occurrence of each distinct row; original index labels are preserved.
df1 = df.drop_duplicates(keep='first')

In [23]:
# Display the de-duplicated frame.
df1

Unnamed: 0,Column 1,Column 2,Column 3
0,1,a,A
2,2,b,B
4,3,c,C


In [24]:
# Same frame as before, except 'Column 3' of row 5 is 'D', so that row is no
# longer a full duplicate.
df = DataFrame({
    'Column 1': [1, 1, 2, 2, 3, 3, 3],
    'Column 2': list('aabbccc'),
    'Column 3': list('AABBCDC'),
})

In [25]:
# Display the modified frame; row 5 now differs from rows 4 and 6 in 'Column 3'.
df

Unnamed: 0,Column 1,Column 2,Column 3
0,1,a,A
1,1,a,A
2,2,b,B
3,2,b,B
4,3,c,C
5,3,c,D
6,3,c,C


In [26]:
# De-duplicate by looking at 'Column 3' only; the other columns are ignored
# when deciding whether two rows match.
df.drop_duplicates(subset=['Column 3'])

Unnamed: 0,Column 1,Column 2,Column 3
0,1,a,A
2,2,b,B
4,3,c,C
5,3,c,D


### 4. Concatenation and Transformation 

In [27]:
# 6x6 frame holding the integers 0..35 in row-major order.
df = DataFrame(np.reshape(np.arange(36), (6, 6)))

In [28]:
# Display the 6x6 integer frame.
df

Unnamed: 0,0,1,2,3,4,5
0,0,1,2,3,4,5
1,6,7,8,9,10,11
2,12,13,14,15,16,17
3,18,19,20,21,22,23
4,24,25,26,27,28,29
5,30,31,32,33,34,35


In [29]:
# 5x3 frame holding the integers 0..14 in row-major order.
df1 = DataFrame(np.reshape(np.arange(15), (5, 3)))

In [30]:
# Display the 5x3 integer frame.
df1

Unnamed: 0,0,1,2
0,0,1,2
1,3,4,5
2,6,7,8
3,9,10,11
4,12,13,14


In [31]:
# Stack df1 underneath df. Columns are aligned by label, so df1's missing
# columns 3-5 are filled with NaN and the original row labels repeat.
concat_df = pd.concat([df, df1], axis='index')

In [32]:
# Display the row-wise concatenation.
concat_df

Unnamed: 0,0,1,2,3,4,5
0,0,1,2,3.0,4.0,5.0
1,6,7,8,9.0,10.0,11.0
2,12,13,14,15.0,16.0,17.0
3,18,19,20,21.0,22.0,23.0
4,24,25,26,27.0,28.0,29.0
5,30,31,32,33.0,34.0,35.0
0,0,1,2,,,
1,3,4,5,,,
2,6,7,8,,,
3,9,10,11,,,
4,12,13,14,,,


In [33]:
# Place df1 to the right of df. Rows are aligned by label, so row 5 (absent
# from df1) gets NaN in the appended columns.
concat_df = pd.concat([df, df1], axis='columns')

In [35]:
# Display the column-wise concatenation.
concat_df

Unnamed: 0,0,1,2,3,4,5,0.1,1.1,2.1
0,0,1,2,3,4,5,0.0,1.0,2.0
1,6,7,8,9,10,11,3.0,4.0,5.0
2,12,13,14,15,16,17,6.0,7.0,8.0
3,18,19,20,21,22,23,9.0,10.0,11.0
4,24,25,26,27,28,29,12.0,13.0,14.0
5,30,31,32,33,34,35,,,


In [36]:
# Return a copy of df without the rows labelled 0 and 2 (df itself is unchanged).
df.drop(index=[0, 2])

Unnamed: 0,0,1,2,3,4,5
1,6,7,8,9,10,11
3,18,19,20,21,22,23
4,24,25,26,27,28,29
5,30,31,32,33,34,35


In [38]:
# Return a copy of df without the column labelled 5 (df itself is unchanged).
df.drop(columns=[5])

Unnamed: 0,0,1,2,3,4
0,0,1,2,3,4
1,6,7,8,9,10
2,12,13,14,15,16
3,18,19,20,21,22
4,24,25,26,27,28
5,30,31,32,33,34


In [39]:
# Six-element Series (0..5) that will be attached to df as an extra column via join.
series_obj = Series(data=np.arange(6))

In [40]:
# Give the Series a name: DataFrame.join needs a named Series, and the name
# becomes the label of the joined column.
series_obj.name = 'New Column'

In [41]:
# Display the Series; its name now appears alongside the dtype.
series_obj

0    0
1    1
2    2
3    3
4    4
5    5
Name: New Column, dtype: int32

In [42]:
# Attach the named Series as a new column, aligning on the row index and
# keeping every row of df.
column_added = df.join(series_obj, how='left')

In [43]:
# Display df with the extra 'New Column' appended.
column_added

Unnamed: 0,0,1,2,3,4,5,New Column
0,0,1,2,3,4,5,0
1,6,7,8,9,10,11,1
2,12,13,14,15,16,17,2
3,18,19,20,21,22,23,3
4,24,25,26,27,28,29,4
5,30,31,32,33,34,35,5


In [44]:
# Sorting: show df again as the unsorted baseline before ordering it by a column.
df

Unnamed: 0,0,1,2,3,4,5
0,0,1,2,3,4,5
1,6,7,8,9,10,11
2,12,13,14,15,16,17
3,18,19,20,21,22,23
4,24,25,26,27,28,29
5,30,31,32,33,34,35


In [45]:
# Order the rows by the values in column 5, largest first (returns a new frame).
df.sort_values(by=5, ascending=False)

Unnamed: 0,0,1,2,3,4,5
5,30,31,32,33,34,35
4,24,25,26,27,28,29
3,18,19,20,21,22,23
2,12,13,14,15,16,17
1,6,7,8,9,10,11
0,0,1,2,3,4,5
