In [1]:
import pandas as pd

In [2]:
# Build a labelled Series: five integers, indexed by the letters A through E.
data = [1, 2, 3, 4, 5]
series = pd.Series(data, index=list('ABCDE'))
series

A    1
B    2
C    3
D    4
E    5
dtype: int64

In [3]:
# Build a DataFrame from a dict of equal-length lists: each key becomes a column.
data = dict(
    Name=['Sam', 'Sana', 'JD', 'JAM'],
    Age=[32, 2, 28, 33],
    color=['Blue', 'Black', 'Red', 'White'],
)

df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age,color
0,Sam,32,Blue
1,Sana,2,Black
2,JD,28,Red
3,JAM,33,White


Data Transformation with Pandas: Grouping, Merging, and Concatenating
- Group-By
- Merging
- Concatenating

In [9]:
# Group-by demo: split the rows by 'Country', then average 'Age' within each group.
data = dict(
    Name=['Akash', 'Sinha', 'Mitra', 'Poulin'],
    Age=[23, 56, 88, 35],
    Country=['India', 'Denmark', 'Aus', 'Caneda'],
)
df = pd.DataFrame(data)

# Every country appears exactly once in this sample, so each group mean is
# simply that row's age (reported as a float).
groupeddata = df.groupby(by=['Country'])
print(groupeddata['Age'].mean())

Country
Aus        88.0
Caneda     35.0
Denmark    56.0
India      23.0
Name: Age, dtype: float64


In [10]:
# Merging: combine two frames column-wise by matching rows on the shared 'Name' key.
data1 = dict(
    Name=['Akash', 'Sinha', 'Mitra', 'Poulin'],
    Age=[23, 56, 88, 35],
)
data2 = dict(
    Name=['Akash', 'Sinha', 'Mitra', 'Poulin'],
    color=['Blue', 'Black', 'Red', 'White'],
)

df1 = pd.DataFrame(data1)
df2 = pd.DataFrame(data2)

# DataFrame.merge is the method form of pd.merge; with no `how` given the
# default inner join is used. Both frames hold the same four names, so every
# row finds its match.
mergeddata = df1.merge(df2, on='Name')
mergeddata

Unnamed: 0,Name,Age,color
0,Akash,23,Blue
1,Sinha,56,Black
2,Mitra,88,Red
3,Poulin,35,White


In [13]:
# Concatenation: stack two frames that share the same columns on top of each other.
data1 = dict(Name=['Akash', 'Sinha'], Age=[23, 56])
data2 = dict(Name=['Mitra', 'Poulin'], Age=[12, 54])

df1 = pd.DataFrame(data1)
df2 = pd.DataFrame(data2)

# Row-wise concat keeps each input's own index labels, so the result is
# indexed 0, 1, 0, 1 (pd.concat's ignore_index=True would renumber instead).
concatenateddata = pd.concat([df1, df2], axis='index')
concatenateddata

Unnamed: 0,Name,Age
0,Akash,23
1,Sinha,56
0,Mitra,12
1,Poulin,54


Indexing & Slicing

In [14]:
# Sample frame used by the indexing and slicing examples (default index 0..3).
data = dict(
    Name=['Akash', 'Sinha', 'Mitra', 'Poulin'],
    Age=[23, 56, 88, 35],
    Country=['India', 'Denmark', 'Aus', 'Caneda'],
)

df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age,Country
0,Akash,23,India
1,Sinha,56,Denmark
2,Mitra,88,Aus
3,Poulin,35,Caneda


In [15]:
# Single-bracket selection with a column label returns that column as a Series.
df['Age']

0    23
1    56
2    88
3    35
Name: Age, dtype: int64

In [18]:
# df.loc is label-based (index labels / column names);
# df.iloc is position-based (integer offsets).
# Both are indexed as [row_indexer, column_indexer].
# Here 0 is a row *label* (the default index is 0..3) and 'Name' is a column label.
df.loc[0, 'Name']

'Akash'

In [19]:
# iloc takes integer positions on both axes: row position 0, column position 0
# (the first row's 'Name').
df.iloc[0,0]

'Akash'

In [22]:
# Row position 3, column position 2: the last row's 'Country' value.
df.iloc[3,2]

'Caneda'

In [23]:
# Display the whole frame again for reference.
df

Unnamed: 0,Name,Age,Country
0,Akash,23,India
1,Sinha,56,Denmark
2,Mitra,88,Aus
3,Poulin,35,Caneda


In [25]:
# Label-based lookup: the row labelled 2, column 'Age'.
df.loc[2,'Age']

88

Slicing

In [26]:
# Plain slicing on a DataFrame selects rows; the stop (3) is excluded,
# so only rows 1 and 2 come back.
df[1:3]

Unnamed: 0,Name,Age,Country
1,Sinha,56,Denmark
2,Mitra,88,Aus


In [27]:
# .loc[row_start:row_stop, [column labels]] - label-based slicing.
# The stop label is included here, so rows 1, 2 and 3 are printed.
print(df.loc[1:3,['Name','Country']])

     Name  Country
1   Sinha  Denmark
2   Mitra      Aus
3  Poulin   Caneda


Sorting, Filtering & Mapping of Data

In [34]:
# Marks table for the sorting / filtering / mapping examples.
# The index is set explicitly so rows are labelled 1..4 instead of the default 0..3.
data = dict(
    Name=['Jay', 'Kol', 'Man', 'Puru'],
    English=[34, 55, 23, 66],
    Maths=[66, 87, 12, 56],
    Hindi=[33, 54, 61, 21],
)

df = pd.DataFrame(data, index=[1, 2, 3, 4])
df

Unnamed: 0,Name,English,Maths,Hindi
1,Jay,34,66,33
2,Kol,55,87,54
3,Man,23,12,61
4,Puru,66,56,21


In [35]:
# Sorting: order the rows by 'English', highest mark first.
# sort_values returns a new frame; df itself is left unchanged.
engdatasort = df.sort_values('English', ascending=False)
engdatasort

Unnamed: 0,Name,English,Maths,Hindi
4,Puru,66,56,21
2,Kol,55,87,54
1,Jay,34,66,33
3,Man,23,12,61


In [36]:
# Sorting on several columns: 'English' ascending first, then 'Maths' descending
# as the tie-breaker. The English marks here are all distinct, so the Maths
# ordering never comes into play for this data.
data_s = df.sort_values(
    by=['English', 'Maths'],
    ascending=[True, False],
)
data_s

Unnamed: 0,Name,English,Maths,Hindi
3,Man,23,12,61
1,Jay,34,66,33
2,Kol,55,87,54
4,Puru,66,56,21


In [39]:
# Filtering with a boolean mask: keep only the rows whose Maths mark exceeds 42.
filterdata = df.loc[df['Maths'] > 42]
filterdata

Unnamed: 0,Name,English,Maths,Hindi
1,Jay,34,66,33
2,Kol,55,87,54
4,Puru,66,56,21


In [40]:
# Filtering on several conditions: both masks must hold (element-wise AND),
# i.e. Maths above 40 and Hindi above 35.
filterdata = df[df['Maths'].gt(40) & df['Hindi'].gt(35)]
filterdata

Unnamed: 0,Name,English,Maths,Hindi
2,Kol,55,87,54


Mapping 

In [44]:
# Mapping: translate each student's name to a proficiency code via a lookup dict
# and store the result in a new 'proficiency' column on df.
mapped = {
    'Jay': 2,
    'Kol': 1,
    'Man': 2,
    'Puru': 0,
}

df['proficiency'] = df['Name'].map(mapped)
df

Unnamed: 0,Name,English,Maths,Hindi,proficiency
1,Jay,34,66,33,2
2,Kol,55,87,54,1
3,Man,23,12,61,2
4,Puru,66,56,21,0
