### How will you add a column to a pandas DataFrame?

In [3]:
import pandas as pd
# Three integer columns of four rows each. No index is passed, so pandas
# assigns the default 0..3 row labels.
data = {
    'A': [1, 2, 3, 4],
    'B': [5, 6, 7, 8],
    'C': [9, 10, 11, 12],
}
df = pd.DataFrame(data)



In [48]:
# Assigning a Series to a new key adds it as a column. Values are matched to
# rows by index label; both sides use the default 0..3 labels here, so every
# row receives a value.
df['D'] = pd.Series([14,15,16,17])
df

Unnamed: 0,A,B,C,D
0,1,5,9,14
1,2,6,10,15
2,3,7,11,16
3,4,8,12,17


In [49]:
# Rebuild the same frame, this time with explicit string row labels.
data = {
    'A': [1, 2, 3, 4],
    'B': [5, 6, 7, 8],
    'C': [9, 10, 11, 12],
}
df = pd.DataFrame(data, index=['p', 'q', 'r', 's'])
df

Unnamed: 0,A,B,C
p,1,5,9
q,2,6,10
r,3,7,11
s,4,8,12


In [50]:
# The Series carries labels p, q and r only. Column assignment aligns on the
# index, so row 's' gets NaN and column D is displayed as float (13.0, ...)
# even though the Series itself was created with dtype=int.
df['D'] = pd.Series([13, 14, 15], index=['p', 'q', 'r'], dtype=int)
df

Unnamed: 0,A,B,C,D
p,1,5,9,13.0
q,2,6,10,14.0
r,3,7,11,15.0
s,4,8,12,


### How to add a row to a DataFrame

In [51]:
# Assigning through .loc with a label that is not in the index yet appends a
# new row; the list supplies one value per column (A, B, C, D).
df.loc['t'] = [7, 6, 4, 9]
df

Unnamed: 0,A,B,C,D
p,1,5,9,13.0
q,2,6,10,14.0
r,3,7,11,15.0
s,4,8,12,
t,7,6,4,9.0


### How to rename a column

In [54]:
# Rename column D to E on df itself. With inplace=True the call returns None,
# which is why this cell shows no output.
df.rename(columns={'D': 'E'}, inplace=True)

In [55]:
df

Unnamed: 0,A,B,C,E
p,1,5,9,13.0
q,2,6,10,14.0
r,3,7,11,15.0
s,4,8,12,
t,7,6,4,9.0


### islower()

In [1]:
# str.islower() returns one boolean per element of a column or Series:
# True when all cased characters in the string are lowercase.

In [4]:
# One all-uppercase, one mixed-case and one all-lowercase string, used to
# exercise the case checks in the following cells.
s = pd.Series(["KKK", "aBc", "aaa"])

In [7]:
# Element-wise check through the .str accessor: only "aaa" is entirely
# lowercase; the mixed-case "aBc" yields False.
s.str.islower()

0    False
1    False
2     True
dtype: bool

In [8]:
# True counts as 1 when summed, so this is the number of all-lowercase strings.
s.str.islower().sum()

1

In [9]:
# The same goes for isupper()

In [10]:
# Element-wise check: only "KKK" is entirely uppercase.
s.str.isupper()

0     True
1    False
2    False
dtype: bool

In [11]:
# Summing the boolean result counts the all-uppercase strings.
s.str.isupper().sum()

1

In [13]:
# Note: this rebinds df to a new two-column frame of strings; the numeric
# frame built earlier is no longer reachable through this name.
df = pd.DataFrame({
    'A': ["AAA", "bbb", "Abc"],
    'B': ["AAA", "BBB", "CCC"],
})

In [15]:
# The same .str check applied to a single DataFrame column: only "bbb" is
# entirely lowercase.
df["A"].str.islower()

0    False
1     True
2    False
Name: A, dtype: bool

### len

In [16]:
len(df["A"]) # len() of a column (a Series) is its number of rows

3

In [17]:
# len() of a standalone Series is its number of elements.
len(s)

3

In [18]:
# len() of a DataFrame is its number of rows, not its number of columns or cells.
len(df)

3

In [21]:
df.shape # tuple of (number of rows, number of columns)

(3, 2)

### strip(): removes leading and trailing whitespace

In [22]:
# .str is available on a Series (one column), not on a whole DataFrame, so
# this call raises AttributeError. The error is the point of the cell; it is
# caught and printed so that the demonstration does not abort a
# "Restart & Run All" of the notebook.
try:
    df.str.strip()
except AttributeError as err:
    print(f"AttributeError: {err}")

AttributeError: 'DataFrame' object has no attribute 'str'

In [23]:
# .str works on a single column (a Series). None of these values has
# surrounding whitespace, so they come back unchanged.
df['A'].str.strip()

0    AAA
1    bbb
2    Abc
Name: A, dtype: object

In [24]:
# Same call on the standalone Series s; again there is nothing to strip, so
# the values are unchanged.
s.str.strip()

0    KKK
1    aBc
2    aaa
dtype: object

In [25]:
# strip() is a string method, not a Series method: it must be reached through
# the .str accessor (s.str.strip()). Calling it directly on the Series raises
# AttributeError, which is caught and printed so the notebook keeps running.
try:
    s.strip()
except AttributeError as err:
    print(f"AttributeError: {err}")

AttributeError: 'Series' object has no attribute 'strip'

### split(): splits strings on a delimiter

In [43]:
# Full names plus a score. The third name deliberately keeps its leading
# space (" Shyam Singh").
df1 = pd.DataFrame({
    'Name': ["Arun Chauhan", "Ram Kumar", " Shyam Singh"],
    'Score': [12, 15, 18],
})

In [44]:
df1

Unnamed: 0,Name,Score
0,Arun Chauhan,12
1,Ram Kumar,15
2,Shyam Singh,18


In [45]:
# split() without a separator splits on runs of whitespace, so the leading
# space in " Shyam Singh" does not produce an empty first piece.
# expand=True returns the pieces as separate columns, which are stored here
# as the new F_Name and L_Name columns.
df1[["F_Name", "L_Name"]] = df1["Name"].str.split(expand=True)

In [46]:
df1

Unnamed: 0,Name,Score,F_Name,L_Name
0,Arun Chauhan,12,Arun,Chauhan
1,Ram Kumar,15,Ram,Kumar
2,Shyam Singh,18,Shyam,Singh


### replace(): replace cell values, e.g. all "\<space>" cells with NaN

In [53]:
df1

Unnamed: 0,Name,Score,F_Name,L_Name
0,Arun Chauhan,12,Arun,Chauhan
1,Ram Kumar,15,Ram,Kumar
2,Shyam Singh,18,Shyam,Singh


In [54]:
# Replace every cell in L_Name that is exactly 'Kumar' with 'Thakur'.
# The result is assigned back to the column instead of calling
# replace(..., inplace=True) on df1["L_Name"]: that form mutates an
# intermediate Series (chained assignment), which recent pandas versions warn
# about and which leaves df1 unchanged once Copy-on-Write is in effect.
df1["L_Name"] = df1["L_Name"].replace('Kumar', 'Thakur')

In [55]:
df1

Unnamed: 0,Name,Score,F_Name,L_Name
0,Arun Chauhan,12,Arun,Chauhan
1,Ram Kumar,15,Ram,Thakur
2,Shyam Singh,18,Shyam,Singh


### Pattern Finding in pandas

In [61]:
# Keep the rows whose last name starts with 'C' followed by six word
# characters (e.g. "Chauhan"), and only the Name and Score columns.
# The pattern is a raw string so that \w reaches the regex engine untouched
# instead of being treated as an (invalid) string escape sequence; rows and
# columns are selected in a single .loc call rather than by chained indexing.
df2 = df1.loc[df1["L_Name"].str.contains(r'^C\w{6}'), ["Name", "Score"]]

In [62]:
df2

Unnamed: 0,Name,Score
0,Arun Chauhan,12


### Rename

In [63]:
df

Unnamed: 0,A,B
0,AAA,AAA
1,bbb,BBB
2,Abc,CCC


In [67]:
# Rename column 'A' to lowercase 'a' on df itself; column B is not in the
# mapping and keeps its name.
df.rename(columns={'A': 'a'}, inplace=True)

In [68]:
df

Unnamed: 0,a,B
0,AAA,AAA
1,bbb,BBB
2,Abc,CCC


In [72]:
# Add an ordinary column named "index" (one value per row); a later cell
# promotes it to the row index.
df["index"] = [1, 2, 3]

In [73]:
df

Unnamed: 0,a,B,index
0,AAA,AAA,1
1,bbb,BBB,2
2,Abc,CCC,3


In [77]:
# Promote the "index" column to the row index, modifying df itself. The
# column is consumed by this, so running the cell a second time fails with
# KeyError because "index" is no longer a column.
df.set_index("index", inplace=True)

In [78]:
df

Unnamed: 0_level_0,a,B
index,Unnamed: 1_level_1,Unnamed: 2_level_1
1,AAA,AAA
2,bbb,BBB
3,Abc,CCC


In [81]:
# Relabel rows through an index mapping. Only labels 1 and 2 are listed, so
# label 3 is left unchanged.
df.rename(index={1: "First", 2: 'Second'}, inplace=True)

In [82]:
df

Unnamed: 0_level_0,a,B
index,Unnamed: 1_level_1,Unnamed: 2_level_1
First,AAA,AAA
Second,bbb,BBB
3,Abc,CCC


In [83]:
df1

Unnamed: 0,Name,Score,F_Name,L_Name
0,Arun Chauhan,12,Arun,Chauhan
1,Ram Kumar,15,Ram,Thakur
2,Shyam Singh,18,Shyam,Singh


In [90]:
# Relabel rows 0 -> 1 and 1 -> 2. The existing label 2 is not in the mapping
# and stays as it is, so afterwards the index contains the label 2 twice.
df1.rename(index={0: 1, 1: 2}, inplace=True)

In [91]:
df1

Unnamed: 0,Name,Score,F_Name,L_Name
1,Arun Chauhan,12,Arun,Chauhan
2,Ram Kumar,15,Ram,Thakur
2,Shyam Singh,18,Shyam,Singh


In [92]:
# .loc selects by label. Label 2 occurs twice in the index, so both matching
# rows are returned (as a DataFrame).
df1.loc[2]

Unnamed: 0,Name,Score,F_Name,L_Name
2,Ram Kumar,15,Ram,Thakur
2,Shyam Singh,18,Shyam,Singh


In [93]:
# .iloc selects by integer position: this is the third row only, returned as
# a Series, regardless of the duplicated labels.
df1.iloc[2]

Name       Shyam Singh
Score               18
F_Name           Shyam
L_Name           Singh
Name: 2, dtype: object