# Pandas and Matplotlib
- Pandas introduction
- Pandas Series, DataFrames, indexing/slicing for DataFrames
- Basic operations with a DataFrame, renaming columns, filtering a DataFrame
- Sorting and merging DataFrames
- Importing external data into a DataFrame
- Matplotlib: introduction, `plot()`, controlling line properties, working with multiple figures, histograms

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

###### Pandas Series, DataFrames, indexing/slicing for DataFrames

###### Series

In [3]:
# Empty Series. The dtype is given explicitly: without it pandas 1.x emits a
# DeprecationWarning about the default dtype of an empty Series, and pandas 2.x
# falls back to `object` instead of float64.
s = pd.Series(dtype="float64")
s

  """Entry point for launching an IPython kernel.


Series([], dtype: float64)

In [4]:
# Build a Series from a plain list: pandas assigns the default 0..n-1 index
# and infers an integer dtype from the values.
a = list(range(1, 5))
s = pd.Series(data=a)
s

0    1
1    2
2    3
3    4
dtype: int64

In [5]:
# A single float among the values makes the whole Series float64.
a = [1, 2, 3, 4.3]
s = pd.Series(data=a)
s

0    1.0
1    2.0
2    3.0
3    4.3
dtype: float64

In [6]:
# Ask for an 8-bit integer dtype explicitly instead of the inferred default.
a = [1, 2, 3, 4]
s = pd.Series(a, dtype=np.int8)
s

0    1
1    2
2    3
3    4
dtype: int8

In [7]:
# A dict maps directly onto a Series: keys become the index, values the data.
a = dict(A=1, B=2, C=3, D=4)
s = pd.Series(a)
s

A    1
B    2
C    3
D    4
dtype: int64

In [8]:
# With an explicit index only the listed labels are taken from the dict:
# "C" is left out, and "E" (no such key) becomes NaN (Not a Number),
# which turns the Series into float64.
a = dict(A=1, B=2, C=3, D=4)
s = pd.Series(a, index=list("ABDE"))
s

A    1.0
B    2.0
D    4.0
E    NaN
dtype: float64

In [9]:
# Label lookup: "E" was requested in the index but had no value, so this is NaN.
s["E"]

nan

In [10]:
# Index labels may be arbitrary integers; they do not have to start at 0.
a = [1, 2, 3, 4]
s = pd.Series(a, index=list(range(11, 15)))
s

11    1
12    2
13    3
14    4
dtype: int64

In [11]:
# String labels as the index, one label per value.
a = [1, 2, 3, 4]
s = pd.Series(a, index=["a", "b", "c", "d"])
s

a    1
b    2
c    3
d    4
dtype: int64

###### Indexing and slicing in Series

In [12]:
# Five values with the default integer index 0..4, used for the indexing demos.
a = list(range(1, 6))
s = pd.Series(data=a)
s

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [13]:
# Single element: with the default integer index, key 2 selects the entry labelled 2.
s[2]

3

In [14]:
# Integer slice: start is included, stop (4) is excluded, as with Python lists.
s[2:4]

2    3
3    4
dtype: int64

In [15]:
# Same five values, now labelled "a".."e" to contrast label and position access.
a = list(range(1, 6))
s = pd.Series(a, index=["a", "b", "c", "d", "e"])
s

a    1
b    2
c    3
d    4
e    5
dtype: int64

In [16]:
# Positional access to the first element. `.iloc` is used explicitly because the
# index holds string labels: plain `s[0]` relies on an integer key being treated
# as a position, which is deprecated in pandas >= 2.1.
s.iloc[0]

1

In [17]:
# Access by label: the value stored under index label "a".
s["a"]

1

In [18]:
s["a":"c"] # label-based slice: unlike an integer slice, the stop label "c" is included

a    1
b    2
c    3
dtype: int64

In [21]:
# A list of labels picks those entries, in the order the labels are listed.
s[["c","a"]]

c    3
a    1
dtype: int64

In [22]:
# Label slice with step -1: walks backwards from "c" to "a", both ends included.
s["c":"a":-1]

c    3
b    2
a    1
dtype: int64

In [23]:
# Reverse the whole Series; each value keeps its own label.
s[::-1]

e    5
d    4
c    3
b    2
a    1
dtype: int64

# Copy concept

In [20]:
# 3x3 array of random integers between 1 and 9; no seed is set, so the values
# differ from run to run.
a = np.random.randint(1, 10, size=(3, 3))
a

array([[5, 5, 3],
       [2, 9, 5],
       [2, 2, 6]])

In [24]:
b = a # NOT a copy: plain assignment makes `b` a second name for the same array object
b

array([[5, 5, 3],
       [2, 9, 5],
       [2, 2, 6]])

In [25]:
# Write through `b`; because `b` and `a` are the same object, `a` changes too.
b[0,0] = 100
b

array([[100,   5,   3],
       [  2,   9,   5],
       [  2,   2,   6]])

In [26]:
# `a` shows the 100 as well: the assignment through `b` modified the shared array.
a

array([[100,   5,   3],
       [  2,   9,   5],
       [  2,   2,   6]])

In [27]:
# Fresh 3x3 random integer array (values 1..9) for the independent-copy demo.
a = np.random.randint(1, 10, size=(3, 3))
a

array([[7, 5, 4],
       [3, 4, 5],
       [9, 6, 5]])

In [28]:
b = np.array(a,copy=True) # independent copy: `b` gets its own data, separate from `a`

In [29]:
# The copy starts out with the same values as `a`.
b

array([[7, 5, 4],
       [3, 4, 5],
       [9, 6, 5]])

In [30]:
# Modify the copy only; `a` is expected to stay unchanged this time.
b[0,0] = 200
b

array([[200,   5,   4],
       [  3,   4,   5],
       [  9,   6,   5]])

In [31]:
# `a` still holds its original values: the change to the copy did not propagate.
a

array([[7, 5, 4],
       [3, 4, 5],
       [9, 6, 5]])

###### Basic operations with a DataFrame, renaming columns, filtering a DataFrame

In [32]:
# 5x4 array of random integers (values 1..9) used as the DataFrame's data.
a = np.random.randint(1, 10, size=(5, 4))
a

array([[2, 3, 1, 9],
       [8, 8, 4, 5],
       [9, 8, 3, 9],
       [2, 6, 7, 6],
       [5, 1, 7, 5]])

In [33]:
# An empty DataFrame: no rows and no columns.
df = pd.DataFrame()
df

In [34]:
# Wrap the 2-D array; rows and columns both get default integer labels (0, 1, 2, ...).
df = pd.DataFrame(a)
df

Unnamed: 0,0,1,2,3
0,2,3,1,9
1,8,8,4,5
2,9,8,3,9
3,2,6,7,6
4,5,1,7,5


In [35]:
df = pd.DataFrame(a,index=list("abcde"),columns=list("ABCD")) # index = row labels, columns = column labels
df

Unnamed: 0,A,B,C,D
a,2,3,1,9
b,8,8,4,5
c,9,8,3,9
d,2,6,7,6
e,5,1,7,5


###### Indexing and slicing on DataFrame

In [36]:
# The column labels, returned as a pandas Index.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [37]:
# keys() gives the same column Index as df.columns.
df.keys()

Index(['A', 'B', 'C', 'D'], dtype='object')

In [38]:
# Two ways to select a column:
#   df["A"]  -> bracket notation
#   df.A     -> dot notation

In [39]:
# Dot notation: column "A" as a Series.
df.A

a    2
b    8
c    9
d    2
e    5
Name: A, dtype: int32

In [40]:
# Bracket notation: the same column "A" as a Series.
df["A"]

a    2
b    8
c    9
d    2
e    5
Name: A, dtype: int32

In [41]:
# A list of column labels returns a DataFrame with those columns in the listed order.
df[["A","C","B"]]

Unnamed: 0,A,C,B
a,2,1,3
b,8,4,8
c,9,3,8
d,2,7,6
e,5,7,1


###### Use of loc/iloc

In [42]:
# loc  -> selection by label:
#   <data_frame>.loc[<row_label>, <column_label>]
# iloc -> selection by integer position:
#   <data_frame>.iloc[<row_index>, <column_index>]


In [43]:
# Show the frame again as a reference for the .loc examples below.
df

Unnamed: 0,A,B,C,D
a,2,3,1,9
b,8,8,4,5
c,9,8,3,9
d,2,6,7,6
e,5,1,7,5


In [49]:
# Single cell by labels: row "b", column "A".
df.loc["b","A"]

8

In [53]:
# Lists of row labels and column labels select a sub-frame.
df.loc[["c","d"],["A","B","C"]]

Unnamed: 0,A,B,C
c,9,8,3
d,2,6,7


###### iloc

In [54]:
# Show the frame again as a reference for the .iloc examples below.
df

Unnamed: 0,A,B,C,D
a,2,3,1,9
b,8,8,4,5
c,9,8,3,9
d,2,6,7,6
e,5,1,7,5


In [55]:
# Single cell by position: first row, first column.
df.iloc[0,0]

2

In [56]:
# Step -1 on both axes: rows and columns are both shown in reverse order.
df.iloc[::-1,::-1]

Unnamed: 0,D,C,B,A
e,5,7,1,5
d,6,7,6,2
c,9,3,8,9
b,5,4,8,8
a,9,1,3,2


In [57]:
# Rows at positions 1, 2, 3 (stop 4 is excluded); all columns.
df.iloc[1:4]

Unnamed: 0,A,B,C,D
b,8,8,4,5
c,9,8,3,9
d,2,6,7,6


In [58]:
# Rows at positions 1..3 and columns at positions 1..2 (stops excluded).
df.iloc[1:4,1:3]

Unnamed: 0,B,C
b,8,4
c,8,3
d,6,7


## Some more functions

In [59]:
# Show the frame again as a reference for the inspection helpers below.
df

Unnamed: 0,A,B,C,D
a,2,3,1,9
b,8,8,4,5
c,9,8,3,9
d,2,6,7,6
e,5,1,7,5


In [60]:
# The dtype of every column, returned as a Series.
df.dtypes

A    int32
B    int32
C    int32
D    int32
dtype: object

In [61]:
# Summary statistics per column: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,5.0,5.0,5.0,5.0
mean,5.2,5.2,4.4,6.8
std,3.271085,3.114482,2.607681,2.04939
min,2.0,1.0,1.0,5.0
25%,2.0,3.0,3.0,5.0
50%,5.0,6.0,4.0,6.0
75%,8.0,8.0,7.0,9.0
max,9.0,8.0,7.0,9.0


In [62]:
# include="all" asks for every column regardless of dtype; all columns here are
# numeric, so the result matches the plain describe() call.
df.describe(include="all")

Unnamed: 0,A,B,C,D
count,5.0,5.0,5.0,5.0
mean,5.2,5.2,4.4,6.8
std,3.271085,3.114482,2.607681,2.04939
min,2.0,1.0,1.0,5.0
25%,2.0,3.0,3.0,5.0
50%,5.0,6.0,4.0,6.0
75%,8.0,8.0,7.0,9.0
max,9.0,8.0,7.0,9.0


In [63]:
# Print a structural summary: index, per-column non-null counts and dtypes, memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, a to e
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   A       5 non-null      int32
 1   B       5 non-null      int32
 2   C       5 non-null      int32
 3   D       5 non-null      int32
dtypes: int32(4)
memory usage: 280.0+ bytes


In [64]:
# Show the frame once more before a missing value is introduced.
df

Unnamed: 0,A,B,C,D
a,2,3,1,9
b,8,8,4,5
c,9,8,3,9
d,2,6,7,6
e,5,1,7,5


In [78]:
# Put a missing value at row "a", column "B".
# An integer column cannot hold NaN, so B is converted to float64 first;
# assigning NaN straight into the int column relies on silent upcasting,
# which is deprecated in pandas >= 2.1.
df["B"] = df["B"].astype("float64")
df.loc["a", "B"] = np.nan

In [79]:
# Work on a copy so the column removal below does not affect `df`.
df2 = df.copy()

In [80]:
# The copy, including the NaN placed in column B.
df2

Unnamed: 0,A,B,C,D
a,2,,1,9
b,8,8.0,4,5
c,9,8.0,3,9
d,2,6.0,7,6
e,5,1.0,7,5


In [82]:
# Remove column "B" from df2 in place.
del df2["B"]

In [83]:
# df2 now has only columns A, C and D.
df2

Unnamed: 0,A,C,D
a,2,1,9
b,8,4,5
c,9,3,9
d,2,7,6
e,5,7,5


In [84]:
# Select rows by a list of labels; every column is kept (df still has column B).
df.loc[['a','b','c']]

Unnamed: 0,A,B,C,D
a,2,,1,9
b,8,8.0,4,5
c,9,8.0,3,9


In [85]:
# The listed row labels do not have to be adjacent.
df.loc[['a','c']]

Unnamed: 0,A,B,C,D
a,2,,1,9
c,9,8.0,3,9


In [86]:
# Label slices on both axes; the end labels 'c' and 'C' are included.
df.loc['a':'c','A':'C']

Unnamed: 0,A,B,C
a,2,,1
b,8,8.0,4
c,9,8.0,3


In [87]:
# Always display the per-column non-null counts.
# `show_counts` replaces the `null_counts` keyword, which was deprecated in
# pandas 1.2 and removed in pandas 2.0.
df.info(show_counts=True)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, a to e
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   A       5 non-null      int32  
 1   B       4 non-null      float64
 2   C       5 non-null      int32  
 3   D       5 non-null      int32  
dtypes: float64(1), int32(3)
memory usage: 300.0+ bytes
