### Loading Libraries

In [4]:
import os

import numpy as np
import pandas as pd

### 1. Create DataFrame

In [5]:
# Small example table as a column-name -> values mapping:
# three columns, four values each.
data = dict(
    roll_no=[3, 2, 7, 11],
    ppr_id=[34, 21, 10, 11],
    marks=[30, 23, 17, 27],
)

In [6]:
# Build a DataFrame from the dict of columns.
# The column order is pinned explicitly: older pandas sorts dict keys
# alphabetically while newer versions keep insertion order, so without this the
# displayed table differs between environments.
df1 = pd.DataFrame(data, columns=['marks', 'ppr_id', 'roll_no'])

In [7]:
# Display the frame; rows carry the default integer index 0..3.
df1

Unnamed: 0,marks,ppr_id,roll_no
0,30,34,3
1,23,21,2
2,17,10,7
3,27,11,11


### 2. Setting the Index

In [8]:
# Same records, but with custom string row labels instead of the default 0..3.
# The column order is pinned explicitly: older pandas sorts dict keys
# alphabetically while newer versions keep insertion order, and positional
# (iloc) lookups on df2 depend on which column sits at which position.
df2 = pd.DataFrame(
    data,
    index=['ab', 'ef', 'xy', 'uv'],
    columns=['marks', 'ppr_id', 'roll_no'],
)

In [9]:
# Display the frame; rows are now labelled 'ab', 'ef', 'xy', 'uv'.
df2

Unnamed: 0,marks,ppr_id,roll_no
ab,30,34,3
ef,23,21,2
xy,17,10,7
uv,27,11,11


### 3. Extracting Info

In [10]:
# Label-based lookup: fetch the row whose index label is 'xy'.
# A single row comes back as a Series keyed by column name.
df2.loc['xy']

marks      17
ppr_id     10
roll_no     7
Name: xy, dtype: int64

In [11]:
# Position-based lookup: all rows (:) of the last column (-1).
# Which column is "last" depends on the frame's column order.

df2.iloc[:, -1]

ab     3
ef     2
xy     7
uv    11
Name: roll_no, dtype: int64

In [12]:
# Extract specific rows and columns by position: rows 0-1 and column 2.
# Slicing the column (2:3) instead of indexing it (2) keeps the result a
# DataFrame rather than a Series.

df2.iloc[0:2, 2:3]

Unnamed: 0,roll_no
ab,3
ef,2


## Working on CSV

### 1. Loading Data

In [13]:
# Path to the iris CSV. The fallback is the original machine-specific location;
# set the IRIS_CSV environment variable to load a copy stored somewhere else.
iris_csv_path = os.environ.get('IRIS_CSV', 'C:\\Users\\dell\\Desktop\\iris_dataset.csv')
df = pd.read_csv(iris_csv_path)

In [14]:
# Quick look at the data: head() with no argument shows the first five rows.

df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [15]:
# Pass a row count to head() to see more rows: here the first ten.
df.head(10)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
5,5.4,3.9,1.7,0.4
6,4.6,3.4,1.4,0.3
7,5.0,3.4,1.5,0.2
8,4.4,2.9,1.4,0.2
9,4.9,3.1,1.5,0.1


### 2. Data Info

In [16]:
# Structural summary: index range, column names, non-null counts, dtypes
# and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 4 columns):
sepal_length    150 non-null float64
sepal_width     150 non-null float64
petal_length    150 non-null float64
petal_width     150 non-null float64
dtypes: float64(4)
memory usage: 4.8 KB


### 3. Data Description

In [17]:
# Summary statistics per numeric column: count, mean, std, min,
# quartiles (25% / 50% / 75%) and max.
df.describe()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


### 4. Data Selection

In [18]:
# Single brackets pick one column as a pandas Series; show its first six values.

df['sepal_width'].head(6)

0    3.5
1    3.0
2    3.2
3    3.1
4    3.6
5    3.9
Name: sepal_width, dtype: float64

In [19]:
# Double brackets (a list of names) keep the result a pandas DataFrame,
# even for a single column; show its first six rows.

df[['sepal_width']].head(6)

Unnamed: 0,sepal_width
0,3.5
1,3.0
2,3.2
3,3.1
4,3.6
5,3.9


In [20]:
# Select several columns by name with a list; head() shows the first five rows.
df[['sepal_width', 'petal_width']].head()

Unnamed: 0,sepal_width,petal_width
0,3.5,0.2
1,3.0,0.2
2,3.2,0.2
3,3.1,0.2
4,3.6,0.2


In [21]:
# Positional selection: first ten rows, columns at positions 1 and 3
# (sepal_width and petal_width in this file).
df.iloc[:10, [1, 3]]

Unnamed: 0,sepal_width,petal_width
0,3.5,0.2
1,3.0,0.2
2,3.2,0.2
3,3.1,0.2
4,3.6,0.2
5,3.9,0.4
6,3.4,0.3
7,3.4,0.2
8,2.9,0.2
9,3.1,0.1


### 5. Missing Values

#### 5.1 NULL Values

In [22]:
# Same example table, but the first 'marks' value is missing (NaN).
data = dict(
    roll_no=[3, 2, 7, 11],
    ppr_id=[34, 21, 10, 11],
    marks=[np.nan, 23, 17, 27],
)

In [23]:
# Build the frame containing the missing value and display it.
# The column order is pinned explicitly: older pandas sorts dict keys
# alphabetically while newer versions keep insertion order, so without this the
# displayed tables differ between environments.
df1 = pd.DataFrame(data, columns=['marks', 'ppr_id', 'roll_no'])
df1

Unnamed: 0,marks,ppr_id,roll_no
0,,34,3
1,23.0,21,2
2,17.0,10,7
3,27.0,11,11


#### 5.2 isnull()

In [24]:
# Detect NULL values: isnull() returns a same-shaped frame of booleans,
# True wherever a value is missing.

df1.isnull()

Unnamed: 0,marks,ppr_id,roll_no
0,True,False,False
1,False,False,False
2,False,False,False
3,False,False,False


In [25]:
# Number of NULL values per column: summing the boolean mask counts the Trues.

df1.isnull().sum()

marks      1
ppr_id     0
roll_no    0
dtype: int64

#### 5.3 fillna()

In [26]:
# Fill NULL values with a constant (here 1). fillna() returns a new frame;
# df1 itself still contains the NaN.
# Note: this rebinds df2, which previously held the label-indexed frame.
df2 = df1.fillna(1)
df2

Unnamed: 0,marks,ppr_id,roll_no
0,1.0,34,3
1,23.0,21,2
2,17.0,10,7
3,27.0,11,11


#### 5.4 Dropping NULL values

In [27]:
# Drop NULL values: by default dropna() removes every row that contains
# a missing value (row 0 here) and returns a new frame.

a = df1.dropna()
a

Unnamed: 0,marks,ppr_id,roll_no
1,23.0,21,2
2,17.0,10,7
3,27.0,11,11


In [28]:
# axis=0 drops rows with missing values; this gives the same result as
# calling dropna() with no arguments.
a = df1.dropna(axis=0)
a

Unnamed: 0,marks,ppr_id,roll_no
1,23.0,21,2
2,17.0,10,7
3,27.0,11,11


In [29]:
# axis=1 drops columns with missing values instead: 'marks' is removed
# and all four rows are kept.
a = df1.dropna(axis=1)
a

Unnamed: 0,ppr_id,roll_no
0,34,3
1,21,2
2,10,7
3,11,11


#### 5.5 Creating a DataFrame with non-NULL values

In [30]:
# Boolean mask over the rows: True where 'marks' holds a value, False where it is missing.
a = df1["marks"].notnull()

In [31]:
# Boolean indexing: keep only the rows where the mask `a` is True.
df1[a]

Unnamed: 0,marks,ppr_id,roll_no
1,23.0,21,2
2,17.0,10,7
3,27.0,11,11


### 6. Statistics

In [58]:
# Example table for the statistics section: 'ppr_id' contains one missing
# value and 'roll_no' contains a repeated value.
data = {
    'roll_no': [3, 3, 7, 11],
    'ppr_id': [23, np.nan, 10, 11],
    'marks': [30, 22, 17, 27]
}

# The column order is pinned explicitly: older pandas sorts dict keys
# alphabetically while newer versions keep insertion order, so without this
# frame-wide outputs such as corr() are laid out differently between environments.
df1 = pd.DataFrame(data, columns=['marks', 'ppr_id', 'roll_no'])

In [59]:
# Total marks: sum of the 'marks' column.
df1['marks'].sum()

96

In [60]:
# Average marks: arithmetic mean of the 'marks' column.
df1['marks'].mean()

24.0

In [61]:
# Cumulative sum: running total of 'marks', one value per row.
df1['marks'].cumsum()

0    30
1    52
2    69
3    96
Name: marks, dtype: int64

In [62]:
# Count: number of non-null entries in the column.

df1['marks'].count()

4

In [63]:
# Minimum: smallest value in 'marks'.
df1['marks'].min()

17

In [65]:
# Minimum of a column with a missing value: the NaN is skipped, and because
# the column is float the result comes back as 10.0.
df1['ppr_id'].min()

10.0

In [64]:
# Maximum: largest value in 'marks'.
df1['marks'].max()

30

In [66]:
# Variance of 'marks'. pandas computes the sample variance (divides by n - 1).
df1['marks'].var()

32.666666666666664

In [67]:
# Standard deviation of 'marks': the square root of the sample variance.
df1['marks'].std()

5.715476066494082

In [68]:
# Pairwise correlation matrix between all columns (Pearson by default).
# Rows with a missing value are excluded pair by pair, so the NaN in
# 'ppr_id' only affects correlations that involve that column.
df1.corr()

Unnamed: 0,marks,ppr_id,roll_no
marks,1.0,0.727698,-0.060914
ppr_id,0.727698,1.0,-0.829396
roll_no,-0.060914,-0.829396,1.0
