## Loading Libraries

In [1]:
import numpy as np
import pandas as pd

## 1. Create DataFrame

In [2]:
# Column-oriented sample data: each key becomes a DataFrame column and each
# list holds that column's values, one entry per student (four in total).
data = dict([
    ('roll_no', [3, 2, 7, 11]),
    ('ppr_id', [34, 31, 10, 11]),
    ('marks', [30, 23, 17, 27]),
])

In [3]:
# Echo the dictionary to confirm its contents before building a DataFrame from it.
data

{'roll_no': [3, 2, 7, 11],
 'ppr_id': [34, 31, 10, 11],
 'marks': [30, 23, 17, 27]}

In [4]:
# Confirm the container is a plain Python dict (not yet a pandas object).
type(data)

dict

In [5]:
# Build a DataFrame from the dict: keys become column names, and since no
# index is passed the rows get the default integer labels.
df1 = pd.DataFrame(data)

In [7]:
# Display the resulting frame.
df1

Unnamed: 0,roll_no,ppr_id,marks
0,3,34,30
1,2,31,23
2,7,10,17
3,11,11,27


## 2. Setting index

In [15]:
# Row labels to use as the DataFrame index, one per row of `data`.
name = [
    'shashi',
    'rishabh',
    'c',
    'd',
]

In [16]:
# Same column data as before, but with the rows labelled by the entries of
# `name` instead of the default integer index.
df2 = pd.DataFrame(data=data, index=name)
df2

Unnamed: 0,roll_no,ppr_id,marks
shashi,3,34,30
rishabh,2,31,23
c,7,10,17
d,11,11,27


## 3. Extracting info

In [17]:
# Label-based lookup: .loc with a single index label returns that row as a Series.
df2.loc['shashi']

roll_no     3
ppr_id     34
marks      30
Name: shashi, dtype: int64

In [26]:
# Position-based slicing: the first three rows, and the columns from position 1
# up to (but not including) the last one — which leaves only 'ppr_id' here.
df2.iloc[:3, 1:-1]

Unnamed: 0,ppr_id
shashi,34
rishabh,31
c,10


## Working with CSV Data

## 1. Loading the data

In [27]:
# Load the iris measurements from a CSV file. The path is relative, so the file
# must be in the notebook's current working directory.
df = pd.read_csv('iris_dataset.csv')

In [31]:
# Peek at the first five rows to check the columns and values loaded as expected.
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [32]:
# Peek at the last five rows.
df.tail()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3
149,5.9,3.0,5.1,1.8


In [34]:
# Structural summary: index range, per-column non-null counts and dtypes, memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
dtypes: float64(4)
memory usage: 4.8 KB


In [35]:
# Summary statistics per numeric column: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


### Data Selection

In [46]:
# Single brackets with one column label select that column as a Series.
df['sepal_width']

0      3.5
1      3.0
2      3.2
3      3.1
4      3.6
      ... 
145    3.0
146    2.5
147    3.0
148    3.4
149    3.0
Name: sepal_width, Length: 150, dtype: float64

In [37]:
# Verify the result type of single-bracket selection (a Series).
type(df['sepal_width'])

pandas.core.series.Series

In [40]:
# Double brackets pass a list of labels, so the result stays a DataFrame
# even when only one column is requested.
df[['sepal_width']]

Unnamed: 0,sepal_width
0,3.5
1,3.0
2,3.2
3,3.1
4,3.6
...,...
145,3.0
146,2.5
147,3.0
148,3.4


In [41]:
# Verify the result type of list-based selection (a DataFrame).
type(df[['sepal_width']])

pandas.core.frame.DataFrame

In [42]:
# One-column DataFrame limited to its first five rows.
df[['sepal_width']].head()

Unnamed: 0,sepal_width
0,3.5
1,3.0
2,3.2
3,3.1
4,3.6


In [47]:
# Select several columns at once by passing a list of labels; show the first five rows.
df[['sepal_width', 'petal_width']].head()

Unnamed: 0,sepal_width,petal_width
0,3.5,0.2
1,3.0,0.2
2,3.2,0.2
3,3.1,0.2
4,3.6,0.2


In [49]:
# Show the first five rows again; the column order seen here is what the
# positional (iloc) column indices refer to.
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [51]:
# Positional selection: rows at positions 0-9 (the end of an iloc slice is
# exclusive) and the columns at positions 1 and 3.
df.iloc[:10, [1, 3]]

Unnamed: 0,sepal_width,petal_width
0,3.5,0.2
1,3.0,0.2
2,3.2,0.2
3,3.1,0.2
4,3.6,0.2
5,3.9,0.4
6,3.4,0.3
7,3.4,0.2
8,2.9,0.2
9,3.1,0.1


In [53]:
# Label-based selection: unlike iloc, a .loc slice INCLUDES its end label, so
# with this frame's default RangeIndex `:10` covers labels 0 through 10
# (eleven rows), restricted to the two named columns.
df.loc[:10, ['sepal_length', 'petal_width']]

Unnamed: 0,sepal_length,petal_width
0,5.1,0.2
1,4.9,0.2
2,4.7,0.2
3,4.6,0.2
4,5.0,0.2
5,5.4,0.4
6,4.6,0.3
7,5.0,0.2
8,4.4,0.2
9,4.9,0.1


## Null Values

In [57]:
# Rebuild the sample data (this rebinds `data`), this time with a missing
# value: the first entry of 'marks' is NaN.
data = {}
data['roll_no'] = [3, 2, 7, 11]
data['ppr_id'] = [34, 21, 10, 11]
data['marks'] = [np.nan, 23, 17, 27]

In [55]:
# Rebind df1 to a frame built from the data containing NaN. The NaN forces the
# 'marks' column to a float dtype, which is why its values display as 23.0 etc.
df1 = pd.DataFrame(data)
df1

Unnamed: 0,roll_no,ppr_id,marks
0,3,34,
1,2,21,23.0
2,7,10,17.0
3,11,11,27.0


In [56]:
# isnull() returns a boolean frame of the same shape as df1:
# True where a value is missing (NaN), False elsewhere.

df1.isnull()

Unnamed: 0,roll_no,ppr_id,marks
0,False,False,True
1,False,False,False
2,False,False,False
3,False,False,False


## Statistics

In [58]:
# Sum of 'marks'; missing values are skipped by default, so only 23 + 17 + 27 is added.
df1['marks'].sum()

67.0

In [59]:
# Mean of 'marks' over the non-missing values only (divides by 3, not 4).
df1['marks'].mean()

22.333333333333332

In [60]:
# Running total of 'marks'; the NaN position stays NaN in the result and does
# not contribute to the totals that follow it.
df1['marks'].cumsum()

0     NaN
1    23.0
2    40.0
3    67.0
Name: marks, dtype: float64

In [61]:
# Number of non-missing entries in 'marks' (NaN is not counted).
df1['marks'].count()

3

In [62]:
# Sample variance of 'marks' (pandas defaults to ddof=1), computed over the
# non-missing values only.
df1['marks'].var()

25.333333333333336