In [57]:
import numpy as np
import pandas as pd
from sklearn import datasets

In [58]:
# Small demo frame with deliberate gaps: row "a" is missing `two`,
# and row "c" is missing both values.
df = pd.DataFrame(
    {
        "one": [2.4, 6.3, np.nan, 0.75],
        "two": [np.nan, -6.4, np.nan, -1.3],
    },
    index=list("abcd"),
)

df

Unnamed: 0,one,two
a,2.4,
b,6.3,-6.4
c,,
d,0.75,-1.3


In [59]:
# Column totals; missing values are skipped rather than propagated.
df.sum(axis=0)

one    9.45
two   -7.70
dtype: float64

In [60]:
# Row totals; note that the all-NaN row "c" totals 0.0, not NaN.
df.sum(axis="columns")

a    2.40
b   -0.10
c    0.00
d   -0.55
dtype: float64

In [61]:
# Column means computed over the non-missing values only.
df.mean(axis=0)

one    3.15
two   -3.85
dtype: float64

In [62]:
# Row means over the non-missing values; the all-NaN row "c" stays NaN.
df.mean(axis="columns")

a    2.400
b   -0.050
c      NaN
d   -0.275
dtype: float64

In [63]:
# Row means with NaN propagation: any row containing a gap (here "a" and "c") yields NaN.
df.mean(axis="columns", skipna=False)

a      NaN
b   -0.050
c      NaN
d   -0.275
dtype: float64

In [64]:
# Row label of the largest value in each column (missing values are ignored).
df.idxmax(axis=0)

one    b
two    d
dtype: object

In [65]:
# Show the frame again for reference before computing row totals below.
df

Unnamed: 0,one,two
a,2.4,
b,6.3,-6.4
c,,
d,0.75,-1.3


In [66]:
# Label of the row with the largest total. Row "c" (all NaN) contributes a
# total of 0.0, so it still takes part in the comparison.
index = df.sum(axis="columns").idxmax()
index

'a'

In [67]:
# Row label of the smallest value in each column (missing values are ignored).
df.idxmin(axis=0)

one    d
two    b
dtype: object

In [68]:
# Show the frame again for reference before the cumulative sum below.
df

Unnamed: 0,one,two
a,2.4,
b,6.3,-6.4
c,,
d,0.75,-1.3


In [69]:
df.cumsum()   # Running total down each column: each entry is the sum of the values up to and including that row (NaN cells stay NaN)

Unnamed: 0,one,two
a,2.4,
b,8.7,-6.4
c,,
d,9.45,-7.7


### Working with the Iris Dataset

In [70]:
# Load the Iris dataset bundled with scikit-learn.
# `datasets` is imported in the notebook's top import cell so that every
# dependency is declared in one place.
iris = datasets.load_iris()

In [71]:
# Wrap the raw measurement array in a DataFrame; columns get default integer labels.
# Note: this rebinds `df`, replacing the small demo frame used above.
df = pd.DataFrame(data=iris.data)

In [72]:
# Display the raw feature table; columns carry default integer labels at this point.
df

Unnamed: 0,0,1,2,3
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [73]:
# Inspect the object returned by load_iris().
# NOTE(review): this prints the entire structure, including the full data array;
# a narrower view such as iris.keys() may be easier to read — confirm intent.
iris

{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

In [74]:
# Names of the four measurement columns, in the same order as the data array's columns.
iris["feature_names"]

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [75]:
# Species labels; position i is the name for target code i.
iris["target_names"]

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [76]:
# Append the integer target codes from the dataset as a `class` column.
df = df.assign(**{'class': iris.target})

In [77]:
# Confirm the integer `class` column was appended.
df

Unnamed: 0,0,1,2,3,class
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


In [78]:
# Translate the integer class codes into species names with an explicit lookup
# table. The codes follow the order of iris.target_names (0, 1, 2). Unlike a
# nested conditional, a code outside the table becomes NaN instead of being
# silently labelled 'virginica'.
df['class_name'] = df['class'].map({0: 'setosa', 1: 'versicolor', 2: 'virginica'})

In [79]:
# Preview the first five rows to check the new `class_name` column.
df.head(n=5)

Unnamed: 0,0,1,2,3,class,class_name
0,5.1,3.5,1.4,0.2,0,setosa
1,4.9,3.0,1.4,0.2,0,setosa
2,4.7,3.2,1.3,0.2,0,setosa
3,4.6,3.1,1.5,0.2,0,setosa
4,5.0,3.6,1.4,0.2,0,setosa


In [80]:
# Scratch cell: literal copy of the feature names
# (presumably kept as a reference for renaming the columns — confirm or remove).
['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [81]:
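# Abandoned attempt: passing an existing DataFrame together with `columns=` selects
# columns by label (it does not rename them), so this would yield all-NaN columns.
# Use `DataFrame.rename` instead.
# df = pd.DataFrame(df, columns = ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)'])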

In [83]:
# Replace the default integer column labels (0-3) with descriptive feature names.
# Labels that are not present are left untouched, so re-running this cell is harmless.
df = df.rename(
    columns=dict(enumerate([
        'sepal length (cm)',
        'sepal width (cm)',
        'petal length (cm)',
        'petal width (cm)',
    ]))
)
df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class,class_name
0,5.1,3.5,1.4,0.2,0,setosa
1,4.9,3.0,1.4,0.2,0,setosa
2,4.7,3.2,1.3,0.2,0,setosa
3,4.6,3.1,1.5,0.2,0,setosa
4,5.0,3.6,1.4,0.2,0,setosa
...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2,virginica
146,6.3,2.5,5.0,1.9,2,virginica
147,6.5,3.0,5.2,2.0,2,virginica
148,6.2,3.4,5.4,2.3,2,virginica


In [84]:
# Pearson correlation between sepal length and sepal width.
df['sepal length (cm)'].corr(df['sepal width (cm)'], method='pearson')

-0.11756978413300208

In [90]:
# Keep only the four numeric measurement columns (leaves out `class` and `class_name`).
df1 = df.loc[:, [
    'sepal length (cm)',
    'sepal width (cm)',
    'petal length (cm)',
    'petal width (cm)',
]]

The history saving thread hit an unexpected error (OperationalError('database or disk is full')).History will not be written to the database.


In [92]:
# Display the numeric-only subset (the four measurement columns).
df1

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [95]:
# Same sepal length/width correlation as computed earlier, repeated here
# for comparison with the full correlation matrix below.
df["sepal length (cm)"].corr(df["sepal width (cm)"])

-0.11756978413300208

In [96]:
# Pairwise Pearson correlation matrix of the four measurement columns.
df1.corr(method='pearson')

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
sepal length (cm),1.0,-0.11757,0.871754,0.817941
sepal width (cm),-0.11757,1.0,-0.42844,-0.366126
petal length (cm),0.871754,-0.42844,1.0,0.962865
petal width (cm),0.817941,-0.366126,0.962865,1.0


In [97]:
# Pairwise covariance matrix of the four measurement columns.
df1.cov()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
sepal length (cm),0.685694,-0.042434,1.274315,0.516271
sepal width (cm),-0.042434,0.189979,-0.329656,-0.121639
petal length (cm),1.274315,-0.329656,3.116278,1.295609
petal width (cm),0.516271,-0.121639,1.295609,0.581006
