## Resetting the index

In [23]:
import pandas as pd
# NOTE(review): read_csv is called without header=None, so the first record of
# the file (5.1,3.5,1.4,0.2,Iris-setosa) is used as the column names and is
# missing from the data rows. Pass header=None if every record should be kept.
iris = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data")
# Work on a copy so `iris` stays untouched and can be used to restore `df` later.
df = iris.copy()
df

Unnamed: 0,5.1,3.5,1.4,0.2,Iris-setosa
0,4.9,3.0,1.4,0.2,Iris-setosa
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa
...,...,...,...,...,...
144,6.7,3.0,5.2,2.3,Iris-virginica
145,6.3,2.5,5.0,1.9,Iris-virginica
146,6.5,3.0,5.2,2.0,Iris-virginica
147,6.2,3.4,5.4,2.3,Iris-virginica


**reset_index() function**

In [24]:
# Returns a new frame with a fresh 0..n-1 index; the previous index is kept as
# an extra 'index' column. Nothing is assigned here, so df is only displayed.
df.reset_index()

Unnamed: 0,index,5.1,3.5,1.4,0.2,Iris-setosa
0,0,4.9,3.0,1.4,0.2,Iris-setosa
1,1,4.7,3.2,1.3,0.2,Iris-setosa
2,2,4.6,3.1,1.5,0.2,Iris-setosa
3,3,5.0,3.6,1.4,0.2,Iris-setosa
4,4,5.4,3.9,1.7,0.4,Iris-setosa
...,...,...,...,...,...,...
144,144,6.7,3.0,5.2,2.3,Iris-virginica
145,145,6.3,2.5,5.0,1.9,Iris-virginica
146,146,6.5,3.0,5.2,2.0,Iris-virginica
147,147,6.2,3.4,5.4,2.3,Iris-virginica


**To discard the old index instead of keeping it as a column, pass `drop=True`**

In [25]:
# Replace the index with a fresh 0..n-1 RangeIndex and discard the old one
# (drop=True) instead of keeping it as an 'index' column. `df` is rebound to
# the returned frame rather than mutated with inplace=True.
df = df.reset_index(drop=True)
df.index

RangeIndex(start=0, stop=149, step=1)

In [26]:
# Overwrite the current column labels (which are the values of the first data
# record, see the table above) with short names.
# presumably sl/sw = sepal length/width and pl/pw = petal length/width — TODO confirm
df.columns = ['sl','sw','pl','pw','flower_type']
df

Unnamed: 0,sl,sw,pl,pw,flower_type
0,4.9,3.0,1.4,0.2,Iris-setosa
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa
...,...,...,...,...,...
144,6.7,3.0,5.2,2.3,Iris-virginica
145,6.3,2.5,5.0,1.9,Iris-virginica
146,6.5,3.0,5.2,2.0,Iris-virginica
147,6.2,3.4,5.4,2.3,Iris-virginica


**Deleting data column-wise**

In [27]:
# drop() looks labels up along axis 0 (the row index) by default, so the column
# name 'sl' is not found there. The KeyError is the point of this cell; it is
# caught and printed so that a Restart & Run All does not stop here.
try:
    df.drop('sl')
except KeyError as err:
    print("KeyError:", err)

KeyError: "['sl'] not found in axis"

**The command above raises a KeyError because `drop` looks for 'sl' among the row labels (axis 0) by default and no such label exists; pass `axis=1` to drop a column instead**

In [28]:
# axis=1 makes drop() look for 'sl' among the column labels. `df` is rebound to
# the returned frame rather than mutated with inplace=True.
df = df.drop('sl', axis=1)

In [29]:
# Display the frame to confirm that the 'sl' column is gone.
df

Unnamed: 0,sw,pl,pw,flower_type
0,3.0,1.4,0.2,Iris-setosa
1,3.2,1.3,0.2,Iris-setosa
2,3.1,1.5,0.2,Iris-setosa
3,3.6,1.4,0.2,Iris-setosa
4,3.9,1.7,0.4,Iris-setosa
...,...,...,...,...
144,3.0,5.2,2.3,Iris-virginica
145,2.5,5.0,1.9,Iris-virginica
146,3.0,5.2,2.0,Iris-virginica
147,3.4,5.4,2.3,Iris-virginica


In [30]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; the text column flower_type does not appear in the result.
df.describe()

Unnamed: 0,sw,pl,pw
count,149.0,149.0,149.0
mean,3.051007,3.774497,1.205369
std,0.433499,1.759651,0.761292
min,2.0,1.0,0.1
25%,2.8,1.6,0.3
50%,3.0,4.4,1.3
75%,3.3,5.1,1.8
max,4.4,6.9,2.5


In [31]:
# `del` removes the 'sw' column from df directly; no axis argument is involved.
del df['sw']
# The summary now covers only the remaining numeric columns.
df.describe()

Unnamed: 0,pl,pw
count,149.0,149.0
mean,3.774497,1.205369
std,1.759651,0.761292
min,1.0,0.1
25%,1.6,0.3
50%,4.4,1.3
75%,5.1,1.8
max,6.9,2.5


In [32]:
# First five rows: flower_type is still part of df even though describe()
# left it out of the summary.
df.head()

Unnamed: 0,pl,pw,flower_type
0,1.4,0.2,Iris-setosa
1,1.3,0.2,Iris-setosa
2,1.5,0.2,Iris-setosa
3,1.4,0.2,Iris-setosa
4,1.7,0.4,Iris-setosa


In [34]:
# Start again from the untouched `iris` frame, because the cells above removed
# 'sl' and 'sw' from df, then re-apply the short column names.
df = iris.copy()
df.columns = ['sl','sw','pl','pw','flower_type']
df.describe()

Unnamed: 0,sl,sw,pl,pw
count,149.0,149.0,149.0,149.0
mean,5.848322,3.051007,3.774497,1.205369
std,0.828594,0.433499,1.759651,0.761292
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.4,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


**Adding a new column to a dataframe**

In [35]:
# Assigning to a label that does not exist yet appends a new column. The
# subtraction is element-wise, so each row gets its own pl - pw value.
df['diff_pl_pw'] = df['pl'] - df['pw']
# tail() shows the last rows, where the new column is visible on the right.
df.tail()

Unnamed: 0,sl,sw,pl,pw,flower_type,diff_pl_pw
144,6.7,3.0,5.2,2.3,Iris-virginica,2.9
145,6.3,2.5,5.0,1.9,Iris-virginica,3.1
146,6.5,3.0,5.2,2.0,Iris-virginica,3.2
147,6.2,3.4,5.4,2.3,Iris-virginica,3.1
148,5.9,3.0,5.1,1.8,Iris-virginica,3.3


In [83]:
import pandas as pd
import numpy as np
# The file is read with header=None so that its first record stays a data row;
# the column names are then assigned by hand.
df = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
    header=None,
)
df.columns = ['sl', 'sw', 'pl', 'pw', 'sp']

# One sub-frame per species, selected with a boolean mask on the 'sp' column.
cs1 = df.loc[df['sp'] == 'Iris-setosa']
cs2 = df.loc[df['sp'] == 'Iris-versicolor']
cs3 = df.loc[df['sp'] == 'Iris-virginica']

# Summary statistics of each sub-frame ...
k1, k2, k3 = (part.describe() for part in (cs1, cs2, cs3))

# ... and the same statistics as plain 2-D arrays (rows follow describe()'s
# order, columns follow the numeric columns of df).
k1_val, k2_val, k3_val = (stats.values for stats in (k1, k2, k3))

def print_it(arr, strin, rows=(3, 7, 1)):
    """Print selected rows of a 2-D statistics array, one output line per row.

    Every value of a selected row is formatted with two decimals and followed
    by a single space; the line then ends with ``strin``.

    Parameters
    ----------
    arr : sequence of rows
        Indexable by row position; in this notebook it is the ``.values`` of a
        ``DataFrame.describe()`` result.
    strin : str
        Label printed at the end of every line.
    rows : iterable of int, optional
        Row positions to print, in this order. The default ``(3, 7, 1)`` picks
        the min, max and mean rows of a ``describe()`` table, whose rows are
        count, mean, std, min, 25%, 50%, 75%, max.
    """
    for row_idx in rows:
        # Unpacking into print() puts a single space after every formatted
        # value and before the label, which is exactly the original layout.
        print(*(format(value, '.2f') for value in arr[row_idx]), strin)
            

# Print the selected statistics rows for each species, labelled with its name.
for stats_arr, species_name in (
    (k1_val, 'Iris-setosa'),
    (k2_val, 'Iris-versicolor'),
    (k3_val, 'Iris-virginica'),
):
    print_it(stats_arr, species_name)



4.30 2.30 1.00 0.10 Iris-setosa
5.80 4.40 1.90 0.60 Iris-setosa
5.01 3.42 1.46 0.24 Iris-setosa
4.90 2.00 3.00 1.00 Iris-versicolor
7.00 3.40 5.10 1.80 Iris-versicolor
5.94 2.77 4.26 1.33 Iris-versicolor
4.90 2.20 4.50 1.40 Iris-virginica
7.90 3.80 6.90 2.50 Iris-virginica
6.59 2.97 5.55 2.03 Iris-virginica


## To do: read about the `value_counts()` function on DataFrames

**Check the usage of `DataFrame.values`**

**Check the usage of the built-in `format()` function**