# Pandas
- library for Data Analysis and Manipulation


## Why Pandas?
- provides ability to work with Tabular data.
- **Tabular Data** - data that is organized into tables having rows and cols


In [1]:
### Installing Pandas
! pip install pandas



In [2]:
import pandas as pd

## Series
- Series is a one dimensional labelled array that can hold/store data

In [3]:
book_title = ['C++', 'Java', 'Python', 'JavaScript']

In [5]:
book_title = pd.Series(book_title)
book_title

0           C++
1          Java
2        Python
3    JavaScript
dtype: object

In [6]:
type(book_title)

pandas.core.series.Series

In [7]:
book_title[2]

'Python'

In [8]:
book_title.index = ['a', 'b', 'c', 'd']

In [9]:
book_title

a           C++
b          Java
c        Python
d    JavaScript
dtype: object

In [10]:
book_title['c']

'Python'

## DataFrame - Introduction
- 2D labelled array having index and columns. 
- multiple series combined together to form a Dataframe.
- most widely used data structure in pandas

In [11]:
import numpy as np

In [13]:
arr = np.random.randint(10, 100, size =(6, 4))
arr

array([[33, 58, 33, 50],
       [88, 67, 57, 41],
       [21, 61, 26, 68],
       [11, 63, 88, 30],
       [31, 54, 97, 38],
       [68, 31, 41, 94]])

In [14]:
df = pd.DataFrame(data=arr)
df

Unnamed: 0,0,1,2,3
0,33,58,33,50
1,88,67,57,41
2,21,61,26,68
3,11,63,88,30
4,31,54,97,38
5,68,31,41,94


In [15]:
type(df)

pandas.core.frame.DataFrame

In [16]:
df[2]

0    33
1    57
2    26
3    88
4    97
5    41
Name: 2, dtype: int64

In [17]:
type(df[2])

pandas.core.series.Series

In [18]:
type(df[0])

pandas.core.series.Series

In [19]:
df.columns =  ['A', 'B','C','D']
df

Unnamed: 0,A,B,C,D
0,33,58,33,50
1,88,67,57,41
2,21,61,26,68
3,11,63,88,30
4,31,54,97,38
5,68,31,41,94


In [20]:
df['D']

0    50
1    41
2    68
3    30
4    38
5    94
Name: D, dtype: int64

In [21]:
df.shape

(6, 4)

In [23]:
df.head(n=3)

Unnamed: 0,A,B,C,D
0,33,58,33,50
1,88,67,57,41
2,21,61,26,68


In [25]:
df.tail(n=2)

Unnamed: 0,A,B,C,D
4,31,54,97,38
5,68,31,41,94


In [26]:
# Extracting Columns
df['C']

0    33
1    57
2    26
3    88
4    97
5    41
Name: C, dtype: int64

In [28]:
# cols = ['A', 'B']
# df[cols]

df[['A', 'B']]

Unnamed: 0,A,B
0,33,58
1,88,67
2,21,61
3,11,63
4,31,54
5,68,31


In [29]:
df[['B', 'D', 'A']]

Unnamed: 0,B,D,A
0,58,50,33
1,67,41,88
2,61,68,21
3,63,30,11
4,54,38,31
5,31,94,68


In [30]:
df

Unnamed: 0,A,B,C,D
0,33,58,33,50
1,88,67,57,41
2,21,61,26,68
3,11,63,88,30
4,31,54,97,38
5,68,31,41,94


In [31]:
# Add New Columns

In [33]:
df['A+B'] = df['A'] + df['B']

In [34]:
df

Unnamed: 0,A,B,C,D,A+B
0,33,58,33,50,91
1,88,67,57,41,155
2,21,61,26,68,82
3,11,63,88,30,74
4,31,54,97,38,85
5,68,31,41,94,99


In [35]:
df['A-B'] = df['A'] * df['B'] 

In [36]:
df

Unnamed: 0,A,B,C,D,A+B,A-B
0,33,58,33,50,91,1914
1,88,67,57,41,155,5896
2,21,61,26,68,82,1281
3,11,63,88,30,74,693
4,31,54,97,38,85,1674
5,68,31,41,94,99,2108


In [39]:
df.drop(columns=['A-B'], inplace=True)

In [40]:
df

Unnamed: 0,A,B,C,D,A+B
0,33,58,33,50,91
1,88,67,57,41,155
2,21,61,26,68,82
3,11,63,88,30,74
4,31,54,97,38,85
5,68,31,41,94,99


### Indexing/Extracting Data

In [43]:
df.index = "p q r s t u".split()

In [44]:
df

Unnamed: 0,A,B,C,D,A+B
p,33,58,33,50,91
q,88,67,57,41,155
r,21,61,26,68,82
s,11,63,88,30,74
t,31,54,97,38,85
u,68,31,41,94,99


In [45]:
# loc - location
df.loc['p']

A      33
B      58
C      33
D      50
A+B    91
Name: p, dtype: int64

In [46]:
df.loc['t']

A      31
B      54
C      97
D      38
A+B    85
Name: t, dtype: int64

In [47]:
# iloc - integer location
df.iloc[4]

A      31
B      54
C      97
D      38
A+B    85
Name: t, dtype: int64

In [48]:
df.iloc[0]

A      33
B      58
C      33
D      50
A+B    91
Name: p, dtype: int64

In [49]:
df.iloc[2: 5]

Unnamed: 0,A,B,C,D,A+B
r,21,61,26,68,82
s,11,63,88,30,74
t,31,54,97,38,85


In [51]:
df.iloc[2:5][['A', 'B']]

Unnamed: 0,A,B
r,21,61
s,11,63
t,31,54


In [53]:
df.iloc[-2:][['D', 'A+B']]

Unnamed: 0,D,A+B
t,38,85
u,94,99


In [55]:
df.iloc[-2:, -2: ]

Unnamed: 0,D,A+B
t,38,85
u,94,99


### Masking - Boolean Indexing

In [56]:
df

Unnamed: 0,A,B,C,D,A+B
p,33,58,33,50,91
q,88,67,57,41,155
r,21,61,26,68,82
s,11,63,88,30,74
t,31,54,97,38,85
u,68,31,41,94,99


In [58]:
# masking
mask = df > 30
mask

Unnamed: 0,A,B,C,D,A+B
p,True,True,True,True,True
q,True,True,True,True,True
r,False,True,False,True,True
s,False,True,True,False,True
t,True,True,True,True,True
u,True,True,True,True,True


In [59]:
df[mask]

Unnamed: 0,A,B,C,D,A+B
p,33.0,58,33.0,50.0,91
q,88.0,67,57.0,41.0,155
r,,61,,68.0,82
s,,63,88.0,,74
t,31.0,54,97.0,38.0,85
u,68.0,31,41.0,94.0,99


In [60]:
df[ df > 30 ]

Unnamed: 0,A,B,C,D,A+B
p,33.0,58,33.0,50.0,91
q,88.0,67,57.0,41.0,155
r,,61,,68.0,82
s,,63,88.0,,74
t,31.0,54,97.0,38.0,85
u,68.0,31,41.0,94.0,99


In [63]:
mask = df['B'] > 40
mask

p     True
q     True
r     True
s     True
t     True
u    False
Name: B, dtype: bool

In [64]:
df[mask]

Unnamed: 0,A,B,C,D,A+B
p,33,58,33,50,91
q,88,67,57,41,155
r,21,61,26,68,82
s,11,63,88,30,74
t,31,54,97,38,85


In [66]:
# extracting values from Col C and col D where B column has value >40
df[ df['B'] > 40 ][['C', 'D']]

Unnamed: 0,C,D
p,33,50
q,57,41
r,26,68
s,88,30
t,97,38


In [68]:
df['A']>40

p    False
q     True
r    False
s    False
t    False
u     True
Name: A, dtype: bool

In [70]:
df['D'] < 50

p    False
q     True
r    False
s     True
t     True
u    False
Name: D, dtype: bool

In [71]:
(df['A']> 40) & (df['D']<50)

p    False
q     True
r    False
s    False
t    False
u    False
dtype: bool

In [72]:
df[ (df['A']> 40) & (df['D']<50) ]

Unnamed: 0,A,B,C,D,A+B
q,88,67,57,41,155


In [73]:
df

Unnamed: 0,A,B,C,D,A+B
p,33,58,33,50,91
q,88,67,57,41,155
r,21,61,26,68,82
s,11,63,88,30,74
t,31,54,97,38,85
u,68,31,41,94,99


In [75]:
df_array = df.values
df_array

array([[ 33,  58,  33,  50,  91],
       [ 88,  67,  57,  41, 155],
       [ 21,  61,  26,  68,  82],
       [ 11,  63,  88,  30,  74],
       [ 31,  54,  97,  38,  85],
       [ 68,  31,  41,  94,  99]])

## Iris Dataset -  Introduction

In [79]:
!ls

iris.csv     pandas.ipynb


In [81]:
iris = pd.read_csv("./iris.csv")

In [82]:
type(iris)

pandas.core.frame.DataFrame

In [83]:
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [84]:
iris.tail()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica
149,5.9,3.0,5.1,1.8,virginica


In [87]:
#  total of 150 data points, 
#  columns are 5
iris.shape

(150, 5)

In [88]:
iris.columns

Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width',
       'species'],
      dtype='object')

In [90]:
iris.dtypes

sepal_length    float64
sepal_width     float64
petal_length    float64
petal_width     float64
species          object
dtype: object

In [91]:
iris.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [92]:
iris.describe()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [94]:
iris['species'].nunique()

3

In [96]:
iris['species'].unique()

array(['setosa', 'versicolor', 'virginica'], dtype=object)

In [102]:
#  How many setosa flowers are there?
iris [ iris['species'] == 'versicolor' ].shape

(50, 5)

In [107]:
iris['species'].value_counts()['virginica']

50

In [110]:
iris['sepal_length'].mean()

5.843333333333335

In [112]:
iris['petal_width'].max()

2.5

In [113]:
iris['petal_width'].min()

0.1

In [114]:
iris['petal_width'].sum()

179.8

In [115]:
iris['petal_width']

0      0.2
1      0.2
2      0.2
3      0.2
4      0.2
      ... 
145    2.3
146    1.9
147    2.0
148    2.3
149    1.8
Name: petal_width, Length: 150, dtype: float64

In [117]:
iris.sort_values(by=["sepal_length", "sepal_width"])

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
13,4.3,3.0,1.1,0.1,setosa
8,4.4,2.9,1.4,0.2,setosa
38,4.4,3.0,1.3,0.2,setosa
42,4.4,3.2,1.3,0.2,setosa
41,4.5,2.3,1.3,0.3,setosa
...,...,...,...,...,...
118,7.7,2.6,6.9,2.3,virginica
122,7.7,2.8,6.7,2.0,virginica
135,7.7,3.0,6.1,2.3,virginica
117,7.7,3.8,6.7,2.2,virginica


In [122]:
iris['species'].apply(len)

0      6
1      6
2      6
3      6
4      6
      ..
145    9
146    9
147    9
148    9
149    9
Name: species, Length: 150, dtype: int64

In [121]:
len('setosa')

6

In [123]:
len('virginica')

9

In [125]:
iris.apply(lambda x: x + x)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,10.2,7.0,2.8,0.4,setosasetosa
1,9.8,6.0,2.8,0.4,setosasetosa
2,9.4,6.4,2.6,0.4,setosasetosa
3,9.2,6.2,3.0,0.4,setosasetosa
4,10.0,7.2,2.8,0.4,setosasetosa
...,...,...,...,...,...
145,13.4,6.0,10.4,4.6,virginicavirginica
146,12.6,5.0,10.0,3.8,virginicavirginica
147,13.0,6.0,10.4,4.0,virginicavirginica
148,12.4,6.8,10.8,4.6,virginicavirginica


### Grouping Data Together

In [126]:
iris.aggregate('min')

sepal_length       4.3
sepal_width          2
petal_length         1
petal_width        0.1
species         setosa
dtype: object

In [127]:
iris.aggregate(['min', 'max', 'mean', 'median'])

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
min,4.3,2.0,1.0,0.1,setosa
max,7.9,4.4,6.9,2.5,virginica
mean,5.843333,3.054,3.758667,1.198667,
median,5.8,3.0,4.35,1.3,


In [129]:
groupby = iris.groupby('species')
groupby

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7fc70fa30910>

In [130]:
groupby.min()

Unnamed: 0_level_0,sepal_length,sepal_width,petal_length,petal_width
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
setosa,4.3,2.3,1.0,0.1
versicolor,4.9,2.0,3.0,1.0
virginica,4.9,2.2,4.5,1.4


In [131]:
groupby.mean()

Unnamed: 0_level_0,sepal_length,sepal_width,petal_length,petal_width
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
setosa,5.006,3.418,1.464,0.244
versicolor,5.936,2.77,4.26,1.326
virginica,6.588,2.974,5.552,2.026


In [135]:
iris[ iris['species'] == 'setosa' ]['sepal_length'].mean()

5.005999999999999

In [136]:
groupby.count()

Unnamed: 0_level_0,sepal_length,sepal_width,petal_length,petal_width
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
setosa,50,50,50,50
versicolor,50,50,50,50
virginica,50,50,50,50


In [137]:
groupby.sum()

Unnamed: 0_level_0,sepal_length,sepal_width,petal_length,petal_width
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
setosa,250.3,170.9,73.2,12.2
versicolor,296.8,138.5,213.0,66.3
virginica,329.4,148.7,277.6,101.3


In [141]:
groupby.describe().T

Unnamed: 0,species,setosa,versicolor,virginica
sepal_length,count,50.0,50.0,50.0
sepal_length,mean,5.006,5.936,6.588
sepal_length,std,0.35249,0.516171,0.63588
sepal_length,min,4.3,4.9,4.9
sepal_length,25%,4.8,5.6,6.225
sepal_length,50%,5.0,5.9,6.5
sepal_length,75%,5.2,6.3,6.9
sepal_length,max,5.8,7.0,7.9
sepal_width,count,50.0,50.0,50.0
sepal_width,mean,3.418,2.77,2.974


### Handling Missing Data
- dropna()
- fillna()

In [167]:
iris = pd.read_csv('./iris.csv')

In [168]:
import warnings
warnings.filterwarnings('ignore')

In [169]:
iris

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [170]:
nan_idx = np.random.randint(0, 150, 20)
iris['sepal_length'][nan_idx] = np.nan

In [171]:
iris['sepal_length']

0      5.1
1      4.9
2      4.7
3      4.6
4      5.0
      ... 
145    6.7
146    6.3
147    6.5
148    6.2
149    5.9
Name: sepal_length, Length: 150, dtype: float64

In [172]:
nan_idx = np.random.randint(0, 150, 15)
iris['petal_width'][nan_idx] = np.nan

In [173]:
iris['petal_width']

0      0.2
1      0.2
2      0.2
3      0.2
4      0.2
      ... 
145    2.3
146    1.9
147    2.0
148    2.3
149    1.8
Name: petal_width, Length: 150, dtype: float64

In [174]:
iris.isna().sum()

sepal_length    19
sepal_width      0
petal_length     0
petal_width     14
species          0
dtype: int64

In [166]:
# iris.dropna()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
143,6.8,3.2,5.9,2.3,virginica
144,6.7,3.3,5.7,2.5,virginica
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica


In [176]:
iris['sepal_length'].fillna(value = "FILLTHIS")[:50]

0          5.1
1          4.9
2          4.7
3          4.6
4            5
5          5.4
6          4.6
7            5
8          4.4
9     FILLTHIS
10         5.4
11         4.8
12         4.8
13         4.3
14         5.8
15    FILLTHIS
16         5.4
17         5.1
18    FILLTHIS
19         5.1
20         5.4
21         5.1
22         4.6
23         5.1
24    FILLTHIS
25    FILLTHIS
26           5
27         5.2
28         5.2
29    FILLTHIS
30         4.8
31         5.4
32         5.2
33         5.5
34    FILLTHIS
35           5
36         5.5
37         4.9
38         4.4
39    FILLTHIS
40    FILLTHIS
41         4.5
42         4.4
43           5
44         5.1
45         4.8
46         5.1
47    FILLTHIS
48         5.3
49           5
Name: sepal_length, dtype: object

In [182]:
iris['sepal_length'] =  iris['sepal_length'].fillna(value = round(iris['sepal_length'].mean(), 1) )

In [184]:
iris['sepal_length'][:50]

0     5.1
1     4.9
2     4.7
3     4.6
4     5.0
5     5.4
6     4.6
7     5.0
8     4.4
9     5.9
10    5.4
11    4.8
12    4.8
13    4.3
14    5.8
15    5.9
16    5.4
17    5.1
18    5.9
19    5.1
20    5.4
21    5.1
22    4.6
23    5.1
24    5.9
25    5.9
26    5.0
27    5.2
28    5.2
29    5.9
30    4.8
31    5.4
32    5.2
33    5.5
34    5.9
35    5.0
36    5.5
37    4.9
38    4.4
39    5.9
40    5.9
41    4.5
42    4.4
43    5.0
44    5.1
45    4.8
46    5.1
47    5.9
48    5.3
49    5.0
Name: sepal_length, dtype: float64

In [186]:
iris['petal_length'][:50]

0     1.4
1     1.4
2     1.3
3     1.5
4     1.4
5     1.7
6     1.4
7     1.5
8     1.4
9     1.5
10    1.5
11    1.6
12    1.4
13    1.1
14    1.2
15    1.5
16    1.3
17    1.4
18    1.7
19    1.5
20    1.7
21    1.5
22    1.0
23    1.7
24    1.9
25    1.6
26    1.6
27    1.5
28    1.4
29    1.6
30    1.6
31    1.5
32    1.5
33    1.4
34    1.5
35    1.2
36    1.3
37    1.5
38    1.3
39    1.5
40    1.3
41    1.3
42    1.3
43    1.6
44    1.9
45    1.4
46    1.6
47    1.4
48    1.5
49    1.4
Name: petal_length, dtype: float64

In [187]:
iris.isna().sum()

sepal_length     0
sepal_width      0
petal_length     0
petal_width     14
species          0
dtype: int64

### Concat/Merge Dataframes

In [188]:
new_df = pd.DataFrame( np.random.randint(0, 7, size=(10, 4)))

In [189]:
new_df

Unnamed: 0,0,1,2,3
0,0,0,0,6
1,4,6,0,3
2,1,4,2,4
3,3,1,3,0
4,1,3,0,0
5,2,0,5,4
6,1,3,2,4
7,2,1,2,5
8,2,1,5,5
9,4,3,3,5


In [190]:
new_df['species'] = "new-species"

In [191]:
new_df.head()

Unnamed: 0,0,1,2,3,species
0,0,0,0,6,new-species
1,4,6,0,3,new-species
2,1,4,2,4,new-species
3,3,1,3,0,new-species
4,1,3,0,0,new-species


In [192]:
iris.columns

Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width',
       'species'],
      dtype='object')

In [193]:
new_df.columns = iris.columns

In [194]:
new_df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,0,0,0,6,new-species
1,4,6,0,3,new-species
2,1,4,2,4,new-species
3,3,1,3,0,new-species
4,1,3,0,0,new-species


In [196]:
iris = pd.concat((iris, new_df), axis=0 )

In [197]:
iris.shape

(160, 5)

In [198]:
## Merging Dataframes

In [208]:
df1 = pd.DataFrame({'S_Name' : ['Mohit', 'Jatin', 'Prateek', 'Mohit'],
                    'CGPA' :  [ 2 , 4, 5, 3]})

In [209]:
df1

Unnamed: 0,S_Name,CGPA
0,Mohit,2
1,Jatin,4
2,Prateek,5
3,Mohit,3


In [210]:
df2 = pd.DataFrame({'T_Name' : ['Mohit', 'Jatin', 'Prateek', 'Mohit'],
                    'CGPA' :  [ 3 , 6, 8, 9]})


In [211]:
df2

Unnamed: 0,T_Name,CGPA
0,Mohit,3
1,Jatin,6
2,Prateek,8
3,Mohit,9


In [212]:
df1.merge(df2, how = 'inner')

Unnamed: 0,S_Name,CGPA,T_Name
0,Mohit,3,Mohit


In [213]:
df1.merge(df2, how = 'left')

Unnamed: 0,S_Name,CGPA,T_Name
0,Mohit,2,
1,Jatin,4,
2,Prateek,5,
3,Mohit,3,Mohit


In [214]:
df1.merge(df2, how= 'right')

Unnamed: 0,S_Name,CGPA,T_Name
0,Mohit,3,Mohit
1,,6,Jatin
2,,8,Prateek
3,,9,Mohit


In [215]:
df1.merge(df2, how= 'outer')

Unnamed: 0,S_Name,CGPA,T_Name
0,Mohit,2,
1,Jatin,4,
2,Prateek,5,
3,Mohit,3,Mohit
4,,6,Jatin
5,,8,Prateek
6,,9,Mohit


### Output Files

In [223]:
iris

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
5,2.0,0.0,5.0,4.0,new-species
6,1.0,3.0,2.0,4.0,new-species
7,2.0,1.0,2.0,5.0,new-species
8,2.0,1.0,5.0,5.0,new-species


In [224]:
iris.isna().sum()

sepal_length     0
sepal_width      0
petal_length     0
petal_width     14
species          0
dtype: int64

In [228]:
iris.to_csv('./modified_iris.csv', index=False)

In [229]:
modified_iris = pd.read_csv('./modified_iris.csv')

In [230]:
modified_iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [231]:
iris.to_excel("./new_iris.xlsx", sheet_name="iris sheet")