## Iris Dataset course

In [2]:
# Import the necessary library to load the Iris dataset from scikit-learn
from sklearn.datasets import load_iris
import seaborn as sns

# Import pandas for data manipulation
import pandas as pd

# Fetch the bundled Iris data. The returned object exposes the measurements
# (`data`), the column labels (`feature_names`), the integer class codes
# (`target`) and the class labels (`target_names`).
iris = load_iris()

# One row per sample, one column per measurement.
df = pd.DataFrame(iris.data, columns=iris.feature_names)

# Attach a human-readable label column: each integer code in `iris.target`
# is mapped to the name at that position in `iris.target_names`, and the
# result is stored as a pandas categorical.
df['species'] = pd.Categorical.from_codes(
    codes=iris.target,
    categories=iris.target_names,
)

# Preview the first five rows as a sanity check on the construction.
df.head(5)


Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [3]:

# Structural overview, printed to stdout: column names, non-null counts,
# dtypes and memory usage.
df.info()

# Summary statistics (count, mean, std, min, quartiles, max). This is the
# cell's last expression, so it is the table the notebook renders.
df.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype   
---  ------             --------------  -----   
 0   sepal length (cm)  150 non-null    float64 
 1   sepal width (cm)   150 non-null    float64 
 2   petal length (cm)  150 non-null    float64 
 3   petal width (cm)   150 non-null    float64 
 4   species            150 non-null    category
dtypes: category(1), float64(4)
memory usage: 5.1 KB


Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
count,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333
std,0.828066,0.435866,1.765298,0.762238
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


## Day 3 - Basics of data manipulation

In [8]:
# Tally the number of rows for each species label.
species_counts = df['species'].value_counts()

# Pull the tally for a single label out of the resulting Series.
setosa_count = species_counts.loc['setosa']

print("Number of setosa samples: ", setosa_count)
print("Total species ", species_counts)

Number of setosa samples:  50
Total species  species
setosa        50
versicolor    50
virginica     50
Name: count, dtype: int64


In [15]:
# Row selection: keep only the samples labelled setosa (boolean mask).
setosa_df = df.loc[df['species'] == 'setosa']

# Column selection: keep every row, but only the two sepal measurements.
sepal_df = df[['sepal length (cm)', 'sepal width (cm)']]

print(sepal_df)

     sepal length (cm)  sepal width (cm)
0                  5.1               3.5
1                  4.9               3.0
2                  4.7               3.2
3                  4.6               3.1
4                  5.0               3.6
..                 ...               ...
145                6.7               3.0
146                6.3               2.5
147                6.5               3.0
148                6.2               3.4
149                5.9               3.0

[150 rows x 2 columns]


In [16]:
# Order rows by petal length (smallest first); ties are broken by petal
# width, largest first. The result is a new frame bound to `sorted_df`.
sorted_df = df.sort_values(
    ['petal length (cm)', 'petal width (cm)'],
    ascending=[True, False],
)

In [19]:

# Per-species minimum, maximum and mean of every measurement column.
# `species` is a categorical column, so `observed` is passed explicitly:
# pandas 2.x emits a FutureWarning when grouping on a categorical without it.
# `observed=False` keeps the behaviour this cell has had so far (one row per
# declared category).
df.groupby('species', observed=False).agg(['min', 'max', 'mean'])

  df.groupby('species').agg(['min', 'max', 'mean'])


Unnamed: 0_level_0,sepal length (cm),sepal length (cm),sepal length (cm),sepal width (cm),sepal width (cm),sepal width (cm),petal length (cm),petal length (cm),petal length (cm),petal width (cm),petal width (cm),petal width (cm)
Unnamed: 0_level_1,min,max,mean,min,max,mean,min,max,mean,min,max,mean
species,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
setosa,4.3,5.8,5.006,2.3,4.4,3.428,1.0,1.9,1.462,0.1,0.6,0.246
versicolor,4.9,7.0,5.936,2.0,3.4,2.77,3.0,5.1,4.26,1.0,1.8,1.326
virginica,4.9,7.9,6.588,2.2,3.8,2.974,4.5,6.9,5.552,1.4,2.5,2.026
