# TP Pandas — Corrigé (Dataset Iris)

Ce corrigé charge le dataset Iris via scikit-learn, réalise des manipulations pandas et génère des graphiques **avec matplotlib uniquement**.

## Chargement des données

In [4]:

import matplotlib.pyplot as plt
from sklearn.datasets import load_iris

# Load the Iris dataset through scikit-learn, then print the container's
# type, its available keys and the integer class label of every sample.
iris = load_iris()
for inspected in (type(iris), iris.keys(), iris['target']):
    print(inspected)


<class 'sklearn.utils._bunch.Bunch'>
dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]


In [8]:
# Labels of the measurement columns; they are reused further down as the
# column names of the DataFrame.
iris['feature_names']

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [17]:
# Species names; the integer codes stored in iris['target'] index into this array.
iris['target_names']

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [2]:
import pandas as pd

# Assemble the measurements into a DataFrame, one column per feature.
df = pd.DataFrame(data=iris.data, columns=iris['feature_names'])
print(df.head())

# Translate every integer class code into its species name with a single
# array lookup, then store the resulting names as a new column.
df['species'] = list(iris.target_names[iris.target])
df.head()

   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0                5.1               3.5                1.4               0.2
1                4.9               3.0                1.4               0.2
2                4.7               3.2                1.3               0.2
3                4.6               3.1                1.5               0.2
4                5.0               3.6                1.4               0.2


Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


## Dimensions et statistiques

In [28]:

# Show the frame's dimensions, its column summary, the dtypes and the
# descriptive statistics of the numeric columns.
print("Shape:", df.shape)
print()
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly: wrapping it in print() would emit a stray "None" line.
df.info()
print()
print(df.dtypes)
print()
df.describe()


Shape: (150, 5)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  150 non-null    float64
 1   sepal width (cm)   150 non-null    float64
 2   petal length (cm)  150 non-null    float64
 3   petal width (cm)   150 non-null    float64
 4   species            150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB
None

sepal length (cm)    float64
sepal width (cm)     float64
petal length (cm)    float64
petal width (cm)     float64
species               object
dtype: object



Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
count,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333
std,0.828066,0.435866,1.765298,0.762238
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


## Renommer les colonnes

In [None]:

# Replace the scikit-learn column labels with compact identifier-style names.
# Passing the mapping positionally with axis="columns" targets the column
# labels, exactly like the columns= keyword.
df = df.rename(
    {
        "sepal length (cm)": "SepalLengthCm",
        "sepal width (cm)": "SepalWidthCm",
        "petal length (cm)": "PetalLengthCm",
        "petal width (cm)": "PetalWidthCm",
        "species": "Species",
    },
    axis="columns",
)
df.head()


## Ajouter des colonnes

In [None]:

# Derived features: petal length-to-width ratio and sepal length x width product.
df["PetalRatio"] = df["PetalLengthCm"].div(df["PetalWidthCm"])
df["SepalArea"] = df["SepalLengthCm"].mul(df["SepalWidthCm"])
df.head()


## Supprimer une colonne

In [None]:

# Example: drop SepalArea once it is no longer needed.
# errors="ignore" keeps this cell re-runnable: on a second execution the
# column is already gone and the frame is returned unchanged instead of
# raising KeyError.
df = df.drop(columns=["SepalArea"], errors="ignore")
df.head()


## Supprimer des lignes (on ne conserve que celles où SepalLengthCm ≥ 5.0)

In [None]:

# Keep only the rows whose SepalLengthCm is at least 5.0, then report the
# resulting (rows, columns) size.
df = df.loc[df["SepalLengthCm"].ge(5.0)]
df.shape


## Filtrer l'espèce 'setosa'

In [None]:

# Select the rows labelled "setosa" into their own frame and preview it.
df_setosa = df.loc[df["Species"].eq("setosa")]
df_setosa.head()


## Comptage par espèce

In [None]:

# Number of rows per species; `counts` is reused by the bar chart further down.
counts = df.loc[:, "Species"].value_counts()
counts


## Visualisations (matplotlib)

In [None]:

# 1) Histogram of SepalLengthCm, drawn through the explicit figure/axes interface
fig, ax = plt.subplots()
ax.hist(df["SepalLengthCm"], bins=15)
ax.set_title("Histogramme — SepalLengthCm")
ax.set_xlabel("SepalLengthCm")
ax.set_ylabel("Fréquence")
fig.tight_layout()
plt.show()


In [None]:

# 2) Scatter plot of SepalLengthCm (x axis) against PetalLengthCm (y axis)
fig, ax = plt.subplots()
ax.scatter(df["SepalLengthCm"], df["PetalLengthCm"])
ax.set(
    title="Scatter — SepalLengthCm vs PetalLengthCm",
    xlabel="SepalLengthCm",
    ylabel="PetalLengthCm",
)
fig.tight_layout()
plt.show()


In [None]:

# 3) Box plot of PetalLengthCm, one box per species (alphabetical order)
species_list = sorted(df["Species"].unique())
# A single .loc lookup (row mask + column) replaces chained indexing.
data_by_species = [
    df.loc[df["Species"] == sp, "PetalLengthCm"].to_numpy() for sp in species_list
]

plt.figure()
plt.boxplot(data_by_species)
# boxplot places the boxes at x = 1..N by default. The tick labels are set
# explicitly instead of through the `labels=` keyword, which Matplotlib 3.9
# deprecated (renamed to `tick_labels`); xticks works on old and new versions.
plt.xticks(range(1, len(species_list) + 1), species_list)
plt.title("Boxplot — PetalLengthCm par espèce")
plt.xlabel("Espèce")
plt.ylabel("PetalLengthCm")
plt.tight_layout()
plt.show()


In [None]:

# 4) Bar chart of the number of rows per species, taken from `counts`
fig, ax = plt.subplots()
ax.bar(counts.index, counts.values)
ax.set_title("Comptage par espèce")
ax.set_xlabel("Espèce")
ax.set_ylabel("Effectif")
fig.tight_layout()
plt.show()
