In [1]:
import pandas as pd
from sklearn import datasets

In [2]:
# Summarise a scikit-learn style dataset: shape, null count and descriptive statistics
def process_sklearn_dataset(dataset, dataset_name: str) -> None:
    """Print a quick summary of a scikit-learn style dataset.

    The feature matrix is loaded into a pandas DataFrame, the target is
    appended as extra column(s), and the shape, row/column counts, total
    number of null values and ``DataFrame.describe()`` statistics are printed.

    Parameters
    ----------
    dataset
        Object exposing ``data`` and ``target`` attributes and, optionally,
        ``feature_names`` and ``target_names``.
    dataset_name : str
        Label printed in the summary header.

    Returns
    -------
    None
        The summary is written to standard output only.
    """
    # Fall back to pandas' default column labels when the dataset carries no
    # feature names instead of failing on the attribute lookup.
    feature_names = getattr(dataset, 'feature_names', None)
    df = pd.DataFrame(dataset.data, columns=feature_names)

    target = dataset.target
    if getattr(target, 'ndim', 1) == 2:
        # Multi-output target: a 2-D array cannot be assigned to a single
        # column, so add one ``target_<label>`` column per output instead.
        values = target.to_numpy() if hasattr(target, 'to_numpy') else target
        num_targets = values.shape[1]
        labels = getattr(dataset, 'target_names', None)
        if labels is None or len(labels) != num_targets:
            labels = range(num_targets)
        for position, label in enumerate(labels):
            df[f'target_{label}'] = values[:, position]
    else:
        df['target'] = target

    # Header and overall shape
    print(f"\nDataset: {dataset_name}")
    print(f"Shape of DataFrame: {df.shape}")

    # Row and column counts, reported separately
    num_rows, num_cols = df.shape
    print(f"Number of Rows: {num_rows}")
    print(f"Number of Columns: {num_cols}")

    # Total count of missing cells across every column
    null_values = df.isnull().sum().sum()
    print(f"Number of Null Values: {null_values}")

    # Descriptive statistics of every numeric column
    print("\nDescription:")
    print(df.describe())

In [3]:
# Iris: load the dataset via sklearn.datasets and print its summary
iris = datasets.load_iris()
process_sklearn_dataset(dataset=iris, dataset_name='Iris')


Dataset: Iris
Shape of DataFrame: (150, 5)
Number of Rows: 150
Number of Columns: 5
Number of Null Values: 0

Description:
       sepal length (cm)  sepal width (cm)  petal length (cm)  \
count         150.000000        150.000000         150.000000   
mean            5.843333          3.057333           3.758000   
std             0.828066          0.435866           1.765298   
min             4.300000          2.000000           1.000000   
25%             5.100000          2.800000           1.600000   
50%             5.800000          3.000000           4.350000   
75%             6.400000          3.300000           5.100000   
max             7.900000          4.400000           6.900000   

       petal width (cm)      target  
count        150.000000  150.000000  
mean           1.199333    1.000000  
std            0.762238    0.819232  
min            0.100000    0.000000  
25%            0.300000    0.000000  
50%            1.300000    1.000000  
75%            1.800000 

In [4]:
# Wine: load the dataset via sklearn.datasets and print its summary
wine = datasets.load_wine()
process_sklearn_dataset(dataset=wine, dataset_name='Wine')


Dataset: Wine
Shape of DataFrame: (178, 14)
Number of Rows: 178
Number of Columns: 14
Number of Null Values: 0

Description:
          alcohol  malic_acid         ash  alcalinity_of_ash   magnesium  \
count  178.000000  178.000000  178.000000         178.000000  178.000000   
mean    13.000618    2.336348    2.366517          19.494944   99.741573   
std      0.811827    1.117146    0.274344           3.339564   14.282484   
min     11.030000    0.740000    1.360000          10.600000   70.000000   
25%     12.362500    1.602500    2.210000          17.200000   88.000000   
50%     13.050000    1.865000    2.360000          19.500000   98.000000   
75%     13.677500    3.082500    2.557500          21.500000  107.000000   
max     14.830000    5.800000    3.230000          30.000000  162.000000   

       total_phenols  flavanoids  nonflavanoid_phenols  proanthocyanins  \
count     178.000000  178.000000            178.000000       178.000000   
mean        2.295112    2.029270       

In [5]:
# Breast Cancer: load the dataset via sklearn.datasets and print its summary
breast_cancer = datasets.load_breast_cancer()
process_sklearn_dataset(dataset=breast_cancer, dataset_name='Breast Cancer')


Dataset: Breast Cancer
Shape of DataFrame: (569, 31)
Number of Rows: 569
Number of Columns: 31
Number of Null Values: 0

Description:
       mean radius  mean texture  mean perimeter    mean area  \
count   569.000000    569.000000      569.000000   569.000000   
mean     14.127292     19.289649       91.969033   654.889104   
std       3.524049      4.301036       24.298981   351.914129   
min       6.981000      9.710000       43.790000   143.500000   
25%      11.700000     16.170000       75.170000   420.300000   
50%      13.370000     18.840000       86.240000   551.100000   
75%      15.780000     21.800000      104.100000   782.700000   
max      28.110000     39.280000      188.500000  2501.000000   

       mean smoothness  mean compactness  mean concavity  mean concave points  \
count       569.000000        569.000000      569.000000           569.000000   
mean          0.096360          0.104341        0.088799             0.048919   
std           0.014064          0.05

In [6]:
# Diabetes: load the dataset via sklearn.datasets and print its summary
diabetes = datasets.load_diabetes()
process_sklearn_dataset(dataset=diabetes, dataset_name='Diabetes')


Dataset: Diabetes
Shape of DataFrame: (442, 11)
Number of Rows: 442
Number of Columns: 11
Number of Null Values: 0

Description:
                age           sex           bmi            bp            s1  \
count  4.420000e+02  4.420000e+02  4.420000e+02  4.420000e+02  4.420000e+02   
mean  -3.634285e-16  1.308343e-16 -8.045349e-16  1.281655e-16 -8.835316e-17   
std    4.761905e-02  4.761905e-02  4.761905e-02  4.761905e-02  4.761905e-02   
min   -1.072256e-01 -4.464164e-02 -9.027530e-02 -1.123996e-01 -1.267807e-01   
25%   -3.729927e-02 -4.464164e-02 -3.422907e-02 -3.665645e-02 -3.424784e-02   
50%    5.383060e-03 -4.464164e-02 -7.283766e-03 -5.670611e-03 -4.320866e-03   
75%    3.807591e-02  5.068012e-02  3.124802e-02  3.564384e-02  2.835801e-02   
max    1.107267e-01  5.068012e-02  1.705552e-01  1.320442e-01  1.539137e-01   

                 s2            s3            s4            s5            s6  \
count  4.420000e+02  4.420000e+02  4.420000e+02  4.420000e+02  4.420000e+02   


In [7]:
# Digits: load the dataset via sklearn.datasets and print its summary
digits = datasets.load_digits()
process_sklearn_dataset(dataset=digits, dataset_name='Digits')


Dataset: Digits
Shape of DataFrame: (1797, 65)
Number of Rows: 1797
Number of Columns: 65
Number of Null Values: 0

Description:
       pixel_0_0    pixel_0_1    pixel_0_2    pixel_0_3    pixel_0_4  \
count     1797.0  1797.000000  1797.000000  1797.000000  1797.000000   
mean         0.0     0.303840     5.204786    11.835838    11.848080   
std          0.0     0.907192     4.754826     4.248842     4.287388   
min          0.0     0.000000     0.000000     0.000000     0.000000   
25%          0.0     0.000000     1.000000    10.000000    10.000000   
50%          0.0     0.000000     4.000000    13.000000    13.000000   
75%          0.0     0.000000     9.000000    15.000000    15.000000   
max          0.0     8.000000    16.000000    16.000000    16.000000   

         pixel_0_5    pixel_0_6    pixel_0_7    pixel_1_0    pixel_1_1  ...  \
count  1797.000000  1797.000000  1797.000000  1797.000000  1797.000000  ...   
mean      5.781859     1.362270     0.129661     0.005565     1