In [1]:
# Install package for UCI repo
!pip install ucimlrepo

Collecting ucimlrepo
  Downloading ucimlrepo-0.0.7-py3-none-any.whl.metadata (5.5 kB)
Downloading ucimlrepo-0.0.7-py3-none-any.whl (8.0 kB)
Installing collected packages: ucimlrepo
Successfully installed ucimlrepo-0.0.7


In [2]:
import ucimlrepo
import pandas as pd

In [23]:
# 1. Load a dataset from UCIMLrepo (e.g., Iris dataset)
# Download dataset
uci_dataset = ucimlrepo.fetch_ucirepo(id=53)
# alternatively: fetch_ucirepo(name='CDC Diabetes Health Indicators')
# access data
X = uci_dataset.data.features
y = uci_dataset.data.targets
print(f"Features {X.columns.to_list()}")
print(f"Target {y[y.columns.tolist()[0]].unique()}")

Features ['sepal length', 'sepal width', 'petal length', 'petal width']
Target ['Iris-setosa' 'Iris-versicolor' 'Iris-virginica']


In [35]:
# Add the target variable to the DataFrame
iris_df = pd.concat([X, y], axis=1)
iris_df['species_name'] = iris_df[y.columns.tolist()[0]].map({'Iris-setosa':0, 'Iris-versicolor':1, 'Iris-virginica':2})
print(f"Data columns {iris_df.columns.tolist()}")
print(iris_df.head())

Data columns ['sepal length', 'sepal width', 'petal length', 'petal width', 'class', 'species_name']
   sepal length  sepal width  petal length  petal width        class  \
0           5.1          3.5           1.4          0.2  Iris-setosa   
1           4.9          3.0           1.4          0.2  Iris-setosa   
2           4.7          3.2           1.3          0.2  Iris-setosa   
3           4.6          3.1           1.5          0.2  Iris-setosa   
4           5.0          3.6           1.4          0.2  Iris-setosa   

   species_name  
0             0  
1             0  
2             0  
3             0  
4             0  


In [7]:
# 2. Filtering
# Filter the DataFrame to get only the rows where sepal_length is greater than 5
filtered_df = iris_df[iris_df['sepal length'] > 5]
print("Filtered DataFrame (sepal length > 5):")
print(filtered_df.head())

Filtered DataFrame (sepal length > 5):
    sepal length  sepal width  petal length  petal width
0            5.1          3.5           1.4          0.2
5            5.4          3.9           1.7          0.4
10           5.4          3.7           1.5          0.2
14           5.8          4.0           1.2          0.2
15           5.7          4.4           1.5          0.4


In [8]:
# 3. Sorting
# Sort the DataFrame by sepal_width in descending order
sorted_df = iris_df.sort_values(by='sepal width', ascending=False)
print("Sorted DataFrame (by sepal width, descending):")
print(sorted_df.head())

Sorted DataFrame (by sepal width, descending):
    sepal length  sepal width  petal length  petal width
15           5.7          4.4           1.5          0.4
33           5.5          4.2           1.4          0.2
32           5.2          4.1           1.5          0.1
14           5.8          4.0           1.2          0.2
5            5.4          3.9           1.7          0.4


In [25]:
# 4. Grouping
# Group the DataFrame by species and calculate the mean of each feature
grouped_df = iris_df.groupby(y.columns.tolist()).mean()
print("Grouped DataFrame (mean of each feature by species):")
print(grouped_df)

Grouped DataFrame (mean of each feature by species):
                 sepal length  sepal width  petal length  petal width  \
class                                                                   
Iris-setosa             5.006        3.418         1.464        0.244   
Iris-versicolor         5.936        2.770         4.260        1.326   
Iris-virginica          6.588        2.974         5.552        2.026   

                species_name  
class                         
Iris-setosa              NaN  
Iris-versicolor          NaN  
Iris-virginica           NaN  


In [36]:
# 5. Combining Operations
# Filter, then group, then sort
combined_df = iris_df[iris_df['petal length'] > 4].groupby(y.columns.tolist()).mean().sort_values(by='sepal width', ascending=False)
print("Combined Operations (filtered, grouped, and sorted):")
print(combined_df)

Combined Operations (filtered, grouped, and sorted):
                 sepal length  sepal width  petal length  petal width  \
class                                                                   
Iris-virginica       6.588000     2.974000      5.552000     2.026000   
Iris-versicolor      6.147059     2.902941      4.517647     1.411765   

                 species_name  
class                          
Iris-virginica            2.0  
Iris-versicolor           1.0  
