In [1]:
# Enable the autoreload extension in mode 2, so edited modules are re-imported
# automatically before each cell executes.
%load_ext autoreload
%autoreload 2

In [2]:
from ucimlrepo import fetch_ucirepo, list_available_datasets

import pprint

ModuleNotFoundError: No module named 'pandas'

## List Available Datasets
### **Print a list of datasets that can be imported via `fetch_ucirepo`**

In [3]:
# Full listing of importable datasets
list_available_datasets()
# Listing restricted to the AIM-AHEAD project via the `filter` argument
list_available_datasets(filter="aim-ahead")
# Listing restricted by a search string
list_available_datasets(search="diabe")

NameError: name 'list_available_datasets' is not defined

### **Fetching should fail for datasets that are not in the list**

In [5]:
# Ask for a dataset by a name that is expected to fail; the error is printed
# instead of propagating so the notebook keeps running.
# Other invalid inputs that can be substituted for the call below:
#   fetch_ucirepo(name='heart diseaseeeee')
#   fetch_ucirepo(id=10000)
try:
    fetch_ucirepo(name='defungi')
except Exception as err:
    print(err)

-------------------------------------
The following datasets are available:
-------------------------------------
Dataset Name                                             ID    
------------                                             --    
Heart Disease                                            45    
Parkinsons Telemonitoring                                189   
Thoracic Surgery Data                                    277   
Diabetes 130-US hospitals for years 1999-2008            296   
Diabetic Retinopathy Debrecen                            329   
HCV data                                                 571   
Myocardial infarction complications                      579   
Glioma Grading Clinical and Mutation Features Dataset    759   
Sepsis Survival Minimal Clinical Records                 827   

"iris" dataset (id=53) exists in the repository, but is not available for import.


## **Import dataset by ID**

In [3]:
# Fetch dataset 827, listed above as "Sepsis Survival Minimal Clinical Records"
sepsis = fetch_ucirepo(id=827)

### **Metadata**

In [8]:
# Pretty-print the dataset's metadata mapping
pprint.pp(sepsis.metadata)

{'uci_id': 827,
 'name': 'Sepsis Survival Minimal Clinical Records',
 'repository_url': 'https://archive.ics.uci.edu/dataset/827/sepsis+survival+minimal+clinical+records',
 'data_url': 'https://archive.ics.uci.edu/static/public/827/data.csv',
 'abstract': 'This dataset collection contains minimal health records of '
             '110,204 admissions (primary cohort), 19,051 admissions (study '
             'cohort), and 137 admissions (validation cohort) of patients who '
             'had sepsis.',
 'area': 'Life Science',
 'tasks': ['Classification'],
 'characteristics': ['Multivariate'],
 'num_instances': 110341,
 'num_features': 3,
 'attribute_types': ['Integer'],
 'target_col': ['hospital_outcome_1alive_0dead'],
 'index_col': None,
 'has_missing_values': 'no',
 'missing_values_symbol': None,
 'year_of_dataset_creation': 2020,
 'last_updated': 'Wed Aug 02 2023',
 'dataset_doi': '10.24432/C53C8N',
 'creators': ['Davide Chicco', 'Giuseppe Jurman'],
 'intro_paper': {'title': 'Survival 

In [13]:
# Metadata fields can be read with attribute access; this one lists the target column name(s)
sepsis.metadata.target_col

['hospital_outcome_1alive_0dead']

In [8]:
# Nested metadata is reachable the same way: title of the introductory paper
sepsis.metadata.intro_paper.title

'Survival prediction of patients with sepsis from age, sex, and septic episode number alone'

In [9]:
# Free-text summary stored under the metadata's additional_info entry
sepsis.metadata.additional_info.summary

"Primary cohort from Norway:\n4 features for 110,204 patient admissions\nfile: 's41598-020-73558-3_sepsis_survival_primary_cohort.csv'\n\nStudy cohort (subset of the primary cohort) from Norway:\n4 features for 19,051 patient admissions\nfile: 's41598-020-73558-3_sepsis_survival_study_cohort.csv'\n\nValidation cohort from South Korea:\n4 features for 137 patients\nfile: 's41598-020-73558-3_sepsis_survival_validation_cohort.csv'"

### **Data**
**IDs, features, and targets are included as separate dataframes, along with an "original" dataframe that combines all of them.**

In [10]:
# Feature columns only
sepsis.data.features

Unnamed: 0,age_years,sex_0male_1female,episode_number
0,21,1,1
1,20,1,1
2,21,1,1
3,77,0,1
4,72,0,1
...,...,...,...
110336,47,0,1
110337,50,0,1
110338,62,0,1
110339,58,0,1


In [23]:
# Target column(s) only
sepsis.data.targets

Unnamed: 0,hospital_outcome_1alive_0dead
0,1
1,1
2,1
3,1
4,1
...,...
110336,1
110337,0
110338,1
110339,0


In [24]:
sepsis.data.ids   # this dataset has no IDs, so nothing is displayed

In [25]:
# Combined dataframe: the feature columns together with the target column
sepsis.data.original

Unnamed: 0,age_years,sex_0male_1female,episode_number,hospital_outcome_1alive_0dead
0,21,1,1,1
1,20,1,1,1
2,21,1,1,1
3,77,0,1,1
4,72,0,1,1
...,...,...,...,...
110336,47,0,1,1
110337,50,0,1,0
110338,62,0,1,1
110339,58,0,1,0


In [26]:
# Column labels; the output lists the same columns as the "original" dataframe
sepsis.data.headers

Index(['age_years', 'sex_0male_1female', 'episode_number',
       'hospital_outcome_1alive_0dead'],
      dtype='object')

### **Variable Info**

**Displayed in a dataframe format**

In [9]:
# One row per variable, with its name, role, type and related descriptive fields
sepsis.variables

Unnamed: 0,name,role,type,demographic,description,units,missing_values
0,age_years,Feature,Integer,,,,no
1,sex_0male_1female,Feature,Binary,,,,no
2,episode_number,Feature,Integer,,,,no
3,hospital_outcome_1alive_0dead,Target,Binary,,,,no


## **Import dataset by name**

In [27]:
# Fetch by name instead of numeric ID, then display the feature columns.
# NOTE(review): 'glioma' presumably resolves to "Glioma Grading Clinical and Mutation
# Features Dataset" (ID 759) via a partial-name match — confirm against fetch_ucirepo.
glioma = fetch_ucirepo(name='glioma')
glioma.data.features

Unnamed: 0,Gender,Age_at_diagnosis,Race,IDH1,TP53,ATRX,PTEN,EGFR,CIC,MUC16,...,FUBP1,RB1,NOTCH1,BCOR,CSMD3,SMARCA4,GRIN2A,IDH2,FAT4,PDGFRA
0,0,51.30,white,1,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1,0,38.72,white,1,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,0,35.17,white,1,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,32.78,white,1,1,1,0,0,0,1,...,0,0,0,0,0,0,0,0,1,0
4,0,31.51,white,1,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
834,1,77.89,white,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
835,0,85.18,white,0,1,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
836,1,77.49,white,0,1,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
837,0,63.33,white,0,1,0,0,0,0,1,...,0,1,0,0,0,0,0,0,0,0


In [17]:
# Fetch the Parkinsons Telemonitoring dataset (UCI id=189).
# `fetch_ucirepo` is imported once in the import cell at the top of the notebook,
# so it is not re-imported here.
parkinsons = fetch_ucirepo(id=189)

# data (as pandas dataframes)
X = parkinsons.data.features
y = parkinsons.data.targets

# metadata
print(parkinsons.metadata)

# variable information
print(parkinsons.variables)


{'uci_id': 189, 'name': 'Parkinsons Telemonitoring', 'repository_url': 'https://archive.ics.uci.edu/dataset/189/parkinsons+telemonitoring', 'data_url': 'https://archive.ics.uci.edu/static/public/189/data.csv', 'abstract': "Oxford Parkinson's Disease Telemonitoring Dataset", 'area': 'Life', 'tasks': ['Regression'], 'characteristics': ['Multivariate'], 'num_instances': 5875, 'num_features': 26, 'attribute_types': ['Integer', 'Real'], 'target_col': ['motor_UPDRS', 'total_UPDRS'], 'index_col': ['subject#'], 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 2009, 'last_updated': 'Thu Aug 03 2023', 'dataset_doi': '10.24432/C5ZS3N', 'creators': ['Athanasios Tsanas', 'Max Little'], 'intro_paper': {'title': "Accurate Telemonitoring of Parkinson's Disease Progression by Noninvasive Speech Tests", 'authors': 'A. Tsanas, Max A. Little, P. McSharry, L. Ramig', 'published_in': 'IEEE Transactions on Biomedical Engineering', 'year': 2010, 'url': 'https://www.semant

In [4]:
# Display the feature dataframe `X` assigned in the cell that fetches dataset 189.
# NOTE(review): the saved output below is a NameError and this cell's execution count
# is lower than that of the cell assigning `X`, so it was last run out of order —
# re-run the notebook top to bottom to refresh the output.
X

NameError: name 'X' is not defined