## Installation
In a Jupyter notebook, install the `ucimlrepo` package with the following command:

In [2]:
!pip3 install -U ucimlrepo 



In [13]:
# Standard library
import pprint

# Third-party
import numpy as np
import pandas as pd
from ucimlrepo import fetch_ucirepo, list_available_datasets

# Display configuration: no limit on displayed columns, at most 80 rows.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 80)

## List Available Datasets
### **Print a list of datasets that can be imported via `fetch_ucirepo`**

In [4]:
# Alternative calls, kept commented out for reference:
#   list_available_datasets()                       # unfiltered listing
#   list_available_datasets(filter='aim-ahead')     # only list datasets for AIM-AHEAD project
# List the datasets that match the search query 'diabe'.
list_available_datasets(search='diabe')

--------------------------------------------------------------
The following datasets are available for search query "diabe":
--------------------------------------------------------------
Dataset Name                                     ID    
------------                                     --    
Diabetes 130-US Hospitals for Years 1999-2008    296   
Diabetic Retinopathy Debrecen                    329   
Early Stage Diabetes Risk Prediction             529   
CDC Diabetes Health Indicators                   891   



### Fetch Data from Diabetes 130-US Hospitals for Years 1999-2008 (ID: 296)

In [5]:
# Fetch dataset ID 296 ("Diabetes 130-US Hospitals for Years 1999-2008" in the
# listing above) and keep the returned object as `diabetes`.
diabetes = fetch_ucirepo(id=296)

## diabetes Object Quick Run Through

`fetch_ucirepo(id=296)` returned a dictionary-like object (a `dotdict`), which we stored as `diabetes`.

In [6]:
# Show the class of the object returned by fetch_ucirepo.
type(diabetes)

ucimlrepo.dotdict.dotdict

The object has three keys: `data` contains the dataset matrices as pandas DataFrames; `metadata` contains metadata information about the dataset; `variables` contains variable details presented in a tabular/DataFrame format.

In [7]:
# List the top-level keys of the fetched object.
diabetes.keys()

dict_keys(['data', 'metadata', 'variables'])

Under `diabetes`, `data` and `metadata` are dictionaries (`dotdict` objects), and `variables` is a DataFrame.

In [8]:
# Print the class of each top-level entry, space-separated on one line.
print(*(type(diabetes[key]) for key in ("data", "metadata", "variables")))

<class 'ucimlrepo.dotdict.dotdict'> <class 'ucimlrepo.dotdict.dotdict'> <class 'pandas.core.frame.DataFrame'>


Below is the structure of `data`, `metadata`, and `variables`.

In [9]:
# Keys of the two nested dictionaries, followed by the variables table.
print(diabetes.data.keys())
print(diabetes.metadata.keys())
print(diabetes.variables)

dict_keys(['ids', 'features', 'targets', 'original', 'headers'])
dict_keys(['uci_id', 'name', 'repository_url', 'data_url', 'abstract', 'area', 'tasks', 'characteristics', 'num_instances', 'num_features', 'feature_types', 'demographics', 'target_col', 'index_col', 'has_missing_values', 'missing_values_symbol', 'year_of_dataset_creation', 'last_updated', 'dataset_doi', 'creators', 'intro_paper', 'additional_info'])
                        name     role         type demographic  \
0               encounter_id       ID                     None   
1                patient_nbr       ID                     None   
2                       race  Feature  Categorical        Race   
3                     gender  Feature  Categorical      Gender   
4                        age  Feature  Categorical         Age   
5                     weight  Feature  Categorical        None   
6          admission_type_id  Feature  Categorical        None   
7   discharge_disposition_id  Feature  Categorical    

Store the variables DataFrame in `dfvariables`.

In [10]:
# Keep a handle on the variables table and display it.
dfvariables = diabetes.variables
dfvariables

Unnamed: 0,name,role,type,demographic,description,units,missing_values
0,encounter_id,ID,,,Unique identifier of an encounter,,no
1,patient_nbr,ID,,,Unique identifier of a patient,,no
2,race,Feature,Categorical,Race,"Values: Caucasian, Asian, African American, Hi...",,yes
3,gender,Feature,Categorical,Gender,"Values: male, female, and unknown/invalid",,no
4,age,Feature,Categorical,Age,"Grouped in 10-year intervals: [0, 10), [10, 20...",,no
5,weight,Feature,Categorical,,Weight in pounds.,,yes
6,admission_type_id,Feature,Categorical,,Integer identifier corresponding to 9 distinct...,,no
7,discharge_disposition_id,Feature,Categorical,,Integer identifier corresponding to 29 distinc...,,no
8,admission_source_id,Feature,Categorical,,Integer identifier corresponding to 21 distinc...,,no
9,time_in_hospital,Feature,Integer,,Integer number of days between admission and d...,,no


In [11]:
# To get the description of a variable: select the "description" entries of
# the rows whose name is "readmitted" and print each one.
for desc in dfvariables.loc[dfvariables["name"].eq("readmitted"), "description"]:
    print(desc)

Days to inpatient readmission. Values: <30 if the patient was readmitted in less than 30 days, >30 if the patient was readmitted in more than 30 days, and No for no record of readmission.


Store the original data in `dforigin`.

In [16]:
# Keep a reference to the original data (no copy is made here).
dforigin = diabetes["data"]["original"]
# Example of accessing the original data: distinct values of one column.
dforigin["medical_specialty"].unique()

array(['Pediatrics-Endocrinology', nan, 'InternalMedicine',
       'Family/GeneralPractice', 'Cardiology', 'Surgery-General',
       'Orthopedics', 'Gastroenterology',
       'Surgery-Cardiovascular/Thoracic', 'Nephrology',
       'Orthopedics-Reconstructive', 'Psychiatry', 'Emergency/Trauma',
       'Pulmonology', 'Surgery-Neuro',
       'Obsterics&Gynecology-GynecologicOnco', 'ObstetricsandGynecology',
       'Pediatrics', 'Hematology/Oncology', 'Otolaryngology',
       'Surgery-Colon&Rectal', 'Pediatrics-CriticalCare', 'Endocrinology',
       'Urology', 'Psychiatry-Child/Adolescent', 'Pediatrics-Pulmonology',
       'Neurology', 'Anesthesiology-Pediatric', 'Radiology',
       'Pediatrics-Hematology-Oncology', 'Psychology', 'Podiatry',
       'Gynecology', 'Oncology', 'Pediatrics-Neurology',
       'Surgery-Plastic', 'Surgery-Thoracic',
       'Surgery-PlasticwithinHeadandNeck', 'Ophthalmology',
       'Surgery-Pediatric', 'Pediatrics-EmergencyMedicine',
       'PhysicalMedicineandRe

dforigin breakdown

In [80]:
# Rows of the original data whose "readmitted" value is exactly ">30".
dforigin.loc[dforigin["readmitted"].eq(">30")]


Unnamed: 0,encounter_id,patient_nbr,race,gender,age,weight,admission_type_id,discharge_disposition_id,admission_source_id,time_in_hospital,payer_code,medical_specialty,num_lab_procedures,num_procedures,num_medications,number_outpatient,number_emergency,number_inpatient,diag_1,diag_2,diag_3,number_diagnoses,max_glu_serum,A1Cresult,metformin,repaglinide,nateglinide,chlorpropamide,glimepiride,acetohexamide,glipizide,glyburide,tolbutamide,pioglitazone,rosiglitazone,acarbose,miglitol,troglitazone,tolazamide,examide,citoglipton,insulin,glyburide-metformin,glipizide-metformin,glimepiride-pioglitazone,metformin-rosiglitazone,metformin-pioglitazone,change,diabetesMed,readmitted
1,149190,55629189,Caucasian,Female,[10-20),,1,1,7,3,,,59,0,18,0,0,0,276,250.01,255,9,,,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,Up,No,No,No,No,No,Ch,Yes,>30
5,35754,82637451,Caucasian,Male,[50-60),,2,1,2,3,,,31,6,16,0,0,0,414,411,250,9,,,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,Steady,No,No,No,No,No,No,Yes,>30
7,63768,114882984,Caucasian,Male,[70-80),,1,1,7,5,,,73,0,12,0,0,0,428,492,250,8,,,No,No,No,No,No,No,No,Steady,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,Yes,>30
10,28236,89869032,AfricanAmerican,Female,[40-50),,1,1,7,9,,,47,2,17,0,0,0,250.7,403,996,9,,,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,Steady,No,No,No,No,No,No,Yes,>30
14,62256,49726791,AfricanAmerican,Female,[60-70),,3,1,2,1,,,49,5,2,0,0,0,518,998,627,8,,,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,Steady,No,No,No,No,No,No,Yes,>30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
101754,443842016,183087545,Caucasian,Female,[70-80),,1,1,7,9,,,50,2,33,0,0,0,574,574,250.02,9,,>7,No,No,No,No,No,No,No,Up,No,No,No,No,No,No,No,No,No,Steady,No,No,No,No,No,Ch,Yes,>30
101755,443842022,188574944,Other,Female,[40-50),,1,1,7,14,MD,,73,6,26,0,1,0,592,599,518,9,,>8,No,No,No,No,No,No,Steady,No,No,No,No,No,No,No,No,No,No,Up,No,No,No,No,No,Ch,Yes,>30
101756,443842070,140199494,Other,Female,[60-70),,1,1,7,2,MD,,46,6,17,1,1,1,996,585,403,9,,,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,Steady,No,No,No,No,No,No,Yes,>30
101760,443847176,50375628,AfricanAmerican,Female,[60-70),,1,1,7,6,DM,,45,1,25,3,1,2,345,438,412,9,,,No,No,No,No,No,No,No,No,No,No,Steady,No,No,No,No,No,No,Down,No,No,No,No,No,Ch,Yes,>30


In [44]:
# Column labels of the features table.
diabetes.data.features.columns

Index(['race', 'gender', 'age', 'weight', 'admission_type_id',
       'discharge_disposition_id', 'admission_source_id', 'time_in_hospital',
       'payer_code', 'medical_specialty', 'num_lab_procedures',
       'num_procedures', 'num_medications', 'number_outpatient',
       'number_emergency', 'number_inpatient', 'diag_1', 'diag_2', 'diag_3',
       'number_diagnoses', 'max_glu_serum', 'A1Cresult', 'metformin',
       'repaglinide', 'nateglinide', 'chlorpropamide', 'glimepiride',
       'acetohexamide', 'glipizide', 'glyburide', 'tolbutamide',
       'pioglitazone', 'rosiglitazone', 'acarbose', 'miglitol', 'troglitazone',
       'tolazamide', 'examide', 'citoglipton', 'insulin',
       'glyburide-metformin', 'glipizide-metformin',
       'glimepiride-pioglitazone', 'metformin-rosiglitazone',
       'metformin-pioglitazone', 'change', 'diabetesMed'],
      dtype='object')

In [58]:
# Column labels of the targets table.
diabetes.data.targets.columns

Index(['readmitted'], dtype='object')

In [76]:
# Preview the first rows of the original data.
dforigin.head()

Unnamed: 0,encounter_id,patient_nbr,race,gender,age,weight,admission_type_id,discharge_disposition_id,admission_source_id,time_in_hospital,payer_code,medical_specialty,num_lab_procedures,num_procedures,num_medications,number_outpatient,number_emergency,number_inpatient,diag_1,diag_2,diag_3,number_diagnoses,max_glu_serum,A1Cresult,metformin,repaglinide,nateglinide,chlorpropamide,glimepiride,acetohexamide,glipizide,glyburide,tolbutamide,pioglitazone,rosiglitazone,acarbose,miglitol,troglitazone,tolazamide,examide,citoglipton,insulin,glyburide-metformin,glipizide-metformin,glimepiride-pioglitazone,metformin-rosiglitazone,metformin-pioglitazone,change,diabetesMed,readmitted
0,2278392,8222157,Caucasian,Female,[0-10),,6,25,1,1,,Pediatrics-Endocrinology,41,0,1,0,0,0,250.83,,,1,,,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,NO
1,149190,55629189,Caucasian,Female,[10-20),,1,1,7,3,,,59,0,18,0,0,0,276.0,250.01,255,9,,,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,Up,No,No,No,No,No,Ch,Yes,>30
2,64410,86047875,AfricanAmerican,Female,[20-30),,1,1,7,2,,,11,5,13,2,0,1,648.0,250.0,V27,6,,,No,No,No,No,No,No,Steady,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,Yes,NO
3,500364,82442376,Caucasian,Male,[30-40),,1,1,7,2,,,44,1,16,0,0,0,8.0,250.43,403,7,,,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,Up,No,No,No,No,No,Ch,Yes,NO
4,16680,42519267,Caucasian,Male,[40-50),,1,1,7,1,,,51,0,8,0,0,0,197.0,157.0,250,5,,,No,No,No,No,No,No,Steady,No,No,No,No,No,No,No,No,No,No,Steady,No,No,No,No,No,Ch,Yes,NO
