In [1]:
import pandas as pd 
import numpy as np 
import scipy as sp 
import matplotlib.pyplot as plt 
import seaborn as sns

In [4]:
kidney_df = pd.read_csv('./datasets/kidney_data.csv')
kidney_df.head(10)

Unnamed: 0,PatientID,Age,Gender,Ethnicity,SocioeconomicStatus,EducationLevel,BMI,Smoking,AlcoholConsumption,PhysicalActivity,...,Itching,QualityOfLifeScore,HeavyMetalsExposure,OccupationalExposureChemicals,WaterQuality,MedicalCheckupsFrequency,MedicationAdherence,HealthLiteracy,Diagnosis,DoctorInCharge
0,1,71,0,0,0,2,31.069414,1,5.128112,1.67622,...,7.556302,76.0768,0,0,1,1.018824,4.966808,9.871449,1,Confidential
1,2,34,0,0,1,3,29.692119,1,18.609552,8.377574,...,6.836766,40.128498,0,0,0,3.923538,8.189275,7.161765,1,Confidential
2,3,80,1,1,0,1,37.394822,1,11.882429,9.607401,...,2.144722,92.872842,0,1,1,1.429906,7.624028,7.354632,1,Confidential
3,4,40,0,2,0,1,31.32968,0,16.020165,0.408871,...,7.077188,90.080321,0,0,0,3.226416,3.282688,6.629587,1,Confidential
4,5,43,0,1,1,2,23.726311,0,7.944146,0.780319,...,3.553118,5.258372,0,0,1,0.285466,3.849498,1.437385,1,Confidential
5,6,22,0,0,0,1,39.155643,0,4.243608,8.847245,...,8.685695,12.79411,0,0,0,0.358613,5.766704,2.066944,1,Confidential
6,7,41,0,1,0,1,35.040487,0,18.224708,8.155523,...,1.995016,38.72303,0,0,1,2.744605,5.51576,3.856676,1,Confidential
7,8,72,1,0,1,3,30.76044,1,18.662717,6.179345,...,2.178408,92.939765,0,0,0,1.828836,8.620466,9.95467,0,Confidential
8,9,21,0,1,0,2,22.32313,0,9.951503,9.449652,...,9.605863,84.420517,0,0,1,3.521973,4.724738,6.725741,1,Confidential
9,10,49,0,3,0,1,24.338507,0,0.1299,6.464306,...,5.170201,4.776171,0,0,0,3.448881,8.93509,4.423802,1,Confidential


In [9]:
print(f'kidney_df shape: {kidney_df.shape}')
kidney_df.info()

kidney_df shape: (1659, 54)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1659 entries, 0 to 1658
Data columns (total 54 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   PatientID                      1659 non-null   int64  
 1   Age                            1659 non-null   int64  
 2   Gender                         1659 non-null   int64  
 3   Ethnicity                      1659 non-null   int64  
 4   SocioeconomicStatus            1659 non-null   int64  
 5   EducationLevel                 1659 non-null   int64  
 6   BMI                            1659 non-null   float64
 7   Smoking                        1659 non-null   int64  
 8   AlcoholConsumption             1659 non-null   float64
 9   PhysicalActivity               1659 non-null   float64
 10  DietQuality                    1659 non-null   float64
 11  SleepQuality                   1659 non-null   float64
 12  FamilyHistoryKidneyD

### Querying & Filtering the DataFrame

In [22]:
# Query by Diet Quality --> Higher than Average Diet Quality
kidney_df['DietQuality'].min(), kidney_df['DietQuality'].mean(),kidney_df['DietQuality'].max()

# Retrieve all observations of patients w/ higher than average diet qualities
high_avg_diet = kidney_df.loc[kidney_df['DietQuality'] > kidney_df['DietQuality'].mean()]

# Sorting by Diet Quality w/ pandas 
high_avg_diet.sort_values(by=['DietQuality', 'Age'], ascending=False) # Primary sort = DietQuality, Secondary sort = Age
print(f'length of high_avg_diet df: {len(high_avg_diet)}')
high_avg_diet.head()

length of high_avg_diet df: 824


Unnamed: 0,PatientID,Age,Gender,Ethnicity,SocioeconomicStatus,EducationLevel,BMI,Smoking,AlcoholConsumption,PhysicalActivity,...,Itching,QualityOfLifeScore,HeavyMetalsExposure,OccupationalExposureChemicals,WaterQuality,MedicalCheckupsFrequency,MedicationAdherence,HealthLiteracy,Diagnosis,DoctorInCharge
1,2,34,0,0,1,3,29.692119,1,18.609552,8.377574,...,6.836766,40.128498,0,0,0,3.923538,8.189275,7.161765,1,Confidential
3,4,40,0,2,0,1,31.32968,0,16.020165,0.408871,...,7.077188,90.080321,0,0,0,3.226416,3.282688,6.629587,1,Confidential
5,6,22,0,0,0,1,39.155643,0,4.243608,8.847245,...,8.685695,12.79411,0,0,0,0.358613,5.766704,2.066944,1,Confidential
7,8,72,1,0,1,3,30.76044,1,18.662717,6.179345,...,2.178408,92.939765,0,0,0,1.828836,8.620466,9.95467,0,Confidential
9,10,49,0,3,0,1,24.338507,0,0.1299,6.464306,...,5.170201,4.776171,0,0,0,3.448881,8.93509,4.423802,1,Confidential
