In [2]:
!pip install split-folders

Collecting split-folders
  Downloading split_folders-0.5.1-py3-none-any.whl.metadata (6.2 kB)
Downloading split_folders-0.5.1-py3-none-any.whl (8.4 kB)
Installing collected packages: split-folders
Successfully installed split-folders-0.5.1


In [144]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
import librosa
import librosa.display
from os import listdir
from os.path import isfile, join
import scipy
import glob
from tqdm import tqdm
import wave
import random as rand
from librosa.effects import pitch_shift, time_stretch
import pathlib
import splitfolders

from google.colab import drive
drive.mount('/content/drive/', force_remount=True)

import zipfile

# Replace 'path/to/your/zip/file.zip' with the actual path to your zip file
with zipfile.ZipFile('/content/drive/My Drive/ICBHI_final_database.zip', 'r') as zip_ref:
    zip_ref.extractall('/content/drive/My Drive/ICBHI_data')


Mounted at /content/drive/


In [232]:
path_to_audio_files = []

folder_path = "/content/drive/My Drive/ICBHI_data" # THIS IS SO YOU CAN RUN INSTANTLY
audio_and_txt = "/content/drive/My Drive/ICBHI_data/ICBHI_final_database"

for filename in glob.glob(os.path.join(audio_and_txt, '*.wav')):
    path_to_audio_files.append(filename)
audio_files_data = pd.DataFrame(path_to_audio_files, columns = ['audio_file_full_path'])

In [264]:

diagnosis_df = pd.read_csv(os.path.join(folder_path, 'ICBHI_Challenge_diagnosis.txt'), delimiter='\t', names=['Patient number', 'Diagnosis'])
diagnosis_df['Binary_diagnosis'] = diagnosis_df['Diagnosis'].apply(lambda x: 'Healthy' if x =='Healthy'  else 'Unhealthy')

df_demographic_info = pd.read_csv(os.path.join(folder_path, 'ICBHI_Challenge_demographic_information.txt'), delimiter = '\t', names = ['Patient number', 'Age', 'Sex' , 'Adult BMI (kg/m2)', 'Child Weight (kg)' , 'Child Height (cm)'])
df =  df_demographic_info.join(diagnosis_df.set_index('Patient number'), on = 'Patient number', how = 'left')

filenames = [s.split('.')[0] for s in os.listdir(path = audio_and_txt) if '.txt' in s]
filenames.remove("filename_differences")
filenames.remove("filename_format")

df


Unnamed: 0,Patient number,Age,Sex,Adult BMI (kg/m2),Child Weight (kg),Child Height (cm),Diagnosis,Binary_diagnosis
0,101,3.00,F,,19.0,99.0,URTI,Unhealthy
1,102,0.75,F,,9.8,73.0,Healthy,Healthy
2,103,70.00,F,33.00,,,Asthma,Unhealthy
3,104,70.00,F,28.47,,,COPD,Unhealthy
4,105,7.00,F,,32.0,135.0,URTI,Unhealthy
...,...,...,...,...,...,...,...,...
121,222,60.00,M,,,,COPD,Unhealthy
122,223,,,,,,COPD,Unhealthy
123,224,10.00,F,,32.3,143.0,Healthy,Healthy
124,225,0.83,M,,7.8,74.0,Healthy,Healthy


In [265]:
def extract_annotation_data(file_name, root):

    tokens = file_name.split('_')
    recording_info = pd.DataFrame(data = [tokens + [file_name + ".wav"]], columns = ['Patient number', 'Recording index', 'Chest location','Acquisition mode','Recording equipment','audio_file'])
    recording_annotations = pd.read_csv(os.path.join(root, file_name + '.txt'), names = ['Start', 'End', 'Crackles', 'Wheezes'], delimiter= '\t')
    return (recording_info, recording_annotations)


i_list = []
rec_annotations = []
rec_annotations_dict = {}
for s in filenames:

    (i,a) = extract_annotation_data(s, audio_and_txt)
    i_list.append(i)
    rec_annotations.append(a)
    rec_annotations_dict[s] = a

recording_info = pd.concat(i_list, axis = 0)
recording_info["Patient number"] = pd.to_numeric(recording_info["Patient number"])

# df, recording_info
df =  df.join(recording_info.set_index('Patient number'), on = 'Patient number', how = 'left')
#df = pd.merge(df,recording_info, on = "Patient number")

df

Unnamed: 0,Patient number,Age,Sex,Adult BMI (kg/m2),Child Weight (kg),Child Height (cm),Diagnosis,Binary_diagnosis,Recording index,Chest location,Acquisition mode,Recording equipment,audio_file
0,101,3.00,F,,19.0,99.0,URTI,Unhealthy,1b1,Al,sc,Meditron,101_1b1_Al_sc_Meditron.wav
0,101,3.00,F,,19.0,99.0,URTI,Unhealthy,1b1,Pr,sc,Meditron,101_1b1_Pr_sc_Meditron.wav
1,102,0.75,F,,9.8,73.0,Healthy,Healthy,1b1,Ar,sc,Meditron,102_1b1_Ar_sc_Meditron.wav
2,103,70.00,F,33.00,,,Asthma,Unhealthy,2b2,Ar,mc,LittC2SE,103_2b2_Ar_mc_LittC2SE.wav
3,104,70.00,F,28.47,,,COPD,Unhealthy,1b1,Al,sc,Litt3200,104_1b1_Al_sc_Litt3200.wav
...,...,...,...,...,...,...,...,...,...,...,...,...,...
123,224,10.00,F,,32.3,143.0,Healthy,Healthy,1b2,Al,sc,Meditron,224_1b2_Al_sc_Meditron.wav
124,225,0.83,M,,7.8,74.0,Healthy,Healthy,1b1,Pl,sc,Meditron,225_1b1_Pl_sc_Meditron.wav
125,226,4.00,M,,16.7,103.0,Pneumonia,Unhealthy,1b1,Al,sc,Meditron,226_1b1_Al_sc_Meditron.wav
125,226,4.00,M,,16.7,103.0,Pneumonia,Unhealthy,1b1,Ll,sc,Meditron,226_1b1_Ll_sc_Meditron.wav


In [42]:
def audio_features(filename):
    sound, sample_rate = librosa.load(filename)
    stft = np.abs(librosa.stft(sound))

    mfccs = np.mean(librosa.feature.mfcc(y=sound, sr=sample_rate, n_mfcc=40),axis=1)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate),axis=1)
    mel = np.mean(librosa.feature.melspectrogram(y=sound, sr=sample_rate),axis=1)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate),axis=1)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(sound), sr=sample_rate),axis=1)
    zero_crossing = sum(librosa.zero_crossings(sound, pad=False))
    centroids = librosa.feature.spectral_centroid(y=sound, sr=sample_rate)[0].shape[0]
    energy = scipy.linalg.norm(sound)
    return [mfccs,chroma,mel,contrast,tonnetz,zero_crossing,centroids,energy]

In [233]:
tqdm.pandas()

audio_files_data.to_pickle("extracted_features.pkl")


audio_files_data['to_drop'] = audio_files_data['audio_file_full_path'].progress_apply(lambda x: audio_features(x))

audio_files_data.to_pickle("extracted_features1.pkl")



  return pitch_tuning(
100%|██████████| 920/920 [31:48<00:00,  2.07s/it]


In [234]:
audio_files_data = pd.read_pickle("extracted_features1.pkl")

audio_files_data[["mfccs","chroma","mel","contrast","tonnetz","zero_crossing","centroids","energy"]] = pd.DataFrame(audio_files_data['to_drop'].tolist(), index=audio_files_data.index)
audio_files_data.drop('to_drop',inplace=True, axis=1)

audio_files_data['audio_file'] = pd.DataFrame(audio_files_data['audio_file_full_path'].apply(lambda x: x.split('/')[-1]))

audio_files_data

Unnamed: 0,audio_file_full_path,mfccs,chroma,mel,contrast,tonnetz,zero_crossing,centroids,energy,audio_file
0,/content/drive/My Drive/ICBHI_data/ICBHI_final...,"[-528.42523, 104.517365, 69.907585, 42.572124,...","[0.80671513, 0.8066955, 0.8320573, 0.8765917, ...","[46.26634, 18.62562, 6.161757, 2.8784091, 1.03...","[21.51623339430073, 11.658483335641492, 13.289...","[0.018575801615783444, 0.013308380870426026, -...",936,862,40.098251,101_1b1_Al_sc_Meditron.wav
1,/content/drive/My Drive/ICBHI_data/ICBHI_final...,"[-418.4332, 68.25591, 48.025726, 33.063877, 27...","[0.8252702, 0.8410056, 0.8774302, 0.9184118, 0...","[909.83435, 11.385075, 0.80319166, 0.29468063,...","[30.037173123830808, 11.463252328607716, 12.74...","[-0.013298861290560623, 0.028676890897064482, ...",490,862,186.288315,103_2b2_Ar_mc_LittC2SE.wav
2,/content/drive/My Drive/ICBHI_data/ICBHI_final...,"[-441.49817, 100.99428, 74.06434, 43.248154, 1...","[0.7238368, 0.7624288, 0.7480874, 0.74456394, ...","[93.04067, 24.775574, 4.0288863, 1.1533853, 1....","[21.71142489421972, 14.342142632119268, 17.742...","[-0.035498577092532055, -0.004184018438682413,...",2365,683,51.258595,104_1b1_Al_sc_Litt3200.wav
3,/content/drive/My Drive/ICBHI_data/ICBHI_final...,"[-443.03708, 106.82732, 74.783844, 40.041946, ...","[0.77929807, 0.7877461, 0.7243347, 0.67312485,...","[38.009144, 7.5106783, 2.8273742, 3.3235767, 2...","[19.34737633609096, 13.054168489076734, 17.611...","[-0.021399820960275442, 0.014060908436482307, ...",4268,1102,47.924110,104_1b1_Ar_sc_Litt3200.wav
4,/content/drive/My Drive/ICBHI_data/ICBHI_final...,"[-477.4034, 67.0189, 54.495525, 39.22147, 26.0...","[0.70687455, 0.7492464, 0.7788246, 0.7583348, ...","[14.011383, 7.834487, 2.4584835, 0.47662553, 0...","[21.90517966903457, 14.081296377636143, 17.063...","[-0.023834250896429546, 0.02598451129195089, -...",1557,796,24.590376,104_1b1_Ll_sc_Litt3200.wav
...,...,...,...,...,...,...,...,...,...,...
915,/content/drive/My Drive/ICBHI_data/ICBHI_final...,"[-365.37302, 112.27038, 50.73541, 34.21272, 16...","[0.8336668, 0.8414425, 0.85295093, 0.8513387, ...","[700.5175, 73.75414, 11.35324, 3.4255917, 1.60...","[24.137916093126375, 12.830525033323303, 14.82...","[-0.013647603283556653, 0.03338061710846057, -...",1310,1460,236.242462,223_1b1_Ar_sc_Meditron.wav
916,/content/drive/My Drive/ICBHI_data/ICBHI_final...,"[-397.4662, 60.472786, 45.182053, 34.165176, 2...","[0.8692288, 0.89003456, 0.90306336, 0.895844, ...","[656.2761, 27.06144, 5.59436, 1.7015041, 0.655...","[29.911176844032237, 12.269187724217437, 12.60...","[-0.007674663183378932, 0.0320164014904898, -0...",546,1430,263.315369,223_1b1_Ll_sc_Meditron.wav
917,/content/drive/My Drive/ICBHI_data/ICBHI_final...,"[-421.35468, 82.7416, 48.329437, 28.606459, 27...","[0.7979248, 0.821374, 0.8588465, 0.8727476, 0....","[450.815, 20.26086, 4.061995, 2.2929623, 2.170...","[27.33587570002304, 12.079580279863467, 13.138...","[-0.027389382860270345, 0.01461230358808515, -...",764,1417,211.846619,223_1b1_Pl_sc_Meditron.wav
918,/content/drive/My Drive/ICBHI_data/ICBHI_final...,"[-526.95135, 74.23647, 54.046577, 37.761154, 2...","[0.87883294, 0.8795557, 0.89829457, 0.9165534,...","[67.77894, 19.184908, 6.492502, 0.66145724, 0....","[26.825839926930502, 11.89704694131603, 12.936...","[-0.013993864033623907, 0.03054745633546835, -...",788,862,53.422619,224_1b1_Tc_sc_Meditron.wav


In [266]:
df['audio_file'] = df['audio_file'].astype(str)
audio_files_data['audio_file_full_path'] = audio_files_data['audio_file_full_path'].astype(str)

#df = df.reset_index(drop=True)
#audio_files_data = audio_files_data.reset_index(drop=True)

#df = pd.concat([df, audio_files_data], axis=1)
df =  df.join(audio_files_data.set_index('audio_file'), on = 'audio_file', how = 'left')

#df.drop('audio_file',inplace=True, axis=1)

"""Thus at this point we see we have many NaN values in our dataset. However we notice that BMI, weight, and height are all related through a mathamatical equation. So we have hope of finding ways to fill in these NaN values."""

'Thus at this point we see we have many NaN values in our dataset. However we notice that BMI, weight, and height are all related through a mathamatical equation. So we have hope of finding ways to fill in these NaN values.'

In [267]:
to_drop = []
for file_name in audio_files_data['audio_file_full_path']:
    with wave.open(file_name, "rb") as wave_file:
        frame_rate = wave_file.getframerate()
        if(frame_rate != 44100):
            to_drop.append(file_name)

len(to_drop)
#to_drop = ['/content/drive/My Drive/ICBHI_data/ICBHI_final_database/101_1b1_Al_sc_Meditron.wav', '/content/drive/My Drive/ICBHI_data/ICBHI_final_database/103_2b2_Ar_mc_LittC2SE.wav']

96

In [268]:
for item in to_drop:
  df = df[df.audio_file_full_path != item]


In [269]:
df = df.drop(columns=['Adult BMI (kg/m2)',"Child Weight (kg)","Child Height (cm)"])


In [270]:
df = df[df['Age'].notna()]
df = df.reset_index(drop=True)


In [278]:
"""# Need to concatenate all of our spectral feature arrays and put them all in one array"""
good_rows = []

for i in range(len(df)):
   good_rows.append(np.concatenate((df['mfccs'][i] , df['chroma'][i] , df['mel'][i] , df['contrast'][i] , df['tonnetz'][i])))

df_copy =  pd.DataFrame({'info': good_rows})

df_copy = pd.DataFrame(df_copy['info'].tolist(), index=df_copy.index)

df_copy

df = pd.concat([df, df_copy], axis=1)

In [280]:
df = pd.get_dummies(data=df, columns=['Chest location','Acquisition mode','Recording equipment'])

In [281]:
df

Unnamed: 0,Patient number,Age,Sex,Diagnosis,Binary_diagnosis,Recording index,audio_file,audio_file_full_path,mfccs,chroma,...,Chest location_Ll,Chest location_Lr,Chest location_Pl,Chest location_Pr,Chest location_Tc,Acquisition mode_mc,Acquisition mode_sc,Recording equipment_AKGC417L,Recording equipment_LittC2SE,Recording equipment_Meditron
0,101,3.00,F,URTI,Unhealthy,1b1,101_1b1_Al_sc_Meditron.wav,/content/drive/My Drive/ICBHI_data/ICBHI_final...,"[-528.42523, 104.517365, 69.907585, 42.572124,...","[0.80671513, 0.8066955, 0.8320573, 0.8765917, ...",...,False,False,False,False,False,False,True,False,False,True
1,101,3.00,F,URTI,Unhealthy,1b1,101_1b1_Pr_sc_Meditron.wav,/content/drive/My Drive/ICBHI_data/ICBHI_final...,"[-582.5047, 95.39471, 57.40218, 31.435293, 28....","[0.8172376, 0.83850676, 0.86833954, 0.89889115...",...,False,False,False,True,False,False,True,False,False,True
2,102,0.75,F,Healthy,Healthy,1b1,102_1b1_Ar_sc_Meditron.wav,/content/drive/My Drive/ICBHI_data/ICBHI_final...,"[-596.8061, 116.009735, 60.59253, 25.570185, 2...","[0.81662965, 0.85524493, 0.9108528, 0.9124299,...",...,False,False,False,False,False,False,True,False,False,True
3,103,70.00,F,Asthma,Unhealthy,2b2,103_2b2_Ar_mc_LittC2SE.wav,/content/drive/My Drive/ICBHI_data/ICBHI_final...,"[-418.4332, 68.25591, 48.025726, 33.063877, 27...","[0.8252702, 0.8410056, 0.8774302, 0.9184118, 0...",...,False,False,False,False,False,True,False,False,True,False
4,105,7.00,F,URTI,Unhealthy,1b1,105_1b1_Tc_sc_Meditron.wav,/content/drive/My Drive/ICBHI_data/ICBHI_final...,"[-354.35706, 177.72319, 23.68711, -61.08058, 5...","[0.84354645, 0.9046195, 0.86379474, 0.8055111,...",...,False,False,False,False,True,False,True,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
819,224,10.00,F,Healthy,Healthy,1b2,224_1b2_Al_sc_Meditron.wav,/content/drive/My Drive/ICBHI_data/ICBHI_final...,"[-547.4475, 75.98515, 54.631798, 41.02187, 34....","[0.84370035, 0.8341989, 0.85375684, 0.8836045,...",...,False,False,False,False,False,False,True,False,False,True
820,225,0.83,M,Healthy,Healthy,1b1,225_1b1_Pl_sc_Meditron.wav,/content/drive/My Drive/ICBHI_data/ICBHI_final...,"[-533.13995, 99.10363, 55.93617, 30.745462, 24...","[0.8169379, 0.8169047, 0.8328133, 0.8611766, 0...",...,False,False,True,False,False,False,True,False,False,True
821,226,4.00,M,Pneumonia,Unhealthy,1b1,226_1b1_Al_sc_Meditron.wav,/content/drive/My Drive/ICBHI_data/ICBHI_final...,"[-520.4204, 102.1701, 62.05142, 36.29775, 32.7...","[0.8446185, 0.83890134, 0.83978844, 0.83535963...",...,False,False,False,False,False,False,True,False,False,True
822,226,4.00,M,Pneumonia,Unhealthy,1b1,226_1b1_Ll_sc_Meditron.wav,/content/drive/My Drive/ICBHI_data/ICBHI_final...,"[-484.38248, 85.32702, 64.266525, 45.73867, 36...","[0.850083, 0.85153455, 0.86403704, 0.878169, 0...",...,True,False,False,False,False,False,True,False,False,True


In [283]:
df['Sex'] = (df['Sex'] == "M").astype(int)
df.drop(columns=['audio_file','audio_file_full_path'],inplace=True)
df

Unnamed: 0,Patient number,Age,Sex,Diagnosis,Binary_diagnosis,Recording index,mfccs,chroma,mel,contrast,...,Chest location_Ll,Chest location_Lr,Chest location_Pl,Chest location_Pr,Chest location_Tc,Acquisition mode_mc,Acquisition mode_sc,Recording equipment_AKGC417L,Recording equipment_LittC2SE,Recording equipment_Meditron
0,101,3.00,0,URTI,Unhealthy,1b1,"[-528.42523, 104.517365, 69.907585, 42.572124,...","[0.80671513, 0.8066955, 0.8320573, 0.8765917, ...","[46.26634, 18.62562, 6.161757, 2.8784091, 1.03...","[21.51623339430073, 11.658483335641492, 13.289...",...,False,False,False,False,False,False,True,False,False,True
1,101,3.00,0,URTI,Unhealthy,1b1,"[-582.5047, 95.39471, 57.40218, 31.435293, 28....","[0.8172376, 0.83850676, 0.86833954, 0.89889115...","[17.473001, 2.0139313, 0.198458, 0.067585096, ...","[24.07452348550257, 12.552549215921363, 13.484...",...,False,False,False,True,False,False,True,False,False,True
2,102,0.75,0,Healthy,Healthy,1b1,"[-596.8061, 116.009735, 60.59253, 25.570185, 2...","[0.81662965, 0.85524493, 0.9108528, 0.9124299,...","[6.2047653, 0.60025597, 0.102930844, 0.0519275...","[21.9899662556195, 11.45872972224247, 14.76904...",...,False,False,False,False,False,False,True,False,False,True
3,103,70.00,0,Asthma,Unhealthy,2b2,"[-418.4332, 68.25591, 48.025726, 33.063877, 27...","[0.8252702, 0.8410056, 0.8774302, 0.9184118, 0...","[909.83435, 11.385075, 0.80319166, 0.29468063,...","[30.037173123830808, 11.463252328607716, 12.74...",...,False,False,False,False,False,True,False,False,True,False
4,105,7.00,0,URTI,Unhealthy,1b1,"[-354.35706, 177.72319, 23.68711, -61.08058, 5...","[0.84354645, 0.9046195, 0.86379474, 0.8055111,...","[6.8732758, 2.2774293, 0.5358679, 0.16095991, ...","[17.623712631952646, 10.033226405791604, 12.29...",...,False,False,False,False,True,False,True,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
819,224,10.00,0,Healthy,Healthy,1b2,"[-547.4475, 75.98515, 54.631798, 41.02187, 34....","[0.84370035, 0.8341989, 0.85375684, 0.8836045,...","[39.994473, 13.610375, 1.1346987, 0.13204911, ...","[25.695660358850155, 12.580743137171961, 13.11...",...,False,False,False,False,False,False,True,False,False,True
820,225,0.83,0,Healthy,Healthy,1b1,"[-533.13995, 99.10363, 55.93617, 30.745462, 24...","[0.8169379, 0.8169047, 0.8328133, 0.8611766, 0...","[35.39508, 4.80749, 1.4905819, 0.43252712, 0.0...","[24.252329838423517, 12.416137892155126, 13.06...",...,False,False,True,False,False,False,True,False,False,True
821,226,4.00,0,Pneumonia,Unhealthy,1b1,"[-520.4204, 102.1701, 62.05142, 36.29775, 32.7...","[0.8446185, 0.83890134, 0.83978844, 0.83535963...","[34.4036, 5.8527074, 1.0365623, 0.29175016, 0....","[21.465848401519754, 12.479125572333091, 14.70...",...,False,False,False,False,False,False,True,False,False,True
822,226,4.00,0,Pneumonia,Unhealthy,1b1,"[-484.38248, 85.32702, 64.266525, 45.73867, 36...","[0.850083, 0.85153455, 0.86403704, 0.878169, 0...","[145.206, 21.312765, 4.0849094, 0.6241147, 0.2...","[24.02525067770458, 13.379899970535266, 14.120...",...,True,False,False,False,False,False,True,False,False,True


In [284]:
print("Number of Healthy Patients: ",(df['Binary_diagnosis'] == "Healthy").sum())
print("Number of Unhealthy Patients: ",(df['Binary_diagnosis'] == "Unhealthy").sum())

print("Number of Patients with Asthma are: ",(df['Diagnosis'] == "Asthma").sum())
print("Number of Patients with Bronchiectasis are: ",(df['Diagnosis'] == "Bronchiectasis").sum())
print("Number of Patients with Bronchiolitis are: ",(df['Diagnosis'] == "Bronchiolitis").sum())
print("Number of Patients with COPD are: ",(df['Diagnosis'] == "COPD").sum())
print("Number of Patients that are Healthy, are: ",(df['Diagnosis'] == "Healthy").sum())
print("Number of Patients with LRTI are: ",(df['Diagnosis'] == "LRTI").sum())
print("Number of Patients with Pneumonia are: ",(df['Diagnosis'] == "Pneumonia").sum())
print("Number of Patients with URTI are: ",(df['Diagnosis'] == "URTI").sum())


Number of Healthy Patients:  35
Number of Unhealthy Patients:  789
Number of Patients with Asthma are:  1
Number of Patients with Bronchiectasis are:  16
Number of Patients with Bronchiolitis are:  13
Number of Patients with COPD are:  697
Number of Patients that are Healthy, are:  35
Number of Patients with LRTI are:  2
Number of Patients with Pneumonia are:  37
Number of Patients with URTI are:  23


In [285]:
df = df[ df["Diagnosis"] != "Asthma"]
df = df[ df["Diagnosis"] != "LRTI"  ]
df.reset_index(inplace=True)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 821 entries, 0 to 820
Columns: 220 entries, index to Recording equipment_Meditron
dtypes: bool(12), float64(195), int64(5), object(8)
memory usage: 1.3+ MB


In [286]:
df.to_pickle("/content/drive/My Drive/ICBHI_data/Final_Data.pkl")