## Prepare data for classification

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
data = pd.read_excel("OSA_DB_UPM.xlsx")
OSA_df = pd.DataFrame(data)

In [3]:
# Create a list of our conditions
conditions = [
    (OSA_df['IAH'] <= 10),
    (OSA_df['IAH'] >= 30),
    (OSA_df['IAH'] > 10) & (OSA_df['IAH'] < 30)
    ]

# Create a list of the values we want to assign for each condition
values = ['Healthy', 'Middle', 'Severe']

# Create a new column and use np.select to assign values to it using our lists as arguments
OSA_df['OSA'] = np.select(conditions, values)

# Display updated DataFrame
OSA_df.head()

Unnamed: 0,Patient,Gender,IAH,Weight,Height,Age,Cervical,OSA
0,P0002,hombre,29.6,119,174,56,48.0,Severe
1,P0004,hombre,19.7,78,168,39,42.0,Severe
2,P0005,hombre,9.0,80,173,32,40.0,Healthy
3,P0006,hombre,2.0,109,190,32,42.0,Healthy
4,P0007,hombre,34.0,86,169,39,42.0,Middle


In [4]:
df_OSA_male = OSA_df[OSA_df["Gender"] == "hombre"]

In [5]:
df_OSA_male.shape

(455, 8)

In [6]:
df_OSA_male.head()

Unnamed: 0,Patient,Gender,IAH,Weight,Height,Age,Cervical,OSA
0,P0002,hombre,29.6,119,174,56,48.0,Severe
1,P0004,hombre,19.7,78,168,39,42.0,Severe
2,P0005,hombre,9.0,80,173,32,40.0,Healthy
3,P0006,hombre,2.0,109,190,32,42.0,Healthy
4,P0007,hombre,34.0,86,169,39,42.0,Middle


In [7]:
# Filter only healthy and severe OSA
df_OSA_male = df_OSA_male.loc[df_OSA_male['OSA'] != 'Middle']

In [8]:
df_OSA_male.head()

Unnamed: 0,Patient,Gender,IAH,Weight,Height,Age,Cervical,OSA
0,P0002,hombre,29.6,119,174,56,48.0,Severe
1,P0004,hombre,19.7,78,168,39,42.0,Severe
2,P0005,hombre,9.0,80,173,32,40.0,Healthy
3,P0006,hombre,2.0,109,190,32,42.0,Healthy
6,P0009,hombre,22.0,72,165,40,42.0,Severe


In [9]:
df_OSA_male.dtypes

Patient      object
Gender       object
IAH         float64
Weight        int64
Height        int64
Age           int64
Cervical    float64
OSA          object
dtype: object

In [10]:
# Add BMI column
df_OSA_male['BMI'] = df_OSA_male['Weight']/(df_OSA_male['Height']/100)**2
df_OSA_male.head()

Unnamed: 0,Patient,Gender,IAH,Weight,Height,Age,Cervical,OSA,BMI
0,P0002,hombre,29.6,119,174,56,48.0,Severe,39.30506
1,P0004,hombre,19.7,78,168,39,42.0,Severe,27.636054
2,P0005,hombre,9.0,80,173,32,40.0,Healthy,26.729927
3,P0006,hombre,2.0,109,190,32,42.0,Healthy,30.193906
6,P0009,hombre,22.0,72,165,40,42.0,Severe,26.446281


In [11]:
df_OSA_male.describe()

Unnamed: 0,IAH,Weight,Height,Age,Cervical,BMI
count,318.0,318.0,318.0,318.0,318.0,318.0
mean,12.642767,88.572327,175.377358,47.836478,41.575472,28.794656
std,8.344911,14.479817,7.359347,12.715213,2.902022,4.330792
min,0.0,61.0,160.0,20.0,35.0,20.061728
25%,5.425,79.0,170.0,38.0,40.0,25.780897
50%,11.25,86.0,175.0,47.0,41.0,27.991677
75%,19.225,96.0,180.0,58.0,43.0,31.215405
max,29.9,140.0,196.0,85.0,52.0,43.428571


In [12]:
df_OSA_male.head()

Unnamed: 0,Patient,Gender,IAH,Weight,Height,Age,Cervical,OSA,BMI
0,P0002,hombre,29.6,119,174,56,48.0,Severe,39.30506
1,P0004,hombre,19.7,78,168,39,42.0,Severe,27.636054
2,P0005,hombre,9.0,80,173,32,40.0,Healthy,26.729927
3,P0006,hombre,2.0,109,190,32,42.0,Healthy,30.193906
6,P0009,hombre,22.0,72,165,40,42.0,Severe,26.446281


In [13]:
# Save the data frame male into an Excel: df_OSA_male.xlsx
df_OSA_male.to_excel('OSA_extreme_male.xlsx', index = False)

In [14]:
# Save the data frame male into as CSV: df_OSA_male.csv
df_OSA_male.to_csv('OSA_extreme_male.csv', index = False)