## 1)

In [1]:
import pandas as pd
# Load the patient records from the CSV file into a data frame and display it
patients = pd.read_csv(filepath_or_buffer="patients.csv")
patients

Unnamed: 0,Name,Age,Gender,BloodGroup,Weight(kg),Height(m),SystolicBloodPressure(mmHg),DiastolicBloodPressure(mmHg),Temperature(F),Diabetes
0,P.Lee,35,Female,ARh+,50,1.52,68,112,98.7,0
1,R.Jones,52,Male,ORh-,115,1.77,110,154,98.5,1
2,J.Smith,45,Male,ORh+,96,1.83,88,136,98.8,0
3,A.Patel,70,,ORh-,41,1.55,76,125,98.6,0
4,M.Owen,24,Male,ARh-,79,1.82,65,105,98.7,0
5,S.Green,43,Male,ORh-,109,1.89,114,159,98.9,1
6,N.Cook,68,Male,ARh+,73,1.76,108,136,99.0,0
7,W.Hands,77,Female,ORh-,104,1.71,107,145,98.3,1
8,P.Rice,45,Female,ORh+,64,,101,132,98.6,0
9,F.Marsh,28,Male,ORh+,136,1.78,121,165,98.7,1



## 2)

### Set the missing gender

The patient in row 3 is short and light, so we assume the patient is a woman (although we cannot be sure!).

In [2]:
# Label-based assignment: .loc[row label, column label] addresses a single cell
# of the data frame, which can then be assigned to directly
patients.loc[3, "Gender"] = "Female"
patients

Unnamed: 0,Name,Age,Gender,BloodGroup,Weight(kg),Height(m),SystolicBloodPressure(mmHg),DiastolicBloodPressure(mmHg),Temperature(F),Diabetes
0,P.Lee,35,Female,ARh+,50,1.52,68,112,98.7,0
1,R.Jones,52,Male,ORh-,115,1.77,110,154,98.5,1
2,J.Smith,45,Male,ORh+,96,1.83,88,136,98.8,0
3,A.Patel,70,Female,ORh-,41,1.55,76,125,98.6,0
4,M.Owen,24,Male,ARh-,79,1.82,65,105,98.7,0
5,S.Green,43,Male,ORh-,109,1.89,114,159,98.9,1
6,N.Cook,68,Male,ARh+,73,1.76,108,136,99.0,0
7,W.Hands,77,Female,ORh-,104,1.71,107,145,98.3,1
8,P.Rice,45,Female,ORh+,64,,101,132,98.6,0
9,F.Marsh,28,Male,ORh+,136,1.78,121,165,98.7,1


### Set the missing height
A woman's height is missing in row 8. We'll fill it in with the mean height of the other women in the sample. This is a very rough way of imputing the value, but it is better than using the mean of all heights.

In [3]:
import numpy as np
# Select the height column for the rows whose gender is "Female" in a single
# .loc lookup (boolean row mask, column label)
is_female = patients["Gender"] == "Female"
fheights = patients.loc[is_female, "Height(m)"]
fheights

0    1.52
3    1.55
7    1.71
8     NaN
Name: Height(m), dtype: float64

In [4]:
# Mean height of the female patients. np.nanmean ignores NaN entries, so the
# missing height in row 8 does not turn the whole result into NaN
fheightsmean = np.nanmean(fheights)
fheightsmean

1.5933333333333335

In [5]:
# Round to 2 decimal places, the same precision as the heights recorded in the data
fheightsmean = round(fheightsmean, 2)
fheightsmean

1.59

In [6]:
# Fill the missing height in row 8 with the rounded mean of the female heights
patients.loc[8, "Height(m)"] = fheightsmean
patients

Unnamed: 0,Name,Age,Gender,BloodGroup,Weight(kg),Height(m),SystolicBloodPressure(mmHg),DiastolicBloodPressure(mmHg),Temperature(F),Diabetes
0,P.Lee,35,Female,ARh+,50,1.52,68,112,98.7,0
1,R.Jones,52,Male,ORh-,115,1.77,110,154,98.5,1
2,J.Smith,45,Male,ORh+,96,1.83,88,136,98.8,0
3,A.Patel,70,Female,ORh-,41,1.55,76,125,98.6,0
4,M.Owen,24,Male,ARh-,79,1.82,65,105,98.7,0
5,S.Green,43,Male,ORh-,109,1.89,114,159,98.9,1
6,N.Cook,68,Male,ARh+,73,1.76,108,136,99.0,0
7,W.Hands,77,Female,ORh-,104,1.71,107,145,98.3,1
8,P.Rice,45,Female,ORh+,64,1.59,101,132,98.6,0
9,F.Marsh,28,Male,ORh+,136,1.78,121,165,98.7,1


## 3)

In [7]:
# Remove the 'Name' column with drop(); columns= targets column labels (equivalent to axis=1),
# and inplace=True modifies the existing data frame instead of returning a new one
patients.drop(columns=['Name'], inplace=True)
patients

Unnamed: 0,Age,Gender,BloodGroup,Weight(kg),Height(m),SystolicBloodPressure(mmHg),DiastolicBloodPressure(mmHg),Temperature(F),Diabetes
0,35,Female,ARh+,50,1.52,68,112,98.7,0
1,52,Male,ORh-,115,1.77,110,154,98.5,1
2,45,Male,ORh+,96,1.83,88,136,98.8,0
3,70,Female,ORh-,41,1.55,76,125,98.6,0
4,24,Male,ARh-,79,1.82,65,105,98.7,0
5,43,Male,ORh-,109,1.89,114,159,98.9,1
6,68,Male,ARh+,73,1.76,108,136,99.0,0
7,77,Female,ORh-,104,1.71,107,145,98.3,1
8,45,Female,ORh+,64,1.59,101,132,98.6,0
9,28,Male,ORh+,136,1.78,121,165,98.7,1


## 4)

In [8]:
# Convert the Fahrenheit readings to Celsius and relabel the column.
# The conversion is guarded on the Fahrenheit column still being present, so
# re-running this cell cannot apply the formula a second time to values that
# are already in Celsius.
if 'Temperature(F)' in patients.columns:
    # C = (F - 32) * 5 / 9, rounded to 1 decimal place
    celsius_values = (patients['Temperature(F)'] - 32) * 5 / 9
    patients['Temperature(F)'] = celsius_values.round(1)
    # Rename only after the values are converted, so the label always matches the unit
    patients.rename(columns={'Temperature(F)': 'Temperature(C)'}, inplace=True)
patients

Unnamed: 0,Age,Gender,BloodGroup,Weight(kg),Height(m),SystolicBloodPressure(mmHg),DiastolicBloodPressure(mmHg),Temperature(C),Diabetes
0,35,Female,ARh+,50,1.52,68,112,37.1,0
1,52,Male,ORh-,115,1.77,110,154,36.9,1
2,45,Male,ORh+,96,1.83,88,136,37.1,0
3,70,Female,ORh-,41,1.55,76,125,37.0,0
4,24,Male,ARh-,79,1.82,65,105,37.1,0
5,43,Male,ORh-,109,1.89,114,159,37.2,1
6,68,Male,ARh+,73,1.76,108,136,37.2,0
7,77,Female,ORh-,104,1.71,107,145,36.8,1
8,45,Female,ORh+,64,1.59,101,132,37.0,0
9,28,Male,ORh+,136,1.78,121,165,37.1,1


## 5)

In [9]:
# Min-max normalisation of the weights: (w - min) / (max - min) maps them onto [0, 1]
weights = patients['Weight(kg)']
# Series.min()/max() are vectorised and skip NaN, unlike the Python built-ins,
# which iterate element by element and give order-dependent results with NaN
wmin = weights.min()
wmax = weights.max()
# Apply the formula to get the normalised weights
wnorm = (weights - wmin) / (wmax - wmin)
# Creating a new column has the same syntax as assigning to an existing one
patients['WeightNormed'] = wnorm
patients

Unnamed: 0,Age,Gender,BloodGroup,Weight(kg),Height(m),SystolicBloodPressure(mmHg),DiastolicBloodPressure(mmHg),Temperature(C),Diabetes,WeightNormed
0,35,Female,ARh+,50,1.52,68,112,37.1,0,0.094737
1,52,Male,ORh-,115,1.77,110,154,36.9,1,0.778947
2,45,Male,ORh+,96,1.83,88,136,37.1,0,0.578947
3,70,Female,ORh-,41,1.55,76,125,37.0,0,0.0
4,24,Male,ARh-,79,1.82,65,105,37.1,0,0.4
5,43,Male,ORh-,109,1.89,114,159,37.2,1,0.715789
6,68,Male,ARh+,73,1.76,108,136,37.2,0,0.336842
7,77,Female,ORh-,104,1.71,107,145,36.8,1,0.663158
8,45,Female,ORh+,64,1.59,101,132,37.0,0,0.242105
9,28,Male,ORh+,136,1.78,121,165,37.1,1,1.0


## 6)

In [10]:
# Bin the weights with the pandas function cut():
#   (0, 60] -> low, (60, 100] -> medium, above 100 -> high.
# The top bin is open-ended (infinity) instead of ending at the sample maximum:
# the bin edges then stay strictly increasing even when no patient weighs more
# than 100 kg, and the cell does not rely on a variable computed in another cell
patients['WeightBinned'] = pd.cut(
    patients["Weight(kg)"],
    bins=[0, 60, 100, float("inf")],
    labels=['low', 'medium', 'high'],
)
patients

Unnamed: 0,Age,Gender,BloodGroup,Weight(kg),Height(m),SystolicBloodPressure(mmHg),DiastolicBloodPressure(mmHg),Temperature(C),Diabetes,WeightNormed,WeightBinned
0,35,Female,ARh+,50,1.52,68,112,37.1,0,0.094737,low
1,52,Male,ORh-,115,1.77,110,154,36.9,1,0.778947,high
2,45,Male,ORh+,96,1.83,88,136,37.1,0,0.578947,medium
3,70,Female,ORh-,41,1.55,76,125,37.0,0,0.0,low
4,24,Male,ARh-,79,1.82,65,105,37.1,0,0.4,medium
5,43,Male,ORh-,109,1.89,114,159,37.2,1,0.715789,high
6,68,Male,ARh+,73,1.76,108,136,37.2,0,0.336842,medium
7,77,Female,ORh-,104,1.71,107,145,36.8,1,0.663158,high
8,45,Female,ORh+,64,1.59,101,132,37.0,0,0.242105,medium
9,28,Male,ORh+,136,1.78,121,165,37.1,1,1.0,high


## 7)

In [11]:
# Body mass index: weight in kilograms divided by the square of the height in metres,
# computed column-wise and stored as a new column
height_squared = patients['Height(m)'] ** 2
patients['BMI'] = patients['Weight(kg)'] / height_squared
patients

Unnamed: 0,Age,Gender,BloodGroup,Weight(kg),Height(m),SystolicBloodPressure(mmHg),DiastolicBloodPressure(mmHg),Temperature(C),Diabetes,WeightNormed,WeightBinned,BMI
0,35,Female,ARh+,50,1.52,68,112,37.1,0,0.094737,low,21.641274
1,52,Male,ORh-,115,1.77,110,154,36.9,1,0.778947,high,36.707204
2,45,Male,ORh+,96,1.83,88,136,37.1,0,0.578947,medium,28.666129
3,70,Female,ORh-,41,1.55,76,125,37.0,0,0.0,low,17.065557
4,24,Male,ARh-,79,1.82,65,105,37.1,0,0.4,medium,23.849777
5,43,Male,ORh-,109,1.89,114,159,37.2,1,0.715789,high,30.514263
6,68,Male,ARh+,73,1.76,108,136,37.2,0,0.336842,medium,23.566632
7,77,Female,ORh-,104,1.71,107,145,36.8,1,0.663158,high,35.566499
8,45,Female,ORh+,64,1.59,101,132,37.0,0,0.242105,medium,25.315454
9,28,Male,ORh+,136,1.78,121,165,37.1,1,1.0,high,42.923873


## 8)

In [12]:
# Plain-text dump of the final data frame; print() wraps a wide frame into several column groups
print(patients)

   Age  Gender BloodGroup  Weight(kg)  Height(m)  SystolicBloodPressure(mmHg)  \
0   35  Female       ARh+          50       1.52                           68   
1   52    Male       ORh-         115       1.77                          110   
2   45    Male       ORh+          96       1.83                           88   
3   70  Female       ORh-          41       1.55                           76   
4   24    Male       ARh-          79       1.82                           65   
5   43    Male       ORh-         109       1.89                          114   
6   68    Male       ARh+          73       1.76                          108   
7   77  Female       ORh-         104       1.71                          107   
8   45  Female       ORh+          64       1.59                          101   
9   28    Male       ORh+         136       1.78                          121   

   DiastolicBloodPressure(mmHg)  Temperature(C)  Diabetes  WeightNormed  \
0                           112  