# Requirements

In [1]:
import pandas as pd

# Data

Read the patient experiment data.

In [2]:
# Load the raw measurements from the Excel workbook (path is relative to the notebook's working directory).
data = pd.read_excel('data/patient_experiment.xlsx')

In [3]:
# Summarise the raw frame: column names, dtypes and non-null counts.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 62 entries, 0 to 61
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   patient      62 non-null     int64         
 1   dose         61 non-null     float64       
 2   date         62 non-null     datetime64[ns]
 3   temperature  61 non-null     float64       
dtypes: datetime64[ns](1), float64(2), int64(1)
memory usage: 2.1 KB


The first step is to transform the data into a time series: one row per timestamp, and one column per patient for each measured variable.

In [4]:
def create_time_series(df):
    """Pivot long-format measurements into a wide, date-indexed frame.

    The 'date' column becomes the row index and every remaining value
    column is spread into one sub-column per 'patient', producing
    two-level column labels of the form (variable, patient).  Duplicate
    (date, patient) pairs are combined with pivot_table's default
    aggregation.
    """
    pivoted = df.pivot_table(columns=['patient'], index='date')
    return pivoted

Next, we fill in the missing values by interpolating between neighbouring time steps (pandas' default is linear interpolation).

In [5]:
def impute(df):
    """Fill missing values by linear interpolation along the index.

    Returns a new frame; the input frame is not modified.
    """
    # 'linear' is the pandas default; spelled out so the choice is visible
    filled = df.interpolate(method='linear')
    return filled

Finally, we compute the mean temperature across all patients for each time step. Note that the name of the column group to average is passed as a parameter, so the same function works for other variables.

In [6]:
def compute_mean(df, column):
    """Return a copy of ``df`` with an ``avg_temp`` column of row-wise means.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to summarise.  It is left unmodified.
    column : label or list of labels
        Selector passed to ``df[...]``.  It must select a DataFrame (for
        example a top-level label of two-level columns, or a list of
        column names) so that a mean along axis 1 is defined.

    Returns
    -------
    pandas.DataFrame
        A new frame holding every column of ``df`` plus ``avg_temp``,
        the mean of the selected columns for each row.
    """
    # assign() adds the column to a copy, so a pipe stage never alters
    # the frame that was handed to it.
    return df.assign(avg_temp=df[column].mean(axis=1))

All these operations can be chained using pipes.

In [7]:
# Run the three stages in order: pivot -> interpolate -> add the mean column.
time_series = (
    data
    .pipe(create_time_series)
    .pipe(impute)
    .pipe(compute_mean, 'temperature')
)

In [8]:
# Check the pivoted result: a DatetimeIndex and no missing values left in any column.
time_series.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 7 entries, 2012-10-02 10:00:00 to 2012-10-02 16:00:00
Data columns (total 19 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   (dose, 1)         7 non-null      float64
 1   (dose, 2)         7 non-null      float64
 2   (dose, 3)         7 non-null      float64
 3   (dose, 4)         7 non-null      float64
 4   (dose, 5)         7 non-null      float64
 5   (dose, 6)         7 non-null      float64
 6   (dose, 7)         7 non-null      float64
 7   (dose, 8)         7 non-null      float64
 8   (dose, 9)         7 non-null      float64
 9   (temperature, 1)  7 non-null      float64
 10  (temperature, 2)  7 non-null      float64
 11  (temperature, 3)  7 non-null      float64
 12  (temperature, 4)  7 non-null      float64
 13  (temperature, 5)  7 non-null      float64
 14  (temperature, 6)  7 non-null      float64
 15  (temperature, 7)  7 non-null      float64
 16  (temperat

In [9]:
# Display the wide frame; the last column holds the per-timestamp mean.
time_series

Unnamed: 0_level_0,dose,dose,dose,dose,dose,dose,dose,dose,dose,temperature,temperature,temperature,temperature,temperature,temperature,temperature,temperature,temperature,avg_temp
patient,1,2,3,4,5,6,7,8,9,1,2,3,4,5,6,7,8,9,Unnamed: 19_level_1
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2
2012-10-02 10:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,38.3,39.3,37.9,38.1,37.9,37.5,39.5,37.8,38.3,38.288889
2012-10-02 11:00:00,2.0,5.0,2.0,5.0,3.0,2.0,10.0,0.0,10.0,38.5,39.4,39.5,37.2,39.5,38.1,40.7,37.9,39.5,38.922222
2012-10-02 12:00:00,2.0,5.0,5.0,5.0,7.0,3.0,5.0,0.0,12.0,38.1,38.1,38.3,36.1,38.3,37.9,39.8,37.4,40.2,38.244444
2012-10-02 13:00:00,2.0,5.0,2.0,0.0,5.0,2.0,8.0,0.0,4.0,37.3,37.3,38.0,35.9,38.5,37.7,40.2,37.6,39.1,37.955556
2012-10-02 14:00:00,0.0,0.0,2.0,0.0,9.0,1.0,3.0,0.0,4.0,37.5,36.8,37.7,36.3,39.4,37.2,38.3,37.3,37.9,37.6
2012-10-02 15:00:00,0.0,0.0,2.0,0.0,3.0,0.0,3.0,0.0,0.0,37.1,36.8,37.1,36.6,37.9,36.8,37.6,37.1,37.1,37.122222
2012-10-02 16:00:00,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,36.8,36.8,36.7,36.7,37.2,36.8,37.3,36.8,37.3,36.933333


The original dataframe is unchanged: `pivot_table` returns a new object, so the later steps in the chain never touch `data`.

In [10]:
# Re-inspect the raw frame after the pipeline: still the same four columns and non-null counts.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 62 entries, 0 to 61
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   patient      62 non-null     int64         
 1   dose         61 non-null     float64       
 2   date         62 non-null     datetime64[ns]
 3   temperature  61 non-null     float64       
dtypes: datetime64[ns](1), float64(2), int64(1)
memory usage: 2.1 KB


In [11]:
# Display the raw long-format frame (one row per patient and timestamp).
data

Unnamed: 0,patient,dose,date,temperature
0,1,0.0,2012-10-02 10:00:00,38.3
1,1,2.0,2012-10-02 11:00:00,38.5
2,1,2.0,2012-10-02 12:00:00,38.1
3,1,2.0,2012-10-02 13:00:00,37.3
4,1,0.0,2012-10-02 14:00:00,37.5
...,...,...,...,...
57,9,12.0,2012-10-02 12:00:00,40.2
58,9,4.0,2012-10-02 13:00:00,39.1
59,9,4.0,2012-10-02 14:00:00,37.9
60,9,0.0,2012-10-02 15:00:00,37.1
