In [1]:
import numpy as np
import pandas as pd

In [3]:
data = pd.read_csv('Pfizer_1.csv')

In [5]:
data.head()

Unnamed: 0,Date,Drug_Name,Parameter,1:30:00,2:30:00,3:30:00,4:30:00,5:30:00,6:30:00,7:30:00,8:30:00,9:30:00,10:30:00,11:30:00,12:30:00
0,15-10-2020,diltiazem hydrochloride,Temperature,23.0,22.0,,21.0,21.0,22,23.0,21.0,22.0,20,20.0,21
1,15-10-2020,diltiazem hydrochloride,Pressure,12.0,13.0,,11.0,13.0,14,16.0,16.0,24.0,18,19.0,20
2,15-10-2020,docetaxel injection,Temperature,,17.0,18.0,,17.0,18,,,23.0,23,25.0,25
3,15-10-2020,docetaxel injection,Pressure,,22.0,22.0,,22.0,23,,,27.0,26,29.0,28
4,15-10-2020,ketamine hydrochloride,Temperature,24.0,,,27.0,,26,25.0,24.0,23.0,22,21.0,20


In [7]:
# Reshape wide -> long: every time-of-day column becomes a (Time, Reading)
# row, while Date, Drug_Name and Parameter stay as identifier columns.
data_melt = data.melt(
  id_vars=['Date', 'Drug_Name', 'Parameter'],
  var_name='Time',
  value_name='Reading',
)
data_melt.head()

Unnamed: 0,Date,Drug_Name,Parameter,Time,Reading
0,15-10-2020,diltiazem hydrochloride,Temperature,1:30:00,23.0
1,15-10-2020,diltiazem hydrochloride,Pressure,1:30:00,12.0
2,15-10-2020,docetaxel injection,Temperature,1:30:00,
3,15-10-2020,docetaxel injection,Pressure,1:30:00,
4,15-10-2020,ketamine hydrochloride,Temperature,1:30:00,24.0


In [83]:
# Spread the Parameter values (Pressure / Temperature) into their own columns,
# giving one row per (Date, Time, Drug_Name) combination.
data_tidy = data_melt.pivot(
  index=["Date", "Time", "Drug_Name"],
  columns="Parameter",
  values="Reading",
)
# Turn the index levels back into ordinary columns.
data_tidy = data_tidy.reset_index()
data_tidy.head()

Parameter,Date,Time,Drug_Name,Pressure,Temperature
0,15-10-2020,10:30:00,diltiazem hydrochloride,18.0,20.0
1,15-10-2020,10:30:00,docetaxel injection,26.0,23.0
2,15-10-2020,10:30:00,ketamine hydrochloride,9.0,22.0
3,15-10-2020,11:30:00,diltiazem hydrochloride,19.0,20.0
4,15-10-2020,11:30:00,docetaxel injection,29.0,25.0


In [111]:
data_tidy.columns.name = None

In [11]:
data_tidy.head()

Unnamed: 0,Date,Time,Drug_Name,Pressure,Temperature
0,15-10-2020,10:30:00,diltiazem hydrochloride,18.0,20.0
1,15-10-2020,10:30:00,docetaxel injection,26.0,23.0
2,15-10-2020,10:30:00,ketamine hydrochloride,9.0,22.0
3,15-10-2020,11:30:00,diltiazem hydrochloride,19.0,20.0
4,15-10-2020,11:30:00,docetaxel injection,29.0,25.0


# Working with Missing values

In [12]:
# The data contains many NaN values
data

Unnamed: 0,Date,Drug_Name,Parameter,1:30:00,2:30:00,3:30:00,4:30:00,5:30:00,6:30:00,7:30:00,8:30:00,9:30:00,10:30:00,11:30:00,12:30:00
0,15-10-2020,diltiazem hydrochloride,Temperature,23.0,22.0,,21.0,21.0,22,23.0,21.0,22.0,20,20.0,21
1,15-10-2020,diltiazem hydrochloride,Pressure,12.0,13.0,,11.0,13.0,14,16.0,16.0,24.0,18,19.0,20
2,15-10-2020,docetaxel injection,Temperature,,17.0,18.0,,17.0,18,,,23.0,23,25.0,25
3,15-10-2020,docetaxel injection,Pressure,,22.0,22.0,,22.0,23,,,27.0,26,29.0,28
4,15-10-2020,ketamine hydrochloride,Temperature,24.0,,,27.0,,26,25.0,24.0,23.0,22,21.0,20
5,15-10-2020,ketamine hydrochloride,Pressure,8.0,,,7.0,,9,10.0,11.0,10.0,9,9.0,11
6,16-10-2020,diltiazem hydrochloride,Temperature,34.0,35.0,36.0,36.0,37.0,38,37.0,38.0,39.0,40,,42
7,16-10-2020,diltiazem hydrochloride,Pressure,18.0,19.0,20.0,21.0,22.0,23,24.0,25.0,25.0,24,,27
8,16-10-2020,docetaxel injection,Temperature,46.0,47.0,,48.0,48.0,49,50.0,52.0,55.0,56,57.0,58
9,16-10-2020,docetaxel injection,Pressure,23.0,24.0,,25.0,26.0,27,28.0,29.0,28.0,28,29.0,30


In [13]:
'''
NaN and None Both represents Missing Data
NAN -> Not a Number
None -> Simplly means None
'''

'\nNaN and None Both represents Missing Data\nNAN -> Not a Number\nNone -> Simplly means None\n'

In [14]:
type(None)

NoneType

In [15]:
type(np.nan)

float

In [16]:
pd.Series([1,np.nan,2,None])

0    1.0
1    NaN
2    2.0
3    NaN
dtype: float64

In [18]:
pd.Series(["1","0","2",None])

0       1
1       0
2       2
3    None
dtype: object

In [19]:
pd.Series(["1",np.nan,"2","3"])

0      1
1    NaN
2      2
3      3
dtype: object

In [20]:
pd.Series(["1",np.nan,"2",None])

0       1
1     NaN
2       2
3    None
dtype: object

### Treatment for Missing Values
- Removing the rows/columns that contain even a single missing value
- Filling missing values with some estimated values

In [22]:
# Prints a mask where every missing value is marked as True
data.isna()

Unnamed: 0,Date,Drug_Name,Parameter,1:30:00,2:30:00,3:30:00,4:30:00,5:30:00,6:30:00,7:30:00,8:30:00,9:30:00,10:30:00,11:30:00,12:30:00
0,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False
2,False,False,False,True,False,False,True,False,False,True,True,False,False,False,False
3,False,False,False,True,False,False,True,False,False,True,True,False,False,False,False
4,False,False,False,False,True,True,False,True,False,False,False,False,False,False,False
5,False,False,False,False,True,True,False,True,False,False,False,False,False,False,False
6,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False
7,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False
8,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False
9,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False


In [23]:
data.isnull()

Unnamed: 0,Date,Drug_Name,Parameter,1:30:00,2:30:00,3:30:00,4:30:00,5:30:00,6:30:00,7:30:00,8:30:00,9:30:00,10:30:00,11:30:00,12:30:00
0,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False
2,False,False,False,True,False,False,True,False,False,True,True,False,False,False,False
3,False,False,False,True,False,False,True,False,False,True,True,False,False,False,False
4,False,False,False,False,True,True,False,True,False,False,False,False,False,False,False
5,False,False,False,False,True,True,False,True,False,False,False,False,False,False,False
6,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False
7,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False
8,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False
9,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False


In [25]:
pd.isna

<function pandas.core.dtypes.missing.isna(obj: 'object') -> 'bool | npt.NDArray[np.bool_] | NDFrame'>

In [26]:
pd.isnull

<function pandas.core.dtypes.missing.isna(obj: 'object') -> 'bool | npt.NDArray[np.bool_] | NDFrame'>

In [27]:
# Missing Values in every Column
data.isna().sum()

Date         0
Drug_Name    0
Parameter    0
1:30:00      2
2:30:00      2
3:30:00      6
4:30:00      4
5:30:00      2
6:30:00      0
7:30:00      2
8:30:00      4
9:30:00      2
10:30:00     0
11:30:00     2
12:30:00     0
dtype: int64

In [29]:
# Missing Values in every Row
data.isna().sum(axis=1)

0     1
1     1
2     4
3     4
4     3
5     3
6     1
7     1
8     1
9     1
10    2
11    2
12    1
13    1
14    0
15    0
16    0
17    0
dtype: int64

In [30]:
# Every row containing at least one missing value is removed
data.dropna()

Unnamed: 0,Date,Drug_Name,Parameter,1:30:00,2:30:00,3:30:00,4:30:00,5:30:00,6:30:00,7:30:00,8:30:00,9:30:00,10:30:00,11:30:00,12:30:00
14,17-10-2020,docetaxel injection,Temperature,12.0,13.0,14.0,15.0,16.0,17,18.0,19.0,20.0,21,22.0,23
15,17-10-2020,docetaxel injection,Pressure,20.0,22.0,22.0,22.0,22.0,23,25.0,26.0,27.0,28,29.0,28
16,17-10-2020,ketamine hydrochloride,Temperature,13.0,14.0,15.0,16.0,17.0,18,19.0,20.0,21.0,22,23.0,24
17,17-10-2020,ketamine hydrochloride,Pressure,8.0,9.0,10.0,11.0,11.0,12,12.0,11.0,12.0,13,14.0,15


In [31]:
# Every column containing at least one missing value is removed
data.dropna(axis = 1)

Unnamed: 0,Date,Drug_Name,Parameter,6:30:00,10:30:00,12:30:00
0,15-10-2020,diltiazem hydrochloride,Temperature,22,20,21
1,15-10-2020,diltiazem hydrochloride,Pressure,14,18,20
2,15-10-2020,docetaxel injection,Temperature,18,23,25
3,15-10-2020,docetaxel injection,Pressure,23,26,28
4,15-10-2020,ketamine hydrochloride,Temperature,26,22,20
5,15-10-2020,ketamine hydrochloride,Pressure,9,9,11
6,16-10-2020,diltiazem hydrochloride,Temperature,38,40,42
7,16-10-2020,diltiazem hydrochloride,Pressure,23,24,27
8,16-10-2020,docetaxel injection,Temperature,49,56,58
9,16-10-2020,docetaxel injection,Pressure,27,28,30


### Method-2 Filling

In [32]:
data.fillna(0)

Unnamed: 0,Date,Drug_Name,Parameter,1:30:00,2:30:00,3:30:00,4:30:00,5:30:00,6:30:00,7:30:00,8:30:00,9:30:00,10:30:00,11:30:00,12:30:00
0,15-10-2020,diltiazem hydrochloride,Temperature,23.0,22.0,0.0,21.0,21.0,22,23.0,21.0,22.0,20,20.0,21
1,15-10-2020,diltiazem hydrochloride,Pressure,12.0,13.0,0.0,11.0,13.0,14,16.0,16.0,24.0,18,19.0,20
2,15-10-2020,docetaxel injection,Temperature,0.0,17.0,18.0,0.0,17.0,18,0.0,0.0,23.0,23,25.0,25
3,15-10-2020,docetaxel injection,Pressure,0.0,22.0,22.0,0.0,22.0,23,0.0,0.0,27.0,26,29.0,28
4,15-10-2020,ketamine hydrochloride,Temperature,24.0,0.0,0.0,27.0,0.0,26,25.0,24.0,23.0,22,21.0,20
5,15-10-2020,ketamine hydrochloride,Pressure,8.0,0.0,0.0,7.0,0.0,9,10.0,11.0,10.0,9,9.0,11
6,16-10-2020,diltiazem hydrochloride,Temperature,34.0,35.0,36.0,36.0,37.0,38,37.0,38.0,39.0,40,0.0,42
7,16-10-2020,diltiazem hydrochloride,Pressure,18.0,19.0,20.0,21.0,22.0,23,24.0,25.0,25.0,24,0.0,27
8,16-10-2020,docetaxel injection,Temperature,46.0,47.0,0.0,48.0,48.0,49,50.0,52.0,55.0,56,57.0,58
9,16-10-2020,docetaxel injection,Pressure,23.0,24.0,0.0,25.0,26.0,27,28.0,29.0,28.0,28,29.0,30


**Problem with filling zero**

In [35]:
a = [18,20,15,20,22,19,None,None,21]
a

[18, 20, 15, 20, 22, 19, None, None, 21]

In [37]:
def calc_average(a):
  """Return the mean of the numeric entries of ``a``, skipping missing values.

  ``None``, ``NaN`` and non-numeric entries are ignored: they contribute
  neither to the sum nor to the number of observations.

  Parameters
  ----------
  a : iterable
      Values to average; may contain ``None`` / ``NaN`` placeholders.

  Returns
  -------
  float
      Mean of the valid entries, or ``nan`` when there are none.
  """
  from numbers import Real

  count = 0
  sum_numbers = 0
  for i in a:
    # Accept any real number (int, float, NumPy scalars), not only ``int``.
    # bool is a subclass of int but is not a reading, so keep excluding it.
    if isinstance(i, bool) or not isinstance(i, Real):
      continue
    # NaN is the only value that is not equal to itself.
    if i != i:
      continue
    count += 1
    sum_numbers += i

  # No valid observations: return NaN rather than raising ZeroDivisionError.
  if count == 0:
    return float("nan")
  return sum_numbers/count

In [38]:
calc_average(a)

19.285714285714285

In [39]:
def calc_average_2(a):
  """Average ``a`` while treating every non-``int`` entry as zero.

  Every element counts toward the denominator, but only plain ``int``
  values are added to the total. This is the "fill missing with 0"
  behaviour that drags the average down.
  """
  n_items = 0
  total = 0
  for n_items, value in enumerate(a, start=1):
    # Anything that is not exactly an int contributes nothing to the total.
    total += value if type(value) == int else 0

  return total / n_items

In [41]:
calc_average_2(a) # Zero affects the average most

15.0

In [51]:
time_data_mean = data["2:30:00"].mean()
time_data_mean

18.8125

In [45]:
data["2:30:00"].fillna(0)

0     22.0
1     13.0
2     17.0
3     22.0
4      0.0
5      0.0
6     35.0
7     19.0
8     47.0
9     24.0
10     9.0
11    12.0
12    19.0
13     4.0
14    13.0
15    22.0
16    14.0
17     9.0
Name: 2:30:00, dtype: float64

In [50]:
data["2:30:00"].fillna(0).mean()


16.72222222222222

In [52]:
# time_data_mean = data["2:30:00"].mean()
data["2:30:00"].fillna(time_data_mean)

0     22.0000
1     13.0000
2     17.0000
3     22.0000
4     18.8125
5     18.8125
6     35.0000
7     19.0000
8     47.0000
9     24.0000
10     9.0000
11    12.0000
12    19.0000
13     4.0000
14    13.0000
15    22.0000
16    14.0000
17     9.0000
Name: 2:30:00, dtype: float64

In [53]:
#! Data_tidy
# fill in the missing values for temp
# with average value of Temp for that given drug

In [71]:
data_tidy

Parameter,Date,Time,Drug_Name,Pressure,Temperature
0,15-10-2020,10:30:00,diltiazem hydrochloride,18.0,20.0
1,15-10-2020,10:30:00,docetaxel injection,26.0,23.0
2,15-10-2020,10:30:00,ketamine hydrochloride,9.0,22.0
3,15-10-2020,11:30:00,diltiazem hydrochloride,19.0,20.0
4,15-10-2020,11:30:00,docetaxel injection,29.0,25.0
...,...,...,...,...,...
103,17-10-2020,8:30:00,docetaxel injection,26.0,19.0
104,17-10-2020,8:30:00,ketamine hydrochloride,11.0,20.0
105,17-10-2020,9:30:00,diltiazem hydrochloride,9.0,13.0
106,17-10-2020,9:30:00,docetaxel injection,27.0,20.0


In [74]:
data_tidy.groupby("Drug_Name")["Temperature"].mean().reset_index()

Unnamed: 0,Drug_Name,Temperature
0,diltiazem hydrochloride,24.848485
1,docetaxel injection,30.387097
2,ketamine hydrochloride,17.709677


In [75]:

def temp_mean(x):
  """Return a copy of ``x`` with a ``Temperature_Avg`` column added.

  The new column holds the mean of ``x["Temperature"]`` (missing values are
  skipped by ``mean``) repeated on every row. The input frame is left
  untouched, because functions passed to ``groupby(...).apply`` should not
  mutate the group they receive.
  """
  return x.assign(Temperature_Avg=x["Temperature"].mean())

In [77]:
data_tidy.groupby(["Drug_Name"]).groups

{'diltiazem hydrochloride': [0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51, 54, 57, 60, 63, 66, 69, 72, 75, 78, 81, 84, 87, 90, 93, 96, 99, 102, 105], 'docetaxel injection': [1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 37, 40, 43, 46, 49, 52, 55, 58, 61, 64, 67, 70, 73, 76, 79, 82, 85, 88, 91, 94, 97, 100, 103, 106], 'ketamine hydrochloride': [2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 32, 35, 38, 41, 44, 47, 50, 53, 56, 59, 62, 65, 68, 71, 74, 77, 80, 83, 86, 89, 92, 95, 98, 101, 104, 107]}

In [86]:
data_tidy["Temperature"].isna().sum()

13

In [87]:
# Works like a SQL window function: each drug's mean temperature is broadcast
# to all of that drug's rows. transform("mean") keeps the original row order
# and leaves the grouping column Drug_Name in the frame.
data_tidy = data_tidy.assign(
  Temperature_Avg=data_tidy.groupby("Drug_Name")["Temperature"].transform("mean")
)

In [88]:
data_tidy.head()

Parameter,Date,Time,Drug_Name,Pressure,Temperature,Temperature_Avg
0,15-10-2020,10:30:00,diltiazem hydrochloride,18.0,20.0,24.848485
1,15-10-2020,10:30:00,docetaxel injection,26.0,23.0,30.387097
2,15-10-2020,10:30:00,ketamine hydrochloride,9.0,22.0,17.709677
3,15-10-2020,11:30:00,diltiazem hydrochloride,19.0,20.0,24.848485
4,15-10-2020,11:30:00,docetaxel injection,29.0,25.0,30.387097


In [89]:
# Assign the filled Series back to the column. Calling fillna(..., inplace=True)
# on the data_tidy["Temperature"] selection is chained assignment, which is not
# guaranteed to write through to data_tidy.
data_tidy["Temperature"] = data_tidy["Temperature"].fillna(data_tidy["Temperature_Avg"])

In [90]:
data_tidy["Temperature"].isna().sum()

0

### Do Same for Pressure


In [91]:
data_tidy["Pressure"].isna().sum()

13

In [92]:

def pressure_mean(x):
  """Return a copy of ``x`` with a ``Pressure_Avg`` column added.

  The new column holds the mean of ``x["Pressure"]`` (missing values are
  skipped by ``mean``) repeated on every row. The input frame is left
  untouched, because functions passed to ``groupby(...).apply`` should not
  mutate the group they receive.
  """
  return x.assign(Pressure_Avg=x["Pressure"].mean())

In [93]:
# Works like a SQL window function: each drug's mean pressure is broadcast to
# all of that drug's rows. transform("mean") keeps the original row order and
# leaves the grouping column Drug_Name in the frame.
data_tidy = data_tidy.assign(
  Pressure_Avg=data_tidy.groupby("Drug_Name")["Pressure"].transform("mean")
)

In [94]:
data_tidy

Parameter,Date,Time,Drug_Name,Pressure,Temperature,Temperature_Avg,Pressure_Avg
0,15-10-2020,10:30:00,diltiazem hydrochloride,18.0,20.0,24.848485,15.424242
1,15-10-2020,10:30:00,docetaxel injection,26.0,23.0,30.387097,25.483871
2,15-10-2020,10:30:00,ketamine hydrochloride,9.0,22.0,17.709677,11.935484
3,15-10-2020,11:30:00,diltiazem hydrochloride,19.0,20.0,24.848485,15.424242
4,15-10-2020,11:30:00,docetaxel injection,29.0,25.0,30.387097,25.483871
...,...,...,...,...,...,...,...
103,17-10-2020,8:30:00,docetaxel injection,26.0,19.0,30.387097,25.483871
104,17-10-2020,8:30:00,ketamine hydrochloride,11.0,20.0,17.709677,11.935484
105,17-10-2020,9:30:00,diltiazem hydrochloride,9.0,13.0,24.848485,15.424242
106,17-10-2020,9:30:00,docetaxel injection,27.0,20.0,30.387097,25.483871


In [95]:
# Assign the filled Series back to the column. Calling fillna(..., inplace=True)
# on the data_tidy["Pressure"] selection is chained assignment, which is not
# guaranteed to write through to data_tidy.
data_tidy["Pressure"] = data_tidy["Pressure"].fillna(data_tidy["Pressure_Avg"])

In [96]:
data_tidy["Pressure"].isna().sum()

0

In [115]:
data_tidy.isna().sum()

Date                    0
Time                    0
Drug_Name               0
Pressure                0
Temperature             0
Temperature_Avg         0
Pressure_Avg            0
Temperature_Category    0
dtype: int64

## Pandas Cut

In [98]:
print(data_tidy["Temperature"].min(), data_tidy["Temperature"].max())

8.0 58.0


In [99]:
# Assumption --> temp ranges between 5 and 65

In [100]:
temp_points = [5, 20, 35, 50, 65]
temp_labels = ["low", "medium", "high", "very high"]

In [114]:
# Bucket each temperature into a labelled range using the edges in temp_points.
# pd.cut bins are right-closed by default: (5, 20], (20, 35], (35, 50], (50, 65],
# so a reading of exactly 20.0 is labelled "low", not "medium".
data_tidy["Temperature_Category"] = pd.cut(
  data_tidy["Temperature"],
  bins = temp_points,
  labels = temp_labels
)
data_tidy.head()

Unnamed: 0,Date,Time,Drug_Name,Pressure,Temperature,Temperature_Avg,Pressure_Avg,Temperature_Category
0,15-10-2020,10:30:00,diltiazem hydrochloride,18.0,20.0,24.848485,15.424242,low
1,15-10-2020,10:30:00,docetaxel injection,26.0,23.0,30.387097,25.483871,medium
2,15-10-2020,10:30:00,ketamine hydrochloride,9.0,22.0,17.709677,11.935484,medium
3,15-10-2020,11:30:00,diltiazem hydrochloride,19.0,20.0,24.848485,15.424242,low
4,15-10-2020,11:30:00,docetaxel injection,29.0,25.0,30.387097,25.483871,medium


### String Functions

In [104]:
# only rows which contains hydrochloride in the drug name

In [105]:
data_tidy["Drug_Name"].str.contains("hydrochloride")

0       True
1      False
2       True
3       True
4      False
       ...  
103    False
104     True
105     True
106    False
107     True
Name: Drug_Name, Length: 108, dtype: bool

In [112]:
data_tidy[data_tidy["Drug_Name"].str.contains("hydrochloride")]

Unnamed: 0,Date,Time,Drug_Name,Pressure,Temperature,Temperature_Avg,Pressure_Avg,Temperature_Category
0,15-10-2020,10:30:00,diltiazem hydrochloride,18.000000,20.000000,24.848485,15.424242,low
2,15-10-2020,10:30:00,ketamine hydrochloride,9.000000,22.000000,17.709677,11.935484,medium
3,15-10-2020,11:30:00,diltiazem hydrochloride,19.000000,20.000000,24.848485,15.424242,low
5,15-10-2020,11:30:00,ketamine hydrochloride,9.000000,21.000000,17.709677,11.935484,medium
6,15-10-2020,12:30:00,diltiazem hydrochloride,20.000000,21.000000,24.848485,15.424242,medium
...,...,...,...,...,...,...,...,...
101,17-10-2020,7:30:00,ketamine hydrochloride,12.000000,19.000000,17.709677,11.935484,low
102,17-10-2020,8:30:00,diltiazem hydrochloride,15.424242,24.848485,24.848485,15.424242,medium
104,17-10-2020,8:30:00,ketamine hydrochloride,11.000000,20.000000,17.709677,11.935484,low
105,17-10-2020,9:30:00,diltiazem hydrochloride,9.000000,13.000000,24.848485,15.424242,low


In [113]:
data_tidy.loc[data_tidy["Drug_Name"].str.contains("hydrochloride")]

Unnamed: 0,Date,Time,Drug_Name,Pressure,Temperature,Temperature_Avg,Pressure_Avg,Temperature_Category
0,15-10-2020,10:30:00,diltiazem hydrochloride,18.000000,20.000000,24.848485,15.424242,low
2,15-10-2020,10:30:00,ketamine hydrochloride,9.000000,22.000000,17.709677,11.935484,medium
3,15-10-2020,11:30:00,diltiazem hydrochloride,19.000000,20.000000,24.848485,15.424242,low
5,15-10-2020,11:30:00,ketamine hydrochloride,9.000000,21.000000,17.709677,11.935484,medium
6,15-10-2020,12:30:00,diltiazem hydrochloride,20.000000,21.000000,24.848485,15.424242,medium
...,...,...,...,...,...,...,...,...
101,17-10-2020,7:30:00,ketamine hydrochloride,12.000000,19.000000,17.709677,11.935484,low
102,17-10-2020,8:30:00,diltiazem hydrochloride,15.424242,24.848485,24.848485,15.424242,medium
104,17-10-2020,8:30:00,ketamine hydrochloride,11.000000,20.000000,17.709677,11.935484,low
105,17-10-2020,9:30:00,diltiazem hydrochloride,9.000000,13.000000,24.848485,15.424242,low


#### Splitting Values

In [116]:
data_tidy["Date"]

0      15-10-2020
1      15-10-2020
2      15-10-2020
3      15-10-2020
4      15-10-2020
          ...    
103    17-10-2020
104    17-10-2020
105    17-10-2020
106    17-10-2020
107    17-10-2020
Name: Date, Length: 108, dtype: object

In [117]:
data_tidy["Date"].str.split("-")

0      [15, 10, 2020]
1      [15, 10, 2020]
2      [15, 10, 2020]
3      [15, 10, 2020]
4      [15, 10, 2020]
            ...      
103    [17, 10, 2020]
104    [17, 10, 2020]
105    [17, 10, 2020]
106    [17, 10, 2020]
107    [17, 10, 2020]
Name: Date, Length: 108, dtype: object

In [118]:
# Take the third piece (the year) of each "DD-MM-YYYY" string with the
# vectorised .str accessor; unlike apply + lambda it yields NaN for missing
# dates instead of raising.
data_tidy["Date"].str.split("-").str[2]

0      2020
1      2020
2      2020
3      2020
4      2020
       ... 
103    2020
104    2020
105    2020
106    2020
107    2020
Name: Date, Length: 108, dtype: object

## Date-Time

In [119]:
data_tidy.head()

Unnamed: 0,Date,Time,Drug_Name,Pressure,Temperature,Temperature_Avg,Pressure_Avg,Temperature_Category
0,15-10-2020,10:30:00,diltiazem hydrochloride,18.0,20.0,24.848485,15.424242,low
1,15-10-2020,10:30:00,docetaxel injection,26.0,23.0,30.387097,25.483871,medium
2,15-10-2020,10:30:00,ketamine hydrochloride,9.0,22.0,17.709677,11.935484,medium
3,15-10-2020,11:30:00,diltiazem hydrochloride,19.0,20.0,24.848485,15.424242,low
4,15-10-2020,11:30:00,docetaxel injection,29.0,25.0,30.387097,25.483871,medium


In [120]:
data_tidy["TimeStamp"] = data_tidy["Date"] + " " + data_tidy["Time"]

In [121]:
data_tidy

Unnamed: 0,Date,Time,Drug_Name,Pressure,Temperature,Temperature_Avg,Pressure_Avg,Temperature_Category,TimeStamp
0,15-10-2020,10:30:00,diltiazem hydrochloride,18.0,20.0,24.848485,15.424242,low,15-10-2020 10:30:00
1,15-10-2020,10:30:00,docetaxel injection,26.0,23.0,30.387097,25.483871,medium,15-10-2020 10:30:00
2,15-10-2020,10:30:00,ketamine hydrochloride,9.0,22.0,17.709677,11.935484,medium,15-10-2020 10:30:00
3,15-10-2020,11:30:00,diltiazem hydrochloride,19.0,20.0,24.848485,15.424242,low,15-10-2020 11:30:00
4,15-10-2020,11:30:00,docetaxel injection,29.0,25.0,30.387097,25.483871,medium,15-10-2020 11:30:00
...,...,...,...,...,...,...,...,...,...
103,17-10-2020,8:30:00,docetaxel injection,26.0,19.0,30.387097,25.483871,low,17-10-2020 8:30:00
104,17-10-2020,8:30:00,ketamine hydrochloride,11.0,20.0,17.709677,11.935484,low,17-10-2020 8:30:00
105,17-10-2020,9:30:00,diltiazem hydrochloride,9.0,13.0,24.848485,15.424242,low,17-10-2020 9:30:00
106,17-10-2020,9:30:00,docetaxel injection,27.0,20.0,30.387097,25.483871,low,17-10-2020 9:30:00


In [122]:
data_tidy.drop(["Date", "Time"], axis=1, inplace=True)

In [123]:
data_tidy.head()

Unnamed: 0,Drug_Name,Pressure,Temperature,Temperature_Avg,Pressure_Avg,Temperature_Category,TimeStamp
0,diltiazem hydrochloride,18.0,20.0,24.848485,15.424242,low,15-10-2020 10:30:00
1,docetaxel injection,26.0,23.0,30.387097,25.483871,medium,15-10-2020 10:30:00
2,ketamine hydrochloride,9.0,22.0,17.709677,11.935484,medium,15-10-2020 10:30:00
3,diltiazem hydrochloride,19.0,20.0,24.848485,15.424242,low,15-10-2020 11:30:00
4,docetaxel injection,29.0,25.0,30.387097,25.483871,medium,15-10-2020 11:30:00


#### Date-Time DataType

In [125]:
type(data_tidy["TimeStamp"][0])

str

In [126]:
# The strings are day-first (e.g. "15-10-2020 10:30:00"). State the format
# explicitly instead of letting pandas guess it, so day and month can never
# be swapped.
data_tidy["TimeStamp"] = pd.to_datetime(data_tidy["TimeStamp"], format="%d-%m-%Y %H:%M:%S")

  data_tidy["TimeStamp"] = pd.to_datetime(data_tidy["TimeStamp"])


In [127]:
type(data_tidy["TimeStamp"][0])

pandas._libs.tslibs.timestamps.Timestamp

In [128]:
data_tidy.info()

<class 'pandas.core.frame.DataFrame'>
Index: 108 entries, 0 to 107
Data columns (total 7 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   Drug_Name             108 non-null    object        
 1   Pressure              108 non-null    float64       
 2   Temperature           108 non-null    float64       
 3   Temperature_Avg       108 non-null    float64       
 4   Pressure_Avg          108 non-null    float64       
 5   Temperature_Category  108 non-null    category      
 6   TimeStamp             108 non-null    datetime64[ns]
dtypes: category(1), datetime64[ns](1), float64(4), object(1)
memory usage: 10.3+ KB


In [130]:
ts = data_tidy["TimeStamp"][0]
ts

Timestamp('2020-10-15 10:30:00')

In [131]:
ts.date

<bound method Timestamp.date of Timestamp('2020-10-15 10:30:00')>

In [132]:
ts.day

15

In [133]:
ts.month

10

In [135]:
ts.year

2020

In [136]:
ts.week

42

In [137]:
ts.hour

10

In [138]:
ts.minute

30

In [139]:
ts.date()

datetime.date(2020, 10, 15)

In [140]:
ts.month_name()

'October'

In [146]:
ts.weekday()

3

In [147]:
ts.day_name()

'Thursday'

In [141]:
ts

Timestamp('2020-10-15 10:30:00')

In [142]:
ts.strftime("%Y")

'2020'

In [143]:
ts.strftime("%y") # Last two digits of the year

'20'

In [144]:
ts.strftime("%M")

'30'

In [145]:
# Note: %M is the minute (30 here), not the month; use %m for the month.
# %D rendered as MM/DD/YY in this run.
ts.strftime("%D / %M")

'10/15/20 / 30'

# **Exporting / Writing in a File**

In [148]:
data_tidy.to_csv("Pfizer_Final.csv", sep=",", index=False)