In [None]:
import datetime as dt

In [None]:
d1 = dt.datetime.now()
print(d1)

2021-02-11 11:00:40.362977


In [None]:
print(d1.day, d1.month,d1.year, d1.hour, d1.minute,d1.second)

11 2 2021 11 0 40


---

In [None]:
d2 = d1 + dt.timedelta(hours = 5, minutes = 30)
print(d2)

2021-02-11 16:30:40.362977


In [None]:
d3 = d1 - dt.timedelta(hours = 7)
print(d3)

2021-02-11 04:00:40.362977


---

In [None]:
s1 = d2.strftime('%B %d, %Y %I : %M : %S %p')
s1

'February 11, 2021 04 : 30 : 40 PM'

In [None]:
d5 = d2.date().strftime('%d %b %Y')
d6 = d2.time().strftime('%I:%M:%S %p')
print(d5 , d6)

11 Feb 2021 04:30:40 PM


---

In [None]:
d7 = dt.datetime.strptime('December 26, 2019 03:34:00 AM', "%B %d, %Y %I:%M:%S %p")
print(d7, type(d7))

2019-12-26 03:34:00 <class 'datetime.datetime'>


In [None]:
nxt = d7 + dt.timedelta(days=6585.3)
print(nxt)

2038-01-05 10:46:00


Last annular solar eclipse will repeat again on 5 January 2038 at about 10:47 AM. You can verify it by opening the document provided below and jumping to page number A-480.

[List of Solar Eclipses](https://eclipse.gsfc.nasa.gov/5MCSE/5MCSE-Maps-10.pdf)


<img src='https://student-datasets-bucket.s3.ap-south-1.amazonaws.com/images/next_annular_solar_eclipse2038.png' width=800>

We can create separate `datetime.date` and `datetime.time` objects by passing values to the `date()` and `time()` functions.

In [None]:
bd = dt.date(2005, 5, 2)
print(bd)

2005-05-02


In [None]:
bd1 = dt.time(14, 5, 3)
print(bd1)

14:05:03


---

#### Activity 7: Continuing Air Quality Analysis

Let's continue with the air quality analysis. We need to load the dataset, remove the `Unnamed: 15` & `Unnamed: 16` columns and drop the null values.

Also, in the previous class, we created a new Pandas series containing the concatenated date and time values.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

csv_file = 'https://student-datasets-bucket.s3.ap-south-1.amazonaws.com/whitehat-ds-datasets/air-quality/AirQualityUCI.csv'
df = pd.read_csv(csv_file, sep=';')

df = df.drop(columns=['Unnamed: 15', 'Unnamed: 16'], axis=1)
df = df.dropna()
dt_series = df['Date'] + ' ' + pd.Series(data=[str(item).replace(".", ":") for item in df['Time']], index=df.index)
dt_series = pd.to_datetime(dt_series)
dt_series

0      2004-10-03 18:00:00
1      2004-10-03 19:00:00
2      2004-10-03 20:00:00
3      2004-10-03 21:00:00
4      2004-10-03 22:00:00
               ...        
9352   2005-04-04 10:00:00
9353   2005-04-04 11:00:00
9354   2005-04-04 12:00:00
9355   2005-04-04 13:00:00
9356   2005-04-04 14:00:00
Length: 9357, dtype: datetime64[ns]

Let's remove the `Date` & `Time` columns from the DataFrame because we don't need them and insert the `dt_series` in it because it contains the `datetime` objects.

In [None]:
df.head()

Unnamed: 0,Date,Time,CO(GT),PT08.S1(CO),NMHC(GT),C6H6(GT),PT08.S2(NMHC),NOx(GT),PT08.S3(NOx),NO2(GT),PT08.S4(NO2),PT08.S5(O3),T,RH,AH
0,10/03/2004,18.00.00,26,1360.0,150.0,119,1046.0,166.0,1056.0,113.0,1692.0,1268.0,136,489,7578
1,10/03/2004,19.00.00,2,1292.0,112.0,94,955.0,103.0,1174.0,92.0,1559.0,972.0,133,477,7255
2,10/03/2004,20.00.00,22,1402.0,88.0,90,939.0,131.0,1140.0,114.0,1555.0,1074.0,119,540,7502
3,10/03/2004,21.00.00,22,1376.0,80.0,92,948.0,172.0,1092.0,122.0,1584.0,1203.0,110,600,7867
4,10/03/2004,22.00.00,16,1272.0,51.0,65,836.0,131.0,1205.0,116.0,1490.0,1110.0,112,596,7888


Let's add the `dt_series` Pandas series to the DataFrame at `index = 0`. Also, let's label it as a `DateTime` column.

In [None]:
df = df.drop(['Date','Time'],axis = 1)
df.insert(loc =0,column = 'DateTime',value = dt_series)


In [None]:
df.head()

Unnamed: 0,DateTime,CO(GT),PT08.S1(CO),NMHC(GT),C6H6(GT),PT08.S2(NMHC),NOx(GT),PT08.S3(NOx),NO2(GT),PT08.S4(NO2),PT08.S5(O3),T,RH,AH
0,2004-10-03 18:00:00,26,1360.0,150.0,119,1046.0,166.0,1056.0,113.0,1692.0,1268.0,136,489,7578
1,2004-10-03 19:00:00,2,1292.0,112.0,94,955.0,103.0,1174.0,92.0,1559.0,972.0,133,477,7255
2,2004-10-03 20:00:00,22,1402.0,88.0,90,939.0,131.0,1140.0,114.0,1555.0,1074.0,119,540,7502
3,2004-10-03 21:00:00,22,1376.0,80.0,92,948.0,172.0,1092.0,122.0,1584.0,1203.0,110,600,7867
4,2004-10-03 22:00:00,16,1272.0,51.0,65,836.0,131.0,1205.0,116.0,1490.0,1110.0,112,596,7888


---

In [None]:
y_s = dt_series.dt.year
y_s

0       2004
1       2004
2       2004
3       2004
4       2004
        ... 
9352    2005
9353    2005
9354    2005
9355    2005
9356    2005
Length: 9357, dtype: int64

In [None]:
ms = dt_series.dt.month
ms

0       10
1       10
2       10
3       10
4       10
        ..
9352     4
9353     4
9354     4
9355     4
9356     4
Length: 9357, dtype: int64

In [None]:
ds = dt_series.dt.day
ds

0       3
1       3
2       3
3       3
4       3
       ..
9352    4
9353    4
9354    4
9355    4
9356    4
Length: 9357, dtype: int64

In [None]:
dns = dt_series.dt.day_name()
dns

0       Sunday
1       Sunday
2       Sunday
3       Sunday
4       Sunday
         ...  
9352    Monday
9353    Monday
9354    Monday
9355    Monday
9356    Monday
Length: 9357, dtype: object

In [None]:
df['Year'] = y_s
df['Month'] = ms
df['Day'] = ds
df['Day of the Week'] = dns
df.head()

Unnamed: 0,DateTime,CO(GT),PT08.S1(CO),NMHC(GT),C6H6(GT),PT08.S2(NMHC),NOx(GT),PT08.S3(NOx),NO2(GT),PT08.S4(NO2),PT08.S5(O3),T,RH,AH,Year,Month,Day,Day of the Week
0,2004-10-03 18:00:00,26,1360.0,150.0,119,1046.0,166.0,1056.0,113.0,1692.0,1268.0,136,489,7578,2004,10,3,Sunday
1,2004-10-03 19:00:00,2,1292.0,112.0,94,955.0,103.0,1174.0,92.0,1559.0,972.0,133,477,7255,2004,10,3,Sunday
2,2004-10-03 20:00:00,22,1402.0,88.0,90,939.0,131.0,1140.0,114.0,1555.0,1074.0,119,540,7502,2004,10,3,Sunday
3,2004-10-03 21:00:00,22,1376.0,80.0,92,948.0,172.0,1092.0,122.0,1584.0,1203.0,110,600,7867,2004,10,3,Sunday
4,2004-10-03 22:00:00,16,1272.0,51.0,65,836.0,131.0,1205.0,116.0,1490.0,1110.0,112,596,7888,2004,10,3,Sunday


In [None]:
df = df.sort_values(by = 'DateTime')
df.head(30)

Unnamed: 0,DateTime,CO(GT),PT08.S1(CO),NMHC(GT),C6H6(GT),PT08.S2(NMHC),NOx(GT),PT08.S3(NOx),NO2(GT),PT08.S4(NO2),PT08.S5(O3),T,RH,AH,Year,Month,Day,Day of the Week
510,2004-01-04 00:00:00,16,1143.0,106.0,63,825.0,96.0,986.0,86.0,1477.0,978.0,120,616,8593,2004,1,4,Sunday
511,2004-01-04 01:00:00,12,1044.0,100.0,51,770.0,85.0,1031.0,70.0,1425.0,944.0,115,639,8652,2004,1,4,Sunday
512,2004-01-04 02:00:00,11,1034.0,71.0,41,716.0,50.0,1085.0,55.0,1405.0,891.0,107,672,8630,2004,1,4,Sunday
513,2004-01-04 03:00:00,9,956.0,72.0,40,713.0,-200.0,1099.0,-200.0,1422.0,849.0,90,731,8394,2004,1,4,Sunday
514,2004-01-04 04:00:00,7,909.0,44.0,24,615.0,57.0,1237.0,49.0,1322.0,790.0,102,666,8299,2004,1,4,Sunday
515,2004-01-04 05:00:00,9,996.0,45.0,29,648.0,64.0,1176.0,50.0,1340.0,852.0,110,637,8325,2004,1,4,Sunday
516,2004-01-04 06:00:00,17,1154.0,134.0,74,876.0,153.0,1002.0,67.0,1561.0,987.0,96,688,8243,2004,1,4,Sunday
517,2004-01-04 07:00:00,42,1510.0,505.0,198,1291.0,342.0,675.0,94.0,1949.0,1435.0,95,696,8273,2004,1,4,Sunday
518,2004-01-04 08:00:00,62,1722.0,1042.0,319,1595.0,378.0,539.0,119.0,2439.0,1798.0,119,609,8455,2004,1,4,Sunday
519,2004-01-04 09:00:00,46,1512.0,737.0,210,1323.0,304.0,631.0,139.0,2001.0,1677.0,162,486,8892,2004,1,4,Sunday


---