In [1]:
# Install google-drive-ocamlfuse, a FUSE wrapper used below to mount Google Drive
# as a local directory.
# https://github.com/astrada/google-drive-ocamlfuse
# Packages installed first; presumably they provide the add-apt-repository
# command used on the next step (confirm).
!apt-get install -y -qq software-properties-common python-software-properties module-init-tools
# Register the PPA that ships google-drive-ocamlfuse.
# NOTE(review): with the order `2>&1 > /dev/null` only stdout is discarded; stderr
# still reaches the cell output. Use `> /dev/null 2>&1` if full silence was
# intended (confirm).
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
# Refresh the package index so the newly added PPA is visible to apt-get.
!apt-get update -qq 2>&1 > /dev/null
# Install the Drive FUSE client together with the fuse package.
!apt-get -y install -qq google-drive-ocamlfuse fuse

gpg: keybox '/tmp/tmprxt5igi6/pubring.gpg' created
gpg: /tmp/tmprxt5igi6/trustdb.gpg: trustdb created
gpg: key AD5F235DF639B041: public key "Launchpad PPA for Alessandro Strada" imported
gpg: Total number processed: 1
gpg:               imported: 1


In [0]:
# Generate auth tokens for Colab.
# The helper is imported from google.colab, so this cell presumably only works
# inside a Colab runtime (confirm).
from google.colab import auth
auth.authenticate_user()

In [3]:
# Generate creds for the Drive FUSE library.
from oauth2client.client import GoogleCredentials
# Application-default credentials; their client_id / client_secret are handed to
# the google-drive-ocamlfuse command line below.
creds = GoogleCredentials.get_application_default()
import getpass
# First headless run with stdin redirected from /dev/null; only the output lines
# containing "URL" are kept by grep.
!google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
# Prompt for the verification code without echoing it into the notebook.
vcode = getpass.getpass()
# Second headless run, with the entered code piped to the tool's stdin.
# NOTE(review): {vcode} is interpolated unquoted into a shell command line, so
# shell metacharacters in the input would be interpreted by the shell.
!echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}

··········


In [4]:
!mkdir -p drive
!google-drive-ocamlfuse drive

fuse: mountpoint is not empty
fuse: if you are sure this is safe, use the 'nonempty' mount option


In [5]:
# Install xlrd ahead of the pd.read_excel call on the .xls workbook further down
# (presumably it is the reader backend pandas needs for .xls — confirm).
# NOTE(review): the version is not pinned, so results may differ between runs.
!pip install xlrd



In [0]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display

In [0]:
# Path of the raw AQI workbook inside the mounted Drive folder,
# relative to the current working directory.
AQI_FILEPATH = os.path.join(
    os.curdir,
    'drive',
    'Colab Notebooks',
    'Pandas Training',
    'Preprocessing',
    'mailiao_aqi.xls',
)

# Load the workbook into a DataFrame.
df = pd.read_excel(AQI_FILEPATH)

In [0]:
# Convert every column label to str.
df.columns = list(map(str, df.columns))

In [0]:
# Rename the columns from Chinese to English.
# Hour columns are zero-padded two-digit strings: 00, 01, ..., 23.
columns_hours = ['{:02d}'.format(h) for h in range(24)]
columns = ['date', 'location', 'item', *columns_hours]
df.columns = columns

In [0]:
# Split the data by hour: one DataFrame per hour column, each holding
# ['date', 'item', <that hour>].
hours_split_df = list(
    map(lambda hour: df.loc[:, ['date', 'item', hour]], columns_hours)
)

In [0]:
# Move the hour out of the column label into a dedicated 'hour' column.
# The hour label is taken from columns_hours (the list hours_split_df was built
# from, in the same order) rather than from hour_df.columns[-1]: after a first
# run the last column is already named 'hour', so re-running the cell would
# otherwise overwrite every hour value with the literal string 'hour'.
for hour, hour_df in zip(columns_hours, hours_split_df):
    hour_df.loc[:, 'hour'] = hour  # same label for every row of this per-hour frame
    hour_df.columns = ['date', 'item', 'value', 'hour']  # the measurement column becomes 'value'

In [0]:
# Stack all per-hour DataFrames row-wise into a single long DataFrame
# (the original row labels are kept).
df_concat = pd.concat(objs=hours_split_df, axis=0, ignore_index=False)

In [0]:
# Build the columns needed to assemble a full timestamp.
# Plain column assignment (df[col] = ...) is used instead of df.loc[:, col] = ...:
# on pandas >= 2.0 the .loc form writes into the existing column in place, so an
# object-dtype 'date' column would stay object and the .dt accessor below would
# raise.
df_concat['date'] = pd.to_datetime(df_concat['date'])  # convert to datetime dtype
df_concat['year'] = df_concat['date'].dt.year  # year
df_concat['month'] = df_concat['date'].dt.month  # month
df_concat['day'] = df_concat['date'].dt.day  # day of month
# Assemble the timestamp from the four component columns.
df_concat['datetime'] = pd.to_datetime(df_concat[['year', 'month', 'day', 'hour']])

In [0]:
# Keep only the columns that are needed, as an independent copy.
df_aqi = df_concat.loc[:, ['datetime', 'item', 'value']].copy()

# Save the long-format (not yet pivoted) table as CSV, without the index column.
df_aqi.to_csv(
    os.path.join(
        os.curdir,
        'drive',
        'Colab Notebooks',
        'Pandas Training',
        'Preprocessing',
        'aqi_nopivot.csv',
    ),
    index=False,
)

In [0]:
# Pivot to wide format (one row per datetime, one column per item), then
# replace values flagged with the special symbols *, # or x by NaN.
df_aqi_pivot = (
    df_aqi
    .pivot(index='datetime', columns='item', values='value')
    .replace(to_replace=r'[-]?\D*[.]?\D*[#*x]+', value=np.nan, regex=True)
)

In [0]:
# Show the pivoted table as the cell output: rows are indexed by datetime,
# columns are the measured items.
df_aqi_pivot

item,AMB_TEMP,CH4,CO,NMHC,NO,NO2,NOx,O3,PM10,PM2.5,RAINFALL,RH,SO2,THC,WD_HR,WIND_DIREC,WIND_SPEED,WS_HR
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2017-01-01 00:00:00,20,2,0.2,0.04,1.7,7.5,9.2,25,67,18,NR,88,1.5,2,33,37,1.4,0.1
2017-01-01 01:00:00,20,2.2,0.19,0.05,1.9,7.9,9.8,18,52,14,NR,88,2.1,2.2,76,143,0.5,0.6
2017-01-01 02:00:00,19,2.2,0.24,0.08,2,9.3,11,13,59,17,NR,89,2,2.3,140,142,0.5,0.6
2017-01-01 03:00:00,19,2.4,0.24,0.11,1.4,9.5,11,8.5,53,24,NR,90,1.8,2.5,107,51,0.5,0.4
2017-01-01 04:00:00,18,3.4,0.23,0.12,6.8,11,17,1.8,37,25,NR,90,1.7,3.6,96,106,1,0.5
2017-01-01 05:00:00,17,3.6,0.37,0.13,8.4,12,20,2,57,28,NR,91,2,3.7,79,114,0.8,0.7
2017-01-01 06:00:00,18,3.3,0.42,0.13,8.1,12,20,1.9,41,33,NR,91,2,3.4,74,67,1.1,0.9
2017-01-01 07:00:00,18,2.8,0.51,0.19,15,14,29,2.9,60,36,NR,92,2,2.9,33,11,1.1,0.7
2017-01-01 08:00:00,19,2.5,0.54,0.18,18,14,32,5,50,41,NR,93,1.9,2.7,49,40,1.5,1.1
2017-01-01 09:00:00,20,2.2,0.53,0.16,11,15,27,13,74,46,NR,93,3,2.4,51,47,1.9,1.2
