# Making instrument status masks from original data

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [None]:
# Directory holding the untouched source CSVs (relative to this notebook)
# and the two files read from it.
path_raw_original = '../src/data/raw/original/'
filename_pollutants = 'Measurement_item_info.csv'
filename_instruments = 'Measurement_info.csv'

# Read both tables in one pass: the item (pollutant) lookup table first,
# then the per-measurement records that carry the instrument status.
df_pollutants, df_instruments = (
    pd.read_csv(path_raw_original + csv_name)
    for csv_name in (filename_pollutants, filename_instruments)
)


In [None]:
# Preview the first six rows of df_pollutants (read from Measurement_item_info.csv).
df_pollutants.head(6)

In [None]:
# Display the measurement table as loaded from Measurement_info.csv.
df_instruments

In [None]:
# Translate the numeric 'Item code' into the human-readable 'Item name'.

# Lookup dict {Item code -> Item name} taken from the pollutant table.
pollutants_dict = df_pollutants.set_index('Item code')['Item name'].to_dict()

# Swap the codes for names, then rename the column to match its new content.
# Codes missing from the lookup are left as they are by `replace`.
df_instruments['Item code'] = df_instruments['Item code'].replace(pollutants_dict)
df_instruments.rename(columns={'Item code': 'Item name'}, inplace=True)

df_instruments

In [None]:
# Sanity check: True if at least one cell anywhere in the table is missing.

df_instruments.isna().to_numpy().any()

In [None]:
# Reshape from long to wide: one row per (measurement date, station),
# one column per Item name, cells holding the Instrument status.

# Row key: the measurement date string with the station code appended
# directly (no separator).
df_instruments['idx'] = (
    df_instruments['Measurement date']
    + df_instruments['Station code'].astype(str)
)

# Drop the columns that are now folded into the key (plus the unused
# 'Average value'), then pivot on the key.
df_instruments = (
    df_instruments
    .drop(columns=['Measurement date', 'Station code', 'Average value'])
    .pivot(index='idx', columns='Item name', values='Instrument status')
)


In [None]:
# Put the pollutant columns in the same order as the summary dataframe.

cols = ['SO2', 'NO2', 'O3','CO','PM10','PM2.5']
df_instruments = df_instruments.loc[:, cols]


In [None]:
# Display the pivoted status table with the columns in their final order.
df_instruments

In [None]:
# Spot-check a single row. The index key is the measurement date string
# followed directly by the station code (here 112).
sample_key = '2017-01-01 04:00' + str(112)
df_instruments.loc[sample_key]

In [None]:
# Create one boolean mask per instrument status code and pickle it.
# Each mask has the same index/columns as df_instruments[cols]; a cell is
# True where the instrument reported that status code, False otherwise
# (missing values compare as False).
# 0: Normal, 1: Need for calibration, 2: Abnormal, 4: Power cut off, 8: Under repair, 9: abnormal data

for i in [0,1,2,4,8,9]:
    # A direct comparison gives the same True/False pattern as the former
    # where(..., False).mask(..., True) chain, but always with bool dtype.
    # Writing bools into a numeric frame via where/mask can upcast it to
    # object dtype, which is unsafe for later operations such as `~mask`.
    df_sub = df_instruments[cols] == i
    df_sub.to_pickle('../src/data/interim/instrument_mask_{}.pkl'.format(i))
    # Print one known row as a quick visual check of the mask.
    print('Mask for instrument status code {}:\n'.format(i),
          df_sub.loc['2017-01-01 04:00'+str(112)])
