# Making instrument status masks from original data

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [None]:
# Where the raw CSV exports live, and the two files this notebook reads
path_raw_original = '../data/raw/original/'
filename_pollutants = 'Measurement_item_info.csv'
filename_instruments = 'Measurement_info.csv'

# Read both tables; each path is the directory prefix followed by the file name
df_pollutants, df_instruments = (
    pd.read_csv(f'{path_raw_original}{csv_name}')
    for csv_name in (filename_pollutants, filename_instruments)
)


In [None]:
# Preview the first six rows of the pollutant table
df_pollutants.head(6)

In [None]:
# Display the instrument measurements table
df_instruments

In [None]:
# Swap the numeric Item code for its Item name

# Lookup table: Item code -> Item name, taken from the pollutant table
pollutants_dict = df_pollutants.set_index('Item code')['Item name'].to_dict()

# Translate the codes in place, then rename the column to match its new contents
df_instruments['Item code'] = df_instruments['Item code'].replace(to_replace=pollutants_dict)
df_instruments.rename(columns={'Item code': 'Item name'}, inplace=True)
df_instruments

In [None]:
# Is any cell of the instrument table missing?

df_instruments.isna().to_numpy().any()

In [None]:
# Reshape to wide format: one row per (measurement date, station), one column per Item name

df_instruments = (
    df_instruments
    # Row key: the measurement date string immediately followed by the station code
    .assign(idx=df_instruments['Measurement date'] + df_instruments['Station code'].astype(str))
    # These columns are no longer needed once the key exists
    .drop(columns=['Measurement date', 'Station code', 'Average value'])
    # Cell values are the instrument status for that key / Item name
    .pivot(index='idx', columns='Item name', values='Instrument status')
)


In [None]:
# Display the instrument status table
df_instruments

In [None]:
# Spot-check one row; the label is a measurement date string followed by a station code
df_instruments.loc['2017-01-01 04:00'+str(112)]

In [None]:
# Create one boolean mask per instrument operation code
# 0: Normal, 1: Need for calibration, 2: Abnormal, 4: Power cut off, 8: Under repair, 9: abnormal data
#
# Each mask is True exactly where the status equals the code and False everywhere
# else (missing entries compare as False). A plain equality comparison is used so
# that non-matching cells become False instead of keeping their raw status code,
# which would make e.g. a leftover 1, 2, 4 or 8 look truthy inside the "9" mask.

df_instruments_0 = df_instruments.eq(0)
df_instruments_1 = df_instruments.eq(1)
df_instruments_2 = df_instruments.eq(2)
df_instruments_4 = df_instruments.eq(4)
df_instruments_8 = df_instruments.eq(8)
df_instruments_9 = df_instruments.eq(9)

In [None]:
# Spot-check one row of the status-0 mask
df_instruments_0.loc['2017-01-01 04:00'+str(112)]

In [None]:
# Spot-check one row of the status-9 mask
df_instruments_9.loc['2017-01-01 04:00'+str(112)]

In [None]:
# Persist every mask, plus the full status table, as pickles

# (destination path, frame) pairs, written in this order
pickle_targets = (
    ('../data/interim/instrument_mask_0.pkl', df_instruments_0),
    ('../data/interim/instrument_mask_1.pkl', df_instruments_1),
    ('../data/interim/instrument_mask_2.pkl', df_instruments_2),
    ('../data/interim/instrument_mask_4.pkl', df_instruments_4),
    ('../data/interim/instrument_mask_8.pkl', df_instruments_8),
    ('../data/interim/instrument_mask_9.pkl', df_instruments_9),
    ('../data/interim/instrument_mask_all.pkl', df_instruments),
)
for target_path, frame in pickle_targets:
    frame.to_pickle(target_path)