# Toronto anomalies analysis
Anomaly detection and performance measurement

### Importing necessary libraries

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
import utils
import os.path

### Loading the data

In [13]:
# Site under analysis; the loading cells further down pass it to utils.
SITE = 7879

# Channel catalogue for all sites, read from the dataset directory.
channels = pd.read_csv('../dataset/channels.csv')

# Preview the first few channels that belong to the selected site.
channels.loc[channels['siteId'] == SITE].head()

Unnamed: 0,siteId,channelId,channelName,units,isCalculated
81434,7879,11723,MP1 BTYVOLT,V,False
81435,7879,11722,MP1 UpDEPTH_1,mm,False
81436,7879,11721,MP1 UNIDEPTH,mm,False
81437,7879,11720,MP1 RAWVEL,m/s,False
81438,7879,11719,MP1 PEAKVEL_1,m/s,False


### Assigning the channel IDs to variables

In [11]:
q_raw = 14851
mp1_q_final = 11726

### Using utils to load the values from chosen channels

In [12]:
q_raw = utils.load_channel_data(SITE, q_raw).rename('q_raw').fillna(0)
mp1_q_final = utils.load_channel_data(SITE, mp1_q_final).rename('mp1_q_final').fillna(0)

### Short description of the data

In [18]:
# Summary statistics of the q_raw series. Zeros counted here include the
# values substituted for NaN by fillna(0) in the loading cell, so the
# quartiles alone cannot separate real zero readings from missing data.
q_raw.describe()

count    264891.000000
mean         20.450752
std          92.038402
min           0.000000
25%           0.000000
50%           0.000000
75%           0.000000
max        1574.525513
Name: q_raw, dtype: float64

In [19]:
# Summary statistics of the mp1_q_final series, for comparison with q_raw.
mp1_q_final.describe()

count    258943.000000
mean        451.324441
std         133.677782
min          84.080002
25%         363.100006
50%         464.700012
75%         518.299988
max        1798.500000
Name: mp1_q_final, dtype: float64

### Adding the absolute difference between the raw and final values

In [21]:
# Put both series side by side on their shared index, keep only the rows
# where both have a value, and add the absolute raw-vs-final difference.
df = (
    pd.concat([q_raw, mp1_q_final], axis=1)
    .dropna()
    .assign(q_diff=lambda frame: (frame['mp1_q_final'] - frame['q_raw']).abs())
)

# A row counts as an anomaly when the two series differ by more than 1.
print('Anomalies in q_raw: {:}'.format((df['q_diff'] > 1).sum()))
print(df.head())

Anomalies in q_raw: 258943
                     q_raw  mp1_q_final      q_diff
time                                               
2015-10-06 12:55:00    0.0   440.100006  440.100006
2015-10-06 13:00:00    0.0   516.700012  516.700012
2015-10-06 13:05:00    0.0   515.700012  515.700012
2015-10-06 13:10:00    0.0   504.700012  504.700012
2015-10-06 13:15:00    0.0   507.799988  507.799988


### Creating the anomalies data frame

In [23]:
# Keep only the rows whose raw/final difference exceeds the threshold of 1.
anomalies = df.loc[df['q_diff'] > 1]

# Preview the compared columns for the first few anomalous rows.
anomalies.loc[:, ['q_raw', 'mp1_q_final', 'q_diff']].head()

Unnamed: 0_level_0,q_raw,mp1_q_final,q_diff
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015-10-06 12:55:00,0.0,440.100006,440.100006
2015-10-06 13:00:00,0.0,516.700012,516.700012
2015-10-06 13:05:00,0.0,515.700012,515.700012
2015-10-06 13:10:00,0.0,504.700012,504.700012
2015-10-06 13:15:00,0.0,507.799988,507.799988


### Short description of the anomalies data frame

In [25]:
# Summary statistics of the rows flagged as anomalies (q_diff > 1).
# NOTE(review): in the recorded run the count equals the full length of df,
# i.e. every aligned row was flagged -- verify the threshold and inputs.
anomalies.describe()

Unnamed: 0,q_raw,mp1_q_final,q_diff
count,258943.0,258943.0,258943.0
mean,13.485566,451.324441,437.838875
std,73.432963,133.677782,151.847906
min,0.0,84.080002,16.934204
25%,0.0,363.100006,345.799988
50%,0.0,464.700012,460.799988
75%,0.0,518.299988,516.400024
max,706.461731,1798.5,1798.5
