<a href="https://colab.research.google.com/github/aalemi97/AFRetryRequest/blob/master/Visualizers/UnitMetrics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Colab Configuration

In [1]:
# Mount the user's Google Drive at /content/drive; the data paths used later
# in this notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
from psutil import virtual_memory

# Report the runtime's memory in decimal gigabytes (bytes / 1e9).
# Note: the figure comes from `.total`, i.e. total memory, even though the
# message below words it as "available".
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# Anything below 20 GB is reported as a standard (non high-RAM) runtime.
print('Not using a high-RAM runtime' if ram_gb < 20
      else 'You are using a high-RAM runtime!')

Your runtime has 13.6 gigabytes of available RAM

Not using a high-RAM runtime


# MetricExtractor

In [3]:
import numpy as np
import pandas as pd

class MetricExtractor(object):
  """Builds per-unit, per-day metrics from raw observation records.

  On construction the zones CSV and the pickled observations frame are
  loaded. Observations are kept only when their "Unit ID" is a string that
  starts with "Unit" and their "Zone_Number" appears in the zones file.
  `extractData` then summarises what is left per ("Unit ID", "Date").

  Attributes:
    zonesDF: DataFrame read from the zones CSV.
    zones: list of the values in the "Zone Number" column of `zonesDF`.
    observationsDF: filtered observations with a derived "Date" column.
  """

  # Class-level defaults; __init__ replaces all three on every instance.
  zonesDF = None
  zones = []
  observationsDF = None

  def __init__(self,
               zonesDFPath,
               observationsDFPath):
    """Loads zones and observations.

    Args:
      zonesDFPath: path to a CSV file with a "Zone Number" column.
      observationsDFPath: path to a pickled pandas DataFrame of observations.
    """
    # Zones go first: the observation filter reads self.zones.
    self.__buildZonesDF__(zonesDFPath=zonesDFPath)
    self.__buildObservationsDF__(observationsDFPath=observationsDFPath)

  def extractData(self):
    """Aggregates observations into one row per ("Unit ID", "Date").

    Returns:
      DataFrame with columns "Unit ID", "Date",
      "Daily Tour Duration (Hour)" (latest minus earliest timestamp of the
      group, in hours), "Number of Observations" (distinct licence plates),
      "Number of Violations" (sum of "isViolation"),
      "Number of Observed Zones" (distinct zone numbers) and
      "Observed Zones ID" (list of every zone number seen, repeats kept).
    """
    # Only these four columns feed the metrics, so only they are collected
    # into per-group lists.
    sourceColumns = ["timezone", "Licence Plate", "isViolation", "Zone_Number"]
    grouped = self.observationsDF.groupby(["Unit ID", "Date"])[sourceColumns]
    df = grouped.agg(list)
    df = df.rename(columns={"Zone_Number": "Observed Zones ID"})

    # `.seconds` is the seconds component of the timedelta (whole days are
    # not included); every group shares one "Date" value.
    df["Daily Tour Duration (Hour)"] = [
        (max(stamps) - min(stamps)).seconds / 3600
        for stamps in df["timezone"]]

    df["Number of Observations"] = [
        len(set(plates)) for plates in df["Licence Plate"]]

    df["Number of Violations"] = [
        sum(flags) for flags in df["isViolation"]]

    df["Number of Observed Zones"] = [
        len(set(zoneIds)) for zoneIds in df["Observed Zones ID"]]

    # Selecting the output columns also discards the helper list columns.
    df = df.loc[:, ["Daily Tour Duration (Hour)",
                    "Number of Observations",
                    "Number of Violations",
                    "Number of Observed Zones",
                    "Observed Zones ID"]]
    return df.reset_index()

  def __buildObservationsDF__(self, observationsDFPath):
    """Loads, filters and normalises the observations frame.

    Stores the result in `self.observationsDF`. Requires `self.zones` to be
    populated already.

    Args:
      observationsDFPath: path to a pickled pandas DataFrame.
    """
    # NOTE(review): unpickling can execute arbitrary code; only load files
    # from a trusted source.
    dataFrame = pd.read_pickle(observationsDFPath)

    # Test each distinct id once instead of once per row.
    agentIds = [unitId for unitId in dataFrame["Unit ID"].unique()
                if self.__isAgent__(unitId)]
    # .copy() makes the filtered frame independent, so the column
    # assignments below never write into a slice of the loaded frame.
    dataFrame = dataFrame[dataFrame["Unit ID"].isin(agentIds)].copy()

    # Unparseable zone numbers become -100 and are removed by the zone
    # filter (unless -100 is itself listed in the zones file).
    dataFrame["Zone_Number"] = dataFrame["Zone_Number"].map(self.__convertToInt__)
    dataFrame = dataFrame[dataFrame["Zone_Number"].isin(self.zones)].copy()

    dataFrame["timezone"] = pd.to_datetime(dataFrame["timezone"])
    dataFrame["Date"] = dataFrame["timezone"].dt.date
    self.observationsDF = dataFrame.drop(columns=["Observing Officer ID",
                                                  "longitude",
                                                  "latitude",
                                                  "address"])

  def __buildZonesDF__(self,
                      zonesDFPath):
    """Reads the zones CSV and caches its "Zone Number" column as a list.

    Args:
      zonesDFPath: path to a CSV file with a "Zone Number" column.
    """
    self.zonesDF = pd.read_csv(zonesDFPath)
    self.zones = self.zonesDF["Zone Number"].tolist()

  def __isAgent__(self, id):
    """Returns True when `id` is a string that starts with "Unit"."""
    return isinstance(id, str) and id.startswith("Unit")

  def __convertToInt__(self,num):
    """Converts `num` to int, returning -100 when it cannot be converted.

    Only conversion failures are absorbed (TypeError, ValueError, and
    OverflowError for infinities); anything else propagates to the caller.
    """
    try:
      return int(num)
    except (TypeError, ValueError, OverflowError):
      return -100

In [4]:
# Input locations on the mounted Drive: the zones CSV and the pickled raw
# observations frame, both passed to MetricExtractor below.
zonesDFPath = "/content/drive/MyDrive/UCalgary/Thesis/CPA Data/Zones/FinalZonesDF.csv"
observationsDFPath = "/content/drive/MyDrive/UCalgary/Thesis/CPA Data/Observations/Universal/RawObservationsDF.pkl"

In [5]:
# Constructing the extractor reads both input files and filters the
# observations, so this cell does the file loading for the notebook.
extractor = MetricExtractor(zonesDFPath=zonesDFPath,
                            observationsDFPath=observationsDFPath)

In [6]:
# One row per ("Unit ID", "Date") with the daily metrics computed by extractData.
dataFrame = extractor.extractData()

In [7]:
# Persist the per-unit, per-day metrics to Drive as a pickle file.
dataFrame.to_pickle("/content/drive/MyDrive/UCalgary/Thesis/CPA Data/Visualizations/Units/UnitsDF.pkl")

In [8]:
# Average every daily metric per unit. `dataFrame` also carries the
# non-numeric "Date" and "Observed Zones ID" columns; numeric_only=True leaves
# them out explicitly instead of relying on the deprecated implicit drop
# (which raises TypeError on pandas >= 2.0).
summaryDF = dataFrame.groupby(["Unit ID"]).mean(numeric_only=True)

  summaryDF = dataFrame.groupby(["Unit ID"]).mean()


In [9]:
# Add two per-unit rates computed from the averaged daily metrics:
# violations as a percentage of observations, and observations per tour hour.
summaryDF = summaryDF.assign(**{
    "Violations to Observations Ratio (%)":
        summaryDF["Number of Violations"] / summaryDF["Number of Observations"] * 100,
    "Observation to Duration Ratio":
        summaryDF["Number of Observations"] / summaryDF["Daily Tour Duration (Hour)"],
})

In [10]:
# Display the summary rounded to two decimals; the result is not assigned,
# so summaryDF keeps its full-precision values.
summaryDF.round(2)

Unnamed: 0_level_0,Daily Tour Duration (Hour),Number of Observations,Number of Violations,Number of Observed Zones,Violations to Observations Ratio (%),Observation to Duration Ratio
Unit ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Unit 1 - 6348,6.14,568.71,41.55,93.34,7.31,92.57
Unit 10 - 6351,5.9,691.87,59.54,115.8,8.61,117.23
Unit 11 - 6352,6.09,672.46,49.25,119.93,7.32,110.5
Unit 2 - 6373,6.14,383.01,24.31,67.44,6.35,62.42
Unit 3 - 6346,5.24,419.62,37.78,71.04,9.0,80.13
Unit 4 - 6350,5.17,522.84,39.87,88.19,7.63,101.12
Unit 5 - 6347,5.41,508.38,44.16,91.84,8.69,93.93
Unit 6 - 6358,5.3,730.2,57.34,116.53,7.85,137.9
Unit 7 - 6360,5.11,569.2,36.08,90.51,6.34,111.28
Unit 8 - 6349,7.07,580.86,45.24,95.71,7.79,82.17


In [11]:
# Persist the per-unit summary (full precision, not the rounded view) to Drive.
summaryDF.to_pickle("/content/drive/MyDrive/UCalgary/Thesis/CPA Data/Visualizations/Units/UnitsSummaryDF.pkl")