# Imports

In [1]:
import numpy as np
import pandas as pd
from typing import Optional
from scipy.spatial.distance import pdist, squareform
from scipy.cluster.hierarchy import linkage, dendrogram
import matplotlib.pyplot as plt

In [2]:
# Read the exported CSV into a DataFrame and print its first five rows
# as a quick sanity check of the columns that were loaded.
coupon_df = pd.read_csv('data/18929485529.csv')
print(coupon_df.head(n=5))

            ID  User ID                     Time  I Language Application Name  \
0  18929485529   165559  2024-09-04T10:55:25.287  1       de            PENNY   
1  18929485529   165559  2024-09-04T10:55:25.287  2       de            PENNY   
2  18929485529   165559  2024-09-04T10:55:25.287  3       de            PENNY   
3  18929485529   165559  2024-09-04T10:55:25.287  4       de            PENNY   
4  18929485529   165559  2024-09-04T10:55:25.287  5       de            PENNY   

   Package Name                           Class Name  Context  \
0  de.penny.app  de.penny.app.main.view.MainActivity      NaN   
1  de.penny.app  de.penny.app.main.view.MainActivity      NaN   
2  de.penny.app  de.penny.app.main.view.MainActivity      NaN   
3  de.penny.app  de.penny.app.main.view.MainActivity      NaN   
4  de.penny.app  de.penny.app.main.view.MainActivity      NaN   

              View ID  View Depth                      View Class Name  \
0                 NaN           0  de.penny.app.

In [3]:
class BoundingBox:
    """Axis-aligned rectangle described by two opposite corners.

    The constructor normalizes the corners, so after construction
    ``x1 <= x2`` and ``y1 <= y2`` hold regardless of the order in which the
    coordinates were passed.
    """

    def __init__(self, x1, y1, x2, y2):
        """Store the corners so that (x1, y1) is the minimum corner."""
        self.x1 = min(x1, x2)
        self.y1 = min(y1, y2)
        self.x2 = max(x1, x2)
        self.y2 = max(y1, y2)

    def intersects(self, other: 'BoundingBox') -> bool:
        """Return True when the two boxes overlap.

        The comparison is strict: boxes that merely touch along an edge or
        at a corner are reported as NOT intersecting.
        """
        if self.x1 >= other.x2 or self.x2 <= other.x1:
            return False

        if self.y1 >= other.y2 or self.y2 <= other.y1:
            return False

        return True

    def merge(self, other: 'BoundingBox') -> 'BoundingBox':
        """Return the smallest box that contains both ``self`` and ``other``."""
        return BoundingBox(
            min(self.x1, other.x1),
            min(self.y1, other.y1),
            max(self.x2, other.x2),
            max(self.y2, other.y2)
        )

    def intersection(self, other: 'BoundingBox') -> Optional['BoundingBox']:
        """Return the overlapping region, or None when the boxes do not intersect."""
        if not self.intersects(other):
            return None

        return BoundingBox(
            max(self.x1, other.x1),
            max(self.y1, other.y1),
            min(self.x2, other.x2),
            min(self.y2, other.y2)
        )

    def area(self):
        """Return width times height of the box (0 for a degenerate box)."""
        return (self.x2 - self.x1) * (self.y2 - self.y1)

    def IoU(self, other: 'BoundingBox'):
        """Return the intersection-over-union ratio of the two boxes.

        Returns 0 when the boxes do not intersect. Also returns 0 when the
        union has zero area, which happens when two zero-area boxes cross
        (e.g. a vertical and a horizontal segment): ``intersects`` is True
        for them, and dividing by the union would otherwise fail.
        """
        # intersection() already performs the overlap test, so call it once
        # instead of checking intersects() separately beforehand.
        overlap = self.intersection(other)
        if overlap is None:
            return 0

        inter_area = overlap.area()
        union_area = self.area() + other.area() - inter_area

        # Guard against 0 / 0 for crossing degenerate boxes.
        if union_area == 0:
            return 0

        return inter_area / union_area

    def apply_bias(self, xbias, ybias):
        """Return a new box grown by ``xbias`` on the left and right sides and
        by ``ybias`` on the top and bottom sides; ``self`` is left unchanged.

        Note: because the constructor re-orders corners, a negative bias
        larger than half the box size yields a flipped box rather than an
        empty one.
        """
        return BoundingBox(
            self.x1 - xbias,
            self.y1 - ybias,
            self.x2 + xbias,
            self.y2 + ybias
        )

In [4]:
def create_bounding_box(row):
    """Build a ``BoundingBox`` from one row's corner coordinates.

    Reads the ``'X 1'``, ``'Y 1'``, ``'X 2'`` and ``'Y 2'`` entries of
    ``row`` and passes them to the ``BoundingBox`` constructor in that order.
    """
    corner_keys = ('X 1', 'Y 1', 'X 2', 'Y 2')
    x1, y1, x2, y2 = (row[key] for key in corner_keys)
    return BoundingBox(x1, y1, x2, y2)

# Work on a copy so the raw frame stays untouched, attach one BoundingBox
# object per row, then remove the four coordinate columns it was built from.
coupon_df_mod = coupon_df.copy()
coupon_df_mod['Bounding Box'] = coupon_df_mod.apply(create_bounding_box, axis='columns')
coupon_df_mod = coupon_df_mod.drop(columns=['X 1', 'Y 1', 'X 2', 'Y 2'])

In [5]:
# Split the frame into one sub-frame per distinct
# ('Application Name', 'Seen Timestamp') combination. groupby sorts the
# group keys by default, so the list follows the sorted key order.
grouping_columns = ['Application Name', 'Seen Timestamp']
grouped_coupon_dfs = [
    group_frame
    for _key, group_frame in coupon_df_mod.groupby(grouping_columns)
]

In [8]:
# Pairwise IoU between the padded bounding boxes of a single group.
# NOTE: despite its name, `dists` holds IoU *similarities* (1 on the
# diagonal, 0 for boxes that do not overlap), not distances.
test_df = grouped_coupon_dfs[2]
num_rows = test_df.shape[0]
dists = np.empty((num_rows, num_rows))
# Padding added on each side of every box before comparing, in the same
# units as the box coordinates.
xbias = 10
ybias = 30

# Pad every box once up front: the padded box does not depend on the other
# loop index, so rebuilding it (and re-indexing the Series) for each (i, j)
# pair was loop-invariant work.
biased_boxes = [box.apply_bias(xbias, ybias) for box in test_df['Bounding Box']]

for i, box_i in enumerate(biased_boxes):
    for j, box_j in enumerate(biased_boxes):
        dists[i, j] = box_i.IoU(box_j)

print(dists)

[[1.         0.23828954 0.         0.         0.         0.
  0.         0.         0.         0.        ]
 [0.23828954 1.         0.         0.         0.         0.
  0.         0.         0.         0.        ]
 [0.         0.         1.         0.0516851  0.         0.
  0.         0.         0.         0.        ]
 [0.         0.         0.0516851  1.         0.10298585 0.
  0.         0.         0.         0.        ]
 [0.         0.         0.         0.10298585 1.         0.
  0.         0.         0.         0.        ]
 [0.         0.         0.         0.         0.         1.
  0.24856597 0.         0.         0.        ]
 [0.         0.         0.         0.         0.         0.24856597
  1.         0.         0.         0.        ]
 [0.         0.         0.         0.         0.         0.
  0.         1.         0.08890167 0.00341013]
 [0.         0.         0.         0.         0.         0.
  0.         0.08890167 1.         0.17217887]
 [0.         0.         0.   