In [1]:
class DGIM:
    def __init__(self, window_size):
        self.window_size = window_size
        self.buckets = []
    
    def add_bit(self, bit):
        if bit == 1:
            self.buckets.append((1, len(self.buckets)))
            self.compress_buckets()
        else:
            self.buckets.append((0, 0))
        
        if len(self.buckets) > 2*self.window_size:
            self.buckets.pop(0)
    
    def compress_buckets(self):
        while len(self.buckets) >= 3:
            if self.buckets[-3][0] == self.buckets[-2][0] == self.buckets[-1][0]:
                self.buckets[-3] = (self.buckets[-3][0], self.buckets[-1][1])
                self.buckets.pop()
            else:
                break
    
    def query(self, k):
        num_ones = 0
        for count, timestamp in reversed(self.buckets):
            if timestamp >= len(self.buckets) - k:
                num_ones += count
        return num_ones
stream = [1, 0, 1, 1, 0, 1, 0, 0, 1, 0]
k = 4

dgim = DGIM(k)
for bit in stream:
    dgim.add_bit(bit)

num_ones = dgim.query(k)
print(num_ones)  # output: 2


2


In [2]:
import hashlib

class CountMinSketch:
    def __init__(self, m, k):
        self.m = m  # number of rows
        self.k = k  # number of columns
        self.table = [[0 for j in range(self.k)] for i in range(self.m)]

    def update(self, key, count):
        for i in range(self.m):
            hash_value = int(hashlib.md5((str(key) + str(i)).encode()).hexdigest(), 16)
            j = hash_value % self.k
            self.table[i][j] += count

    def query(self, key):
        min_count = float('inf')
        for i in range(self.m):
            hash_value = int(hashlib.md5((str(key) + str(i)).encode()).hexdigest(), 16)
            j = hash_value % self.k
            count = self.table[i][j]
            min_count = min(min_count, count)
        return min_count

# Example usage
stream = [1, 5, 8, 9, 2]
cms = CountMinSketch(m=10, k=100)
for element in stream:
    cms.update(element, 1)
for element in stream:
    frequency = cms.query(element)
    print(f"Element {element} has frequency {frequency}")


Element 1 has frequency 1
Element 5 has frequency 1
Element 8 has frequency 1
Element 9 has frequency 1
Element 2 has frequency 1


In [3]:
import random
from collections import Counter
from statistics import mean

# Example hashtag stream generator
def generate_hashtag_stream(num_hashtags):
    hashtags = ["#python", "#datascience", "#machinelearning", "#AI", "#bigdata"]
    for i in range(num_hashtags):
        yield random.choice(hashtags)

# Example of processing the hashtag stream
def process_hashtag_stream(hashtag_stream, window_size, smoothing_factor):
    counts = Counter()
    trending_hashtag = None
    max_count = 0

    for hashtag in hashtag_stream:
        counts[hashtag] += 1

        # Check if current hashtag is trending
        if counts[hashtag] > max_count:
            trending_hashtag = hashtag
            max_count = counts[hashtag]

        # Remove hashtags that fall outside the window
        if len(counts) > window_size:
            counts.subtract({counts.most_common()[-1][0]: 1})

        # Compute smoothed aggregation
        if len(counts) > 0:
            values = [counts[h] for h in counts.keys()]
            smoothed_value = mean(values) * smoothing_factor + max(values) * (1 - smoothing_factor)
            print(f"Trending: {trending_hashtag}, Smoothed Value: {smoothed_value}")

# Example usage
hashtag_stream = generate_hashtag_stream(1000)
process_hashtag_stream(hashtag_stream, 10, 0.7)


Trending: #AI, Smoothed Value: 1.0
Trending: #AI, Smoothed Value: 1.0
Trending: #AI, Smoothed Value: 1.0
Trending: #AI, Smoothed Value: 1.0
Trending: #bigdata, Smoothed Value: 1.475
Trending: #bigdata, Smoothed Value: 1.65
Trending: #bigdata, Smoothed Value: 1.825
Trending: #python, Smoothed Value: 2.3
Trending: #python, Smoothed Value: 2.16
Trending: #python, Smoothed Value: 2.3
Trending: #python, Smoothed Value: 2.74
Trending: #python, Smoothed Value: 2.88
Trending: #python, Smoothed Value: 3.02
Trending: #bigdata, Smoothed Value: 3.46
Trending: #bigdata, Smoothed Value: 3.5999999999999996
Trending: #bigdata, Smoothed Value: 3.74
Trending: #bigdata, Smoothed Value: 3.88
Trending: #bigdata, Smoothed Value: 4.0200000000000005
Trending: #bigdata, Smoothed Value: 4.16
Trending: #bigdata, Smoothed Value: 4.3
Trending: #bigdata, Smoothed Value: 4.44
Trending: #python, Smoothed Value: 4.880000000000001
Trending: #python, Smoothed Value: 5.02
Trending: #python, Smoothed Value: 5.460000000000