In [None]:
# Install Redis Python client
!pip install redis



In [None]:
import collections
import math
import os
import re
import unittest
import redis

# Matches every character that is NOT allowed inside an index term, i.e.
# anything other than a lowercase ASCII letter, a digit, an apostrophe or a
# space. Input is expected to be lowercased before this pattern is applied.
NON_WORDS = re.compile(r"[^a-z0-9' ]")

# Very common English words that carry no search value; tokens found in this
# set are dropped before indexing and before querying.
STOP_WORDS = set(
    """
    a able about across after all almost also am among
    an and any are as at be because been but by can cannot could dear did
    do does either else ever every for from get got had has have he her
    hers him his how however i if in into is it its just least let like
    likely may me might most must my neither no nor not of off often on
    only or other our own rather said say says she should since so some
    than that the their them then there these they this tis to too twas us
    wants was we were what when where which while who whom why will with
    would yet you your
    """.split()
)

class ScoredIndexSearch(object):
    """TF-IDF scored inverted index stored in Redis sorted sets.

    Key layout (all keys start with ``self.prefix``):

    * ``<prefix><word>``   -- sorted set of document ids, scored by the term
      frequency of ``word`` in that document.
    * ``<prefix>indexed:`` -- plain set holding the ids of all indexed
      documents (its cardinality is the corpus size used for IDF).
    * ``<prefix>temp:<hex>`` -- short-lived sorted set created by
      :meth:`search` and deleted before it returns.
    """

    def __init__(self, prefix, host='localhost', port=6379, password=None):
        """Create the index handle and its Redis client.

        Args:
            prefix: Namespace for every key written by this index. It is
                lowercased and normalised to end with exactly one ``':'``.
            host: Redis server hostname.
            port: Redis server port.
            password: Redis password, or ``None`` for no authentication.
        """
        # Prefix for all index keys
        self.prefix = prefix.lower().rstrip(':') + ':'
        # Connect to Redis server
        self.connection = redis.Redis(
            host=host,
            port=port,
            password=password,
            decode_responses=True  # ensures strings instead of bytes
        )

    @staticmethod
    def get_index_keys(content, add=True):
        """Tokenise ``content`` into index terms.

        The text is lowercased, every character matched by ``NON_WORDS`` is
        replaced by a space, surrounding apostrophes are stripped, and stop
        words and one-character tokens are discarded.

        Args:
            content: Text to tokenise.
            add: When true, return term frequencies; when false, return the
                raw token list (duplicates kept, in order of appearance).

        Returns:
            ``dict`` mapping each word to ``occurrences / total_tokens`` when
            ``add`` is true (empty dict if no token survives), otherwise a
            ``list`` of tokens.
        """
        words = NON_WORDS.sub(' ', content.lower()).split()
        words = [word.strip("'") for word in words]
        words = [word for word in words if word not in STOP_WORDS and len(word) > 1]

        if not add:
            return words

        wordcount = len(words)
        # Counter keeps first-seen order; an empty token list yields an empty
        # dict, so the division below never sees wordcount == 0.
        counts = collections.Counter(words)
        return {word: count / wordcount for word, count in counts.items()}

    def _handle_content(self, id, content, add=True):
        """Add or remove one document's entries in a single pipeline.

        Args:
            id: Document identifier stored as the sorted-set member.
            content: The document text. For removal this must be the same
                text that was indexed, since it determines which word keys
                are touched.
            add: True to index the document, False to remove it.

        Returns:
            Number of distinct index terms found in ``content``.
        """
        # Always the TF dict; for removal only its keys (the words) are used.
        keys = self.get_index_keys(content)
        prefix = self.prefix
        pipe = self.connection.pipeline(False)  # non-transactional pipeline

        if add:
            pipe.sadd(prefix + 'indexed:', id)
            for key, value in keys.items():
                pipe.zadd(prefix + key, {id: value})
        else:
            pipe.srem(prefix + 'indexed:', id)
            for key in keys:
                pipe.zrem(prefix + key, id)

        pipe.execute()
        return len(keys)

    def add_indexed_item(self, id, content):
        """Index ``content`` under ``id``; returns the number of distinct terms."""
        return self._handle_content(id, content, add=True)

    def remove_indexed_item(self, id, content):
        """Remove ``id`` from the terms of ``content``; returns the number of distinct terms."""
        return self._handle_content(id, content, add=False)

    def search(self, query_string, offset=0, count=10):
        """Return one page of documents ranked by TF-IDF for ``query_string``.

        Args:
            query_string: Free-text query; tokenised like indexed content.
            offset: Zero-based rank of the first result to return. Negative
                values are treated as 0.
            count: Maximum number of results to return. Values <= 0 yield an
                empty page (the match count is still reported).

        Returns:
            ``(results, known)`` where ``results`` is a list of
            ``(doc_id, score)`` tuples in descending score order and
            ``known`` is the number of documents matching at least one query
            term. ``([], 0)`` when the query has no usable or known terms.
        """
        # dict.fromkeys de-duplicates repeated query words while keeping
        # order, so each term costs a single ZCARD.
        words = dict.fromkeys(self.get_index_keys(query_string, False))
        keys = [self.prefix + word for word in words]
        if not keys:
            return [], 0

        # Redis interprets negative indexes as "from the end of the set";
        # that is never what a paging caller means.
        offset = max(offset, 0)

        # Corpus size; clamped to 1 so the IDF ratio is well defined.
        total_docs = max(self.connection.scard(self.prefix + 'indexed:'), 1)

        # Get document frequencies
        pipe = self.connection.pipeline(False)
        for key in keys:
            pipe.zcard(key)
        sizes = pipe.execute()

        # Compute IDF
        def idf(doc_freq):
            # log2 is exact for powers of two, unlike math.log(x, 2).
            # Clamped at 0 so stale counts cannot produce negative weights.
            return max(math.log2(total_docs / doc_freq), 0) if doc_freq else 0

        # Only terms that occur in at least one document take part.
        weights = {key: idf(size) for key, size in zip(keys, sizes) if size}
        if not weights:
            return [], 0

        # Random suffix keeps concurrent searches from sharing a temp key.
        temp_key = self.prefix + 'temp:' + os.urandom(8).hex()
        try:
            # Weighted union: each member's TF is multiplied by the term's
            # IDF and combined with Redis' default aggregate (sum).
            known = self.connection.zunionstore(temp_key, weights)
            if count > 0:
                ids = self.connection.zrevrange(
                    temp_key, offset, offset + count - 1, withscores=True
                )
            else:
                # offset + count - 1 would be -1 for offset == 0, which
                # Redis reads as "through the last element".
                ids = []
        finally:
            self.connection.delete(temp_key)
        return ids, known


# ------------------------------
# Example Usage
# ------------------------------

# Connection details are read from the environment so that credentials are
# never stored in the notebook or in version control. Set REDIS_HOST,
# REDIS_PORT and REDIS_PASSWORD before running; unset values fall back to a
# local, unauthenticated Redis server.
host = os.environ.get('REDIS_HOST', 'localhost')
port = int(os.environ.get('REDIS_PORT', '6379'))
password = os.environ.get('REDIS_PASSWORD')  # None -> connect without AUTH

search_index = ScoredIndexSearch('myindex', host=host, port=port, password=password)

# Adding items to index
search_index.add_indexed_item(1, 'hello world')
search_index.add_indexed_item(2, 'this world is nice and you are really special')

# Searching
results, total = search_index.search('hello world')
print("Results:", results)
# NOTE: `total` is the number of documents that matched the query (the size
# of the scored union), not the number of documents in the index.
print("Total documents:", total)

Results: [('1', 0.5), ('2', 0.0)]
Total documents: 2
