In [None]:
import os
import re
import tarfile
from collections import defaultdict

# Step 1: Read Dataset
# The download is a gzip-compressed tar archive, not a directory, so it has to
# be unpacked before its files can be listed (calling os.listdir() on the
# archive itself raises NotADirectoryError).
dataset_path = "/content/webkb-data.gtar.gz"
extract_dir = "/content/webkb-data"

# Common English function words that are skipped while indexing.
STOP_WORDS = frozenset({
    "a", "an", "and", "are", "as", "at", "be", "by", "for", "from", "has",
    "he", "in", "is", "it", "its", "of", "on", "or", "that", "the", "to",
    "was", "were", "will", "with",
})

# Upper bound on the rows printed per table, so the cell output stays readable
# even when thousands of documents are indexed.
PREVIEW_ROWS = 20

if os.path.isdir(dataset_path):
    # The path already points at an unpacked directory: index it in place.
    dataset_root = dataset_path
else:
    dataset_root = extract_dir
    if not os.path.isdir(dataset_root):
        with tarfile.open(dataset_path, "r:*") as archive:
            # Prefer the "data" extraction filter where the running Python
            # provides it: it refuses members that would escape the target dir.
            if hasattr(tarfile, "data_filter"):
                archive.extractall(dataset_root, filter="data")
            else:
                archive.extractall(dataset_root)

# Documents may sit in nested folders, so walk the whole tree instead of only
# listing the top level. Sorting makes the ID assignment below reproducible.
webkb_documents = sorted(
    os.path.relpath(os.path.join(folder, file_name), dataset_root)
    for folder, _subfolders, file_names in os.walk(dataset_root)
    for file_name in file_names
)

# Initialize data structures for pages-information and words-information tables
#   pages_information: doc_id -> {"p-name", "total-weight", "total-count-word"}
#   words_information: word   -> list of doc_ids containing that word
pages_information = defaultdict(dict)
words_information = defaultdict(list)

# Step 2: Process each document
# Sequential integers are used as IDs: hash() of a string is salted per
# interpreter process, so hashed IDs would differ from one run to the next.
for doc_id, document in enumerate(webkb_documents):
    doc_path = os.path.join(dataset_root, document)

    # latin-1 maps every byte to a character, so reading never raises
    # UnicodeDecodeError regardless of what the file contains.
    with open(doc_path, "r", encoding="latin-1") as f:
        doc_contents = f.read()

    # Step 3: Tokenization and processing (lower-cased runs of word characters)
    words = [
        word
        for word in re.findall(r'\w+', doc_contents.lower())
        if word not in STOP_WORDS
    ]

    pages_information[doc_id]["p-name"] = document
    # Every kept word currently contributes a weight of 1, so the total weight
    # equals the word count.
    pages_information[doc_id]["total-weight"] = len(words)
    pages_information[doc_id]["total-count-word"] = len(words)

    for word in words:
        pages_list = words_information[word]
        # Record each page once per word. Documents are processed one after
        # another, so a repeat can only be the most recent entry.
        if not pages_list or pages_list[-1] != doc_id:
            pages_list.append(doc_id)

# Step 5 and 6: Display pages-information and words-information tables
print("Pages-Information Table:")
for doc_id, info in list(pages_information.items())[:PREVIEW_ROWS]:
    print(f"ID: {doc_id}, p-name: {info['p-name']}, total-weight: {info['total-weight']}, total-count-word: {info['total-count-word']}")
if len(pages_information) > PREVIEW_ROWS:
    print(f"... {len(pages_information) - PREVIEW_ROWS} more pages not shown")

print("\nWords-Information Table:")
for word, pages_list in list(words_information.items())[:PREVIEW_ROWS]:
    print(f"Word: {word}, pages-list: {pages_list}")
if len(words_information) > PREVIEW_ROWS:
    print(f"... {len(words_information) - PREVIEW_ROWS} more words not shown")

NotADirectoryError: ignored

In [None]:
# Colab-only cell: attaches the user's Google Drive under /content/drive.
# NOTE(review): none of the other visible cells read from /content/drive
# (the dataset path points at /content/webkb-data.gtar.gz) - confirm this
# mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
def search_algorithm(user_query, words_information_table):
    """Return the IDs of the pages that contain every word of the query.

    Args:
        user_query: Query string; it is split on whitespace into words and
            each word is looked up exactly as typed.
        words_information_table: Mapping of word -> list of page IDs
            (page IDs must be hashable).

    Returns:
        A new list with the page IDs found in the pages-list of *every* query
        word, without duplicates, in the order they appear in the first
        word's pages-list. The list is empty when the query has no words or
        when any word has no matching page.
    """
    # None means "no word processed yet". An empty list is a real result (no
    # page matches so far) and must not be mistaken for the starting state,
    # otherwise a later word would silently reset the intersection.
    matching_ids = None

    for word in user_query.split():
        pages_list = words_information_table.get(word, [])

        if matching_ids is None:
            # Copy (and de-duplicate, keeping order) so the caller never gets
            # a reference to a list stored inside the table.
            matching_ids = list(dict.fromkeys(pages_list))
        else:
            allowed = set(pages_list)
            matching_ids = [page_id for page_id in matching_ids if page_id in allowed]

        if not matching_ids:
            # The intersection can only shrink, so stop as soon as it is empty.
            return []

    return matching_ids if matching_ids is not None else []

# Example usage
# Reads a free-text query from the user and looks it up in a tiny hand-written
# words-information table (word -> list of page IDs).
user_query = input("Enter your query: ")
words_information_table = dict(
    example=[1, 2, 3],
    query=[2, 3, 4],
    string=[1, 4, 5],
)

result = search_algorithm(user_query, words_information_table)
print("Result:", result)

Enter your query: [1, 2, 3]
Result: []


In [None]:
import random

def encode_problem(problem):
  """Builds a random chromosome for the problem.

  The chromosome is a list with one gene per element of `problem`; each gene
  is drawn independently as 0 or 1. Only the length of `problem` is used.
  """
  gene_count = len(problem)
  return [random.randint(0, 1) for _ in range(gene_count)]

def initial_generation(population_size, problem):
  """Creates the starting population.

  Returns a list of `population_size` chromosomes, each produced by a
  separate call to `encode_problem(problem)`.
  """
  return [encode_problem(problem) for _ in range(population_size)]

def evaluate_fitness(chromosome, problem):
  """Scores a chromosome against the problem.

  The score is the number of positions of `problem` at which the chromosome
  holds the same value. The chromosome is indexed at every position of
  `problem`, so it must be at least as long.
  """
  return sum(
    1
    for position in range(len(problem))
    if chromosome[position] == problem[position]
  )

def selection(population, fitness):
  """Selects the parents for the next generation.

  Parents are drawn with replacement, each with a probability proportional to
  its fitness (roulette-wheel selection), so fitter chromosomes are chosen
  more often.

  Args:
    population: List of chromosomes.
    fitness: Sequence with one numeric score per chromosome, in the same
      order as `population`. `None` selects uniformly at random.

  Returns:
    A list with as many parents as `population` has members. The entries are
    references to the chromosomes in `population`, not copies.

  Raises:
    ValueError: If `fitness` is given but its length differs from
      `population`.
  """
  if not population:
    return []

  weights = None
  if fitness is not None:
    if len(fitness) != len(population):
      raise ValueError("fitness must contain one score per chromosome")
    lowest = min(fitness)
    # Weights must be non-negative; shift the scores up when any is negative.
    shifted = [score - lowest for score in fitness] if lowest < 0 else list(fitness)
    # When every weight is zero there is nothing to prefer, so fall back to a
    # uniform draw (weights=None) instead of letting random.choices() fail.
    if sum(shifted) > 0:
      weights = shifted

  return random.choices(population, weights=weights, k=len(population))

def crossover(parents):
  """Performs single-point crossover on the parents to create offspring.

  Parents are paired in order (0 with 1, 2 with 3, ...). Each pair produces
  two children by exchanging the halves on either side of a cut point, which
  is the midpoint of the first parent's length and is shared by all pairs.

  Args:
    parents: List of chromosomes (sequences supporting slicing and `+`).

  Returns:
    A new list with as many chromosomes as `parents`. With an odd number of
    parents the last one has no partner and is carried over as a copy, so the
    population does not shrink from one generation to the next.
  """
  if not parents:
    return []

  cut = len(parents[0]) // 2
  offspring = []
  for first, second in zip(parents[0::2], parents[1::2]):
    offspring.append(first[:cut] + second[cut:])
    offspring.append(second[:cut] + first[cut:])

  if len(parents) % 2:
    # Copy, so a later in-place mutation cannot alter the parent itself.
    offspring.append(parents[-1][:])
  return offspring

def mutation(offspring, mutation_rate=0.1):
  """Performs bit-flip mutation on the offspring.

  Every gene of every chromosome is flipped (0 <-> 1) independently with
  probability `mutation_rate`.

  Args:
    offspring: List of chromosomes made of 0/1 genes. It is modified in place.
    mutation_rate: Probability, between 0 and 1, of flipping a single gene.

  Returns:
    The same `offspring` list that was passed in.

  Raises:
    ValueError: If `mutation_rate` lies outside the range 0..1.
  """
  if not 0 <= mutation_rate <= 1:
    raise ValueError("mutation_rate must be between 0 and 1")

  for chromosome in offspring:
    # Use each chromosome's own length so chromosomes of differing sizes are
    # handled in full rather than being cut to the length of the first one.
    for position in range(len(chromosome)):
      if random.random() < mutation_rate:
        chromosome[position] = 1 - chromosome[position]
  return offspring

def genetic_algorithm(problem, population_size, generations):
  """Runs the genetic algorithm and returns the fittest chromosome found.

  Each generation is scored, parents are selected, recombined and mutated,
  and the result becomes the next population. The best-scoring chromosome
  seen in any generation (including the final one) is remembered.

  Args:
    problem: Target the chromosomes are scored against by `evaluate_fitness`.
    population_size: Number of chromosomes in the initial population.
    generations: Number of evolution steps to perform.

  Returns:
    A copy (as a list) of the chromosome with the highest fitness seen.

  Raises:
    IndexError: If no chromosome was ever available, e.g. when
      `population_size` is 0.
  """
  population = initial_generation(population_size, problem)
  best_chromosome = None
  best_fitness = None
  steps = max(generations, 0)

  # One pass more than `steps`: the last pass only scores the final
  # population and does not breed another generation.
  for step in range(steps + 1):
    fitness = [evaluate_fitness(chromosome, problem) for chromosome in population]

    for chromosome, score in zip(population, fitness):
      if best_fitness is None or score > best_fitness:
        # Copy so that later in-place mutation cannot change the winner.
        best_chromosome, best_fitness = list(chromosome), score

    if step == steps:
      break

    parents = selection(population, fitness)
    offspring = crossover(parents)
    population = mutation(offspring)

  if best_chromosome is None:
    raise IndexError("population is empty; population_size must be at least 1")
  return best_chromosome

if __name__ == "__main__":
  # Fix the seed so that re-running the cell on a fresh kernel reproduces the
  # same result; the algorithm is otherwise driven entirely by random draws.
  random.seed(42)

  # Target pattern the chromosomes are scored against.
  problem = [0, 1, 0, 1, 0, 1]
  # 100 chromosomes evolved for 100 generations.
  best_chromosome = genetic_algorithm(problem, 100, 100)
  print(best_chromosome)

[1, 1, 1, 0, 0, 1]


In [None]:
import random

def swap_mutation(population, selected_indices):
  """Copies each selected individual's fitness onto two random individuals.

  For every index in `selected_indices`, two positions are drawn uniformly
  from the whole population (with replacement, and possibly equal to the
  index itself) and the `fitness` attribute of the individuals there is
  overwritten with the selected individual's fitness. The population is
  modified in place and nothing is returned.

  NOTE(review): despite the name, no values are exchanged - the selected
  individual's own fitness ends up unchanged. Confirm whether a real swap
  was intended.
  """
  for index in selected_indices:
    source_fitness = population[index].fitness

    last_position = len(population) - 1
    first_target = random.randint(0, last_position)
    second_target = random.randint(0, last_position)

    population[first_target].fitness = source_fitness
    population[second_target].fitness = source_fitness

