In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the Kaggle input tree and print the full path of each file.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
from scipy.ndimage import gaussian_filter
from scipy.spatial import KDTree
import numpy as np
import os
import glob
from scipy import io
import cv2

In [None]:
# Dataset root under the read-only Kaggle input directory; later cells read
# train_data/ and test_data/ from here.
BASE_DATA_PATH = '/kaggle/input/crowd-counting/crowd_dataset'
# Writable Kaggle output directory for this session.
WORK_DIR = '/kaggle/working/'
# Generated density maps are cached here as .npy files.
PROCESSED_DATA_PATH = os.path.join(WORK_DIR, 'processed_data')
os.makedirs(PROCESSED_DATA_PATH, exist_ok=True)  # exist_ok keeps the cell re-runnable
print(f"Dataset path: {BASE_DATA_PATH}")
print(f"Working directory: {WORK_DIR}")

In [None]:
def generate_gaussian_kernel(sigma, size):
    """Build a normalised, square 2-D Gaussian kernel.

    Args:
        sigma: Standard deviation of the Gaussian, in pixels.
        size: Requested side length. An even value is bumped to the next odd
            number so that the kernel has a single centre cell.

    Returns:
        A float array of shape ``(side, side)`` whose entries sum to 1, where
        ``side`` is ``size`` after the odd adjustment.
    """
    side = size + 1 if size % 2 == 0 else size
    centre = side // 2
    offsets = np.arange(side, dtype=float) - centre
    # Broadcasting the row of offsets against the column of offsets yields
    # the squared distance of every cell from the centre.
    radius_sq = offsets[np.newaxis, :] ** 2 + offsets[:, np.newaxis] ** 2
    weights = np.exp(-radius_sq / (2 * sigma ** 2))
    return weights / weights.sum()

def create_density_map_optimized(image_shape, points, beta=0.3, k=4):
    """Render point annotations into a geometry-adaptive density map.

    Every in-bounds point is splatted as a normalised Gaussian whose sigma is
    ``beta`` times the mean of its nearest-neighbour distances (floored at 1).
    When fewer than ``k`` points remain after bounds filtering, unit impulses
    are accumulated per pixel and blurred with a fixed ``sigma=4`` instead.

    Kernels that overlap the image border are clipped without
    renormalisation, so the map can sum to slightly less than the number of
    points when annotations sit near an edge.

    Args:
        image_shape: Shape of the source image; only the first two entries
            ``(height, width)`` are used.
        points: Sequence of ``(x, y)`` coordinates. Points outside
            ``[0, width) x [0, height)`` are ignored.
        beta: Scale factor applied to the mean neighbour distance to obtain
            the per-point sigma.
        k: Number of neighbours requested from the KD-tree. The first
            returned column (the nearest hit, normally the point itself) is
            dropped before averaging.

    Returns:
        A ``float32`` array of shape ``(height, width)``.
    """
    height, width = image_shape[:2]
    density_map = np.zeros((height, width), dtype=np.float32)
    if len(points) == 0:
        return density_map

    # Discard annotations that fall outside the image.
    in_bounds = [p for p in points if 0 <= p[0] < width and 0 <= p[1] < height]
    if not in_bounds:
        return density_map
    points = np.array(in_bounds)

    if len(points) < k:
        # Too few points for a k-NN estimate. Accumulate (rather than assign)
        # so that several annotations truncating to the same pixel are all
        # counted.
        for x, y in points:
            density_map[int(y), int(x)] += 1.0
        return gaussian_filter(density_map, sigma=4)

    tree = KDTree(points)
    distances, _ = tree.query(points, k=k)
    avg_distances = np.mean(distances[:, 1:], axis=1)

    for (x, y), mean_dist in zip(points, avg_distances):
        x_int, y_int = int(x), int(y)

        sigma = max(1, beta * mean_dist)
        kernel = generate_gaussian_kernel(sigma, int(sigma * 6))
        # Take the half-width from the kernel actually returned: the helper
        # bumps even sizes to odd, so the requested size is not authoritative.
        half = kernel.shape[0] // 2

        # Destination window, clipped to the image. It is never empty because
        # (x_int, y_int) is guaranteed to lie inside the image.
        x_start = max(0, x_int - half)
        y_start = max(0, y_int - half)
        x_end = min(width, x_int + half + 1)
        y_end = min(height, y_int + half + 1)

        # Matching window inside the kernel (offset when clipped at the
        # left/top border).
        k_x_start = max(0, half - x_int)
        k_y_start = max(0, half - y_int)
        k_x_end = k_x_start + (x_end - x_start)
        k_y_end = k_y_start + (y_end - y_start)
        density_map[y_start:y_end, x_start:x_end] += kernel[k_y_start:k_y_end, k_x_start:k_x_end]
    return density_map

In [None]:
def preprocess_and_save_data(image_folder, gt_folder, output_folder):
    """Generate and cache a density map ``.npy`` for every ``.jpg`` in a folder.

    For ``<name>.jpg`` the annotation is read from ``GT_<name>.mat`` in
    ``gt_folder`` and the resulting map is saved as ``<name>.npy`` in
    ``output_folder``. Images whose output file already exists are skipped,
    so the function can be re-run without recomputing finished work.
    Annotations or images that cannot be loaded are reported and skipped
    rather than aborting the whole run.

    Args:
        image_folder: Directory containing the ``*.jpg`` images.
        gt_folder: Directory containing the ``GT_<name>.mat`` annotations.
        output_folder: Directory to write ``.npy`` files to (created if
            missing).
    """
    os.makedirs(output_folder, exist_ok=True)
    for img_path in sorted(glob.glob(os.path.join(image_folder, '*.jpg'))):
        # splitext removes only the trailing extension, unlike str.replace,
        # which would also strip a '.jpg' occurring elsewhere in the name.
        basename = os.path.splitext(os.path.basename(img_path))[0]
        output_file = os.path.join(output_folder, f"{basename}.npy")
        if os.path.exists(output_file):
            continue

        # Load the annotation first so an image without usable ground truth
        # is skipped before paying for the image decode.
        gt_path = os.path.join(gt_folder, f'GT_{basename}.mat')
        try:
            mat = io.loadmat(gt_path)
            points = mat['image_info'][0, 0][0, 0][0]
        except Exception as e:
            print(f"Error loading {gt_path}: {e}")
            continue

        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals an unreadable/corrupt file by returning None
            # instead of raising.
            print(f"Error loading {img_path}: could not decode image")
            continue

        density_map = create_density_map_optimized(image.shape, points, beta=0.2)
        np.save(output_file, density_map)
    print(f"Finished preprocessing for {image_folder}")
    
# Build density maps for both dataset splits. The folder layout is identical
# for each split, so only the split name varies.
for split in ('train', 'test'):
    preprocess_and_save_data(
        image_folder=os.path.join(BASE_DATA_PATH, f'{split}_data/images'),
        gt_folder=os.path.join(BASE_DATA_PATH, f'{split}_data/ground_truth'),
        output_folder=os.path.join(PROCESSED_DATA_PATH, split))
print("--- Preprocessing Complete ---")

In [None]:
import zipfile
import os
from IPython.display import FileLink

def zip_dir(directory=os.curdir, file_name='directory.zip'):
    """Zip every file under ``directory`` and return a download link.

    The archive is written to ``file_name`` inside ``directory`` and is
    excluded from its own contents. The process working directory is left
    untouched.

    Args:
        directory: Root of the tree to archive.
        file_name: Name of the archive, resolved relative to ``directory``.

    Returns:
        An ``IPython.display.FileLink`` pointing at the archive.
    """
    archive_path = os.path.normpath(os.path.join(directory, file_name))
    archive_abs = os.path.abspath(archive_path)
    # The context manager guarantees the archive is finalised and closed even
    # if adding a file raises part-way through.
    with zipfile.ZipFile(archive_path, mode='w') as zip_ref:
        for folder, _, files in os.walk(directory):
            for file in files:
                file_path = os.path.join(folder, file)
                # Skip only the archive itself (exact path match), not every
                # file whose name happens to contain the archive name.
                if os.path.abspath(file_path) == archive_abs:
                    continue
                zip_ref.write(file_path)
    return FileLink(archive_path)

In [None]:
# Archive the current working directory under the default name; the returned
# link is the cell's last expression, so the notebook displays it.
zip_dir()