In [1]:
# Record the installed torchvision version. `%pip` (rather than `!pip`) runs pip
# for the environment of the running kernel.
%pip show torchvision

Name: torchvision
Version: 0.15.1
Summary: image and video datasets and models for torch deep learning
Home-page: https://github.com/pytorch/vision
Author: PyTorch Core Team
Author-email: soumith@pytorch.org
License: BSD
Location: /opt/conda/lib/python3.10/site-packages
Requires: numpy, pillow, requests, torch
Required-by: easyocr, fastai, timm


In [2]:
import os
from pathlib import Path
import numpy as np
import pandas as pd
from datetime import datetime, timezone, timedelta
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use("Agg")
import torch
import torchvision
from torchvision.io import read_image
from torchvision.transforms import v2 as T
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor



# Object detection data prep

In [3]:
# Scratch directory that receives the rendered window images fed to the detector.
working_folder = Path("/kaggle/working/")
images_folder = working_folder/"images"
# exist_ok=True keeps this cell re-runnable (a bare mkdir() raises FileExistsError
# when the folder is already there); parents=True covers a missing working folder.
images_folder.mkdir(parents=True, exist_ok=True)

In [4]:
# Location of the competition input files.
data_folder = Path("/kaggle/input/child-mind-institute-detect-sleep-states")
# List the files available in that folder.
!ls {data_folder}

sample_submission.csv  train_events.csv
test_series.parquet    train_series.parquet


In [5]:
# Load the test accelerometer readings and display them.
test_series_path = data_folder.joinpath("test_series.parquet")
test_series = pd.read_parquet(test_series_path)
test_series

Unnamed: 0,series_id,step,timestamp,anglez,enmo
0,038441c925bb,0,2018-08-14T15:30:00-0400,2.636700,0.0217
1,038441c925bb,1,2018-08-14T15:30:05-0400,2.636800,0.0215
2,038441c925bb,2,2018-08-14T15:30:10-0400,2.637000,0.0216
3,038441c925bb,3,2018-08-14T15:30:15-0400,2.636800,0.0213
4,038441c925bb,4,2018-08-14T15:30:20-0400,2.636800,0.0215
...,...,...,...,...,...
445,0402a003dae9,145,2018-12-18T12:57:05-0500,-59.696899,0.0601
446,0402a003dae9,146,2018-12-18T12:57:10-0500,-35.656601,0.0427
447,0402a003dae9,147,2018-12-18T12:57:15-0500,-21.582399,0.0309
448,0402a003dae9,148,2018-12-18T12:57:20-0500,-42.616001,0.0328


In [6]:
# Flag readings whose enmo value exceeds 0.2; the flag later selects the scatter colour.
test_series['large_enmo'] = test_series['enmo'].gt(0.2)

In [7]:
# Distinct series identifiers, in order of first appearance, and how many there are.
series_ids = pd.unique(test_series['series_id'])
len(series_ids)

3

In [8]:
# Render every series as one image per 24-hour window (20:30 UTC to 20:30 UTC) and
# record, for each image, how many 5-second steps the window spans. The records are
# collected in `window_properties` (one dict per saved image).
window_properties = []
for i, series_id in enumerate(series_ids):
    print("i:", i)
    print("series_id:", series_id)
    series = test_series.loc[test_series['series_id'] == series_id].reset_index(drop=True)
    series['color'] = np.where(series['large_enmo'], "blue", "green")
    # Parse straight to UTC. utc=True copes with a series whose raw strings carry
    # different UTC offsets (e.g. -0400 and -0500) in a single vectorised call,
    # instead of parsing to local time and converting row by row in Python.
    # The raw 'timestamp' column is left as it was read.
    series['timestamp_utc'] = pd.to_datetime(series['timestamp'], utc=True)
    series['anglez_radians'] = (np.pi / 180) * series['anglez']
    series['cos_anglez'] = np.cos(series['anglez_radians'])
    series['enmo'] = series['enmo'].clip(0, 1)

    # Slice the series into consecutive 24-hour windows; empty windows are skipped.
    # Each window is keyed by its upper bound, rendered without the "+00:00" suffix.
    min_date_utc = series['timestamp_utc'].dt.date.min()
    last_timestamp_utc = series['timestamp_utc'].max()  # loop-invariant, computed once
    one_day = timedelta(hours=24)
    upper_bound = datetime(year=min_date_utc.year, month=min_date_utc.month, day=min_date_utc.day, hour=20, minute=30, tzinfo=timezone.utc)
    lower_bound = upper_bound - one_day # 8:30pm UTC on the previous day.
    series_24_hour_windows = {}
    while lower_bound < last_timestamp_utc:
        in_window = (series['timestamp_utc'] >= lower_bound) & (series['timestamp_utc'] < upper_bound)
        window_df = series.loc[in_window].reset_index(drop=True)
        if len(window_df) > 0:
            series_24_hour_windows[upper_bound.isoformat()[:-6]] = window_df
        upper_bound += one_day
        lower_bound += one_day

    num_steps_cumulative = 0
    for window_idx, (window, window_df) in enumerate(series_24_hour_windows.items()):
        image_name = f"{series_id}_{window}.jpg"

        # Draw cos(anglez) as a red line and enmo as coloured dots on a bare canvas:
        # no spines, no ticks, no margins, axes stretched over the whole figure.
        fig, ax = plt.subplots(figsize=(14.4, 4))  # (width, height) in inches
        ax.plot(window_df['timestamp_utc'], window_df['cos_anglez'], color="red")
        ax.scatter(window_df['timestamp_utc'], window_df['enmo'], color=window_df['color'], s=1)
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.margins(0, 0)
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
        fig.savefig(images_folder/image_name, bbox_inches="tight", pad_inches=0)
        plt.close(fig)  # release the figure so memory does not grow with the number of windows

        # Number of steps covered by the window, derived from its time span
        # (one step every 5 seconds, both end points included).
        timestamps = window_df['timestamp_utc']
        num_steps_window = (timestamps.max() - timestamps.min()).total_seconds() / 5 + 1
        num_steps_cumulative += num_steps_window
        window_properties.append({
            'series_id': series_id,
            'image_name': image_name,
            'idx_in_series': window_idx,
            'num_steps_window': num_steps_window,
            'num_steps_cumulative': num_steps_cumulative
        })

i: 0
series_id: 038441c925bb
i: 1
series_id: 03d92c9f6f8a
i: 2
series_id: 0402a003dae9


In [9]:
# One row per saved window image: series id, image file name, index of the window
# within its series, and the per-window / running step counts.
window_properties_df = pd.DataFrame(window_properties)
window_properties_df

Unnamed: 0,series_id,image_name,idx_in_series,num_steps_window,num_steps_cumulative
0,038441c925bb,038441c925bb_2018-08-14T20:30:00.jpg,0,150.0,150.0
1,03d92c9f6f8a,03d92c9f6f8a_2018-05-31T20:30:00.jpg,0,150.0,150.0
2,0402a003dae9,0402a003dae9_2018-12-18T20:30:00.jpg,0,150.0,150.0


# Transforms

In [10]:
def get_transforms():
    """Build the preprocessing pipeline applied to an image before inference.

    Returns:
        A ``T.Compose`` that first converts the image to ``torch.float``
        (``ConvertDtype`` also scales the values accordingly) and then
        normalises the three channels with the fixed mean / std values below.
    """
    return T.Compose([
        T.ConvertDtype(torch.float),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

From the <a href="https://pytorch.org/vision/0.15/generated/torchvision.transforms.v2.ConvertDtype.html" target="_blank">documentation</a>:

> Convert input image or video to the given `dtype` and scale the values accordingly.

In [11]:
# Preprocessing pipeline used for every image at inference time.
eval_transforms = get_transforms()

# Instantiating the model

In [12]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [13]:
# 2 classes ('onset' & 'wakeup') + the 'background' class.
num_classes = 3
# Build the detector with no pretrained weights requested for either the full
# model or its backbone.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
    weights=None,
    weights_backbone=None,
    max_size=1440,
)
# Swap the stock box predictor for one sized to `num_classes`, reusing the number
# of input features the existing classification layer expects.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

In [14]:
# Load the trained parameters and copy them into the model.
# torch.load unpickles the file; weights_only=True restricts unpickling to tensors
# and plain containers, which is all a state dict needs, so a tampered checkpoint
# cannot execute arbitrary code.
weights = torch.load(
    "/kaggle/input/d-s-s-faster-r-cnn-modeling/m12_4.pth",
    map_location=device,
    weights_only=True,
)
model.load_state_dict(weights)

<All keys matched successfully>

In [15]:
# Move the model to the selected device; the trailing ';' suppresses the cell output.
model.to(device);

# Inference

In [16]:
# Switch the model to evaluation mode before running inference.
model.eval()
# Predicted boxes are kept only when their score is strictly greater than this value.
thresh = 0.0 # Tune this.
# Maps the detector's integer labels back to event names.
reverse_label_mapping = {1: "onset", 2: "wakeup"}

In [17]:
def get_submission_df(image_width=1440, min_box_width=10):
    """Run the detector on every window image and collect the predicted events.

    Reads the notebook-level ``series_ids``, ``window_properties_df``,
    ``images_folder``, ``eval_transforms``, ``model``, ``device``, ``thresh`` and
    ``reverse_label_mapping``.

    For each image, boxes that are not wider than ``min_box_width`` pixels or whose
    score is not above ``thresh`` are discarded. The horizontal centre of every
    remaining box is converted to a step within the window (as a fraction of
    ``image_width``) and offset by the cumulative step count of the preceding
    windows of the same series.

    Args:
        image_width: Pixel width used to turn a box centre into a fraction of the
            window. Defaults to 1440, the value the code has always assumed.
        min_box_width: Boxes must be strictly wider than this many pixels to be
            kept; narrower ones are treated as false positives.

    Returns:
        A DataFrame with columns ``row_id``, ``series_id``, ``step``, ``event`` and
        ``score``, sorted by series and step. It has no rows when nothing is
        predicted.
    """
    columns = ['row_id', 'series_id', 'step', 'event', 'score']
    all_test_preds = []
    for series_id in series_ids:
        series_rows = window_properties_df.loc[window_properties_df['series_id'] == series_id].reset_index(drop=True)
        for j in range(len(series_rows)): # j is the index of the image (within the series).
            image_path = os.path.join(images_folder, series_rows['image_name'][j])
            image = read_image(image_path)
            with torch.no_grad():
                x = eval_transforms(image).to(device)
                pred = model([x])[0]

            # Postprocessing: drop 'narrow' boxes (false positives) and low scores
            # with a single combined mask.
            box_widths = pred['boxes'][:, 2] - pred['boxes'][:, 0]
            keep = (box_widths > min_box_width) & (pred['scores'] > thresh)
            boxes = pred['boxes'][keep]
            if len(boxes) == 0:
                continue

            # Copy the surviving predictions to plain Python lists in one go instead
            # of calling .item() per box; the step arithmetic below then runs on
            # Python / numpy floats rather than float32 tensors.
            centers_x = ((boxes[:, 0] + boxes[:, 2]) / 2).tolist()
            labels = pred['labels'][keep].tolist()
            scores = pred['scores'][keep].tolist()

            num_steps_window = series_rows['num_steps_window'][j]
            # Steps covered by all earlier windows of this series.
            prev_num_steps_cumulative = 0 if j == 0 else series_rows['num_steps_cumulative'][j - 1]
            for center_x, label, score in zip(centers_x, labels, scores):
                step_in_window = (center_x / image_width) * num_steps_window
                all_test_preds.append({
                    'series_id': series_id,
                    'step': int(prev_num_steps_cumulative + step_in_window),
                    'event': reverse_label_mapping[label],
                    'score': score,
                })

    if len(all_test_preds) == 0:
        return pd.DataFrame({column: [] for column in columns})

    submission_df = pd.DataFrame(all_test_preds)
    submission_df = submission_df.sort_values(by=['series_id', 'step']).reset_index(drop=True)
    submission_df['row_id'] = np.arange(len(submission_df))
    return submission_df[columns]

In [18]:
# Run inference over all saved window images and display the resulting events.
submission_df = get_submission_df()
submission_df

Unnamed: 0,row_id,series_id,step,event,score
0,0,038441c925bb,148,wakeup,0.069903
1,1,0402a003dae9,148,wakeup,0.261451


In [19]:
# Remove the rendered images. The path is interpolated from `images_folder` so the
# cleanup always targets the folder the images were actually written to.
!rm -rf {images_folder}

In [20]:
# Write the predictions to submission.csv in the current directory, without the index column.
submission_df.to_csv("submission.csv", index=False)