In [None]:
# Cell for Stage 3: Data Loading and Preprocessing
import tensorflow as tf
import pandas as pd
import os

# Dataset layout (adjust as needed): one sub-folder per split under the dataset root.
data_dir = 'ucf_crime_dataset'
train_dir, val_dir = (os.path.join(data_dir, split) for split in ('train', 'val'))
train_annotations, val_annotations = 'train_annotations.txt', 'val_annotations.txt'

# Read the annotation tables (train first, then val).
# Assumes a CSV-like format with 'image_path' and 'anomaly_type' columns.
train_df, val_df = (
    pd.read_csv(annotation_file)
    for annotation_file in (train_annotations, val_annotations)
)

# Define label parsing function
def parse_labels(annotation):
    """Derive the four task labels from a single annotation record.

    Args:
        annotation: Any mapping-like object indexable with 'anomaly_type'.
            The value is an integer class index (0–12 for anomalies,
            13 for normal).

    Returns:
        dict with keys 'anomaly_type' (the index, passed through unchanged),
        'general_anomaly', 'violence' and 'property_crime' (each 0 or 1).
    """
    # Class-index groups; adjust to the index mapping of the annotation files.
    violent_types = (0, 3, 7)        # Assault, Fighting, Shooting
    property_types = (2, 5, 6, 8)    # Burglary, Stealing, Shoplifting, Vandalism

    anomaly_type = annotation['anomaly_type']
    return {
        'anomaly_type': anomaly_type,
        'general_anomaly': int(anomaly_type < 13),
        'violence': int(anomaly_type in violent_types),
        'property_crime': int(anomaly_type in property_types),
    }

# Load and preprocess images
def load_image(image_path, label):
    """Read one JPEG from disk and return it as a 224x224 RGB image scaled to [0, 1].

    Args:
        image_path: Path of the image file to read.
        label: Label structure for this image; returned untouched.

    Returns:
        (image, label) where image is the decoded, resized and rescaled picture.
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, [224, 224])
    # Normalize to [0,1]
    return resized / 255.0, label

# Create tf.data.Dataset
def create_dataset(df, directory, batch_size=16, max_images=10000):
    """Build a batched tf.data pipeline of (images, labels) from an annotation table.

    Args:
        df: DataFrame with an 'image_path' column (paths relative to
            `directory`) plus the column(s) read by parse_labels.
        directory: Folder that the 'image_path' entries are joined onto.
        batch_size: Number of images per batch.
        max_images: Upper bound on the number of images yielded per pass.

    Returns:
        tf.data.Dataset yielding (images, labels) batches, where `labels` is a
        dict keyed by task name (the keys produced by parse_labels).

    Raises:
        ValueError: If `df` contains no rows.
    """
    image_paths = [os.path.join(directory, path) for path in df['image_path']]
    parsed = [parse_labels(row) for _, row in df.iterrows()]
    if not parsed:
        raise ValueError("create_dataset received an empty annotation DataFrame")

    # from_tensor_slices cannot turn a list of per-row dicts into a tensor;
    # it needs one sliceable sequence per task, so transpose to a dict of lists.
    labels = {task: [int(record[task]) for record in parsed] for task in parsed[0]}

    dataset = tf.data.Dataset.from_tensor_slices((image_paths, labels))
    dataset = dataset.map(load_image, num_parallel_calls=tf.data.AUTOTUNE)
    # Drop images containing NaN/Inf pixels.
    dataset = dataset.filter(lambda img, lbl: tf.reduce_all(tf.math.is_finite(img)))
    # Cap on individual images (before batching) so the limit is exact even
    # when max_images is not a multiple of batch_size.
    dataset = dataset.take(max_images)
    dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return dataset

# Create datasets: same batch size for both splits, smaller image budget for validation
train_dataset, val_dataset = (
    create_dataset(frame, folder, batch_size=16, max_images=budget)
    for frame, folder, budget in ((train_df, train_dir, 10000), (val_df, val_dir, 2000))
)

# Verify dataset by inspecting the first training batch
for img, labels in train_dataset.take(1):
    label_shapes = {k: v.shape for k, v in labels.items()}
    print(f"Image shape: {img.shape}, Label shapes: {label_shapes}")

# Stage 4: Single-Task Model for Baseline

In [None]:
# Cell for Stage 4: Single-Task Model for anomaly_type Baseline
from tensorflow.keras.applications import ResNet50
from tensorflow.keras import layers, Model
from sklearn.metrics import roc_auc_score
import numpy as np

# Build single-task model for anomaly_type
# parse_labels emits indices 0–12 for anomalies and 13 for normal, so the
# softmax head needs 14 units; with 13, label 13 is out of range for
# sparse_categorical_crossentropy.
NUM_ANOMALY_CLASSES = 14

# NOTE(review): load_image scales pixels to [0, 1], while the ImageNet ResNet50
# weights are normally paired with resnet50.preprocess_input — confirm this
# input scaling is intended for the frozen backbone.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False
x = base_model.output
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(512, activation='relu')(x)
anomaly_type = layers.Dense(NUM_ANOMALY_CLASSES, activation='softmax', name='anomaly_type')(x)
model = Model(inputs=base_model.input, outputs=anomaly_type)

# Compile model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


def _anomaly_type_only(img, labels):
    """Keep only the 'anomaly_type' target from the multi-task label dict."""
    return img, labels['anomaly_type']


# The shared datasets yield a dict with four label tensors; this model has a
# single output, so feed it just the target it is trained on.
baseline_train = train_dataset.map(_anomaly_type_only)
baseline_val = val_dataset.map(_anomaly_type_only)

# Train model
history = model.fit(baseline_train, validation_data=baseline_val, epochs=5, verbose=1)

# Evaluate AUC (One-vs-Rest)
y_true, y_pred = [], []
for img, labels in val_dataset:
    preds = model.predict(img, verbose=0)
    y_true.append(labels['anomaly_type'].numpy())
    y_pred.append(preds)
y_true = np.concatenate(y_true)
y_pred = np.concatenate(y_pred)
# Pass the full label set explicitly so the score columns are matched to class
# indices rather than to whichever classes happen to occur in y_true.
auc = roc_auc_score(y_true, y_pred, multi_class='ovr', labels=np.arange(NUM_ANOMALY_CLASSES))
print(f"Baseline anomaly_type AUC: {auc:.4f}")

# Stage 5: Multi-Task Model Design

In [None]:
# Cell for Stage 5: Multi-Task Model Design
# Reusing imports from Stage 4
# Shared trunk: frozen ImageNet ResNet50 -> global average pool -> 512-unit dense layer.
# NOTE(review): load_image scales pixels to [0, 1], while the ImageNet ResNet50
# weights are normally paired with resnet50.preprocess_input — confirm this
# input scaling is intended for the frozen backbone.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False
x = base_model.output
x = layers.GlobalAveragePooling2D()(x)
# Named so later cells can look the shared representation up by name.
x = layers.Dense(512, activation='relu', name='dense_512')(x)
# Task-specific heads
# parse_labels emits indices 0–12 for anomalies and 13 for normal, so the
# multi-class head needs 14 units (13 would leave label 13 out of range).
anomaly_type = layers.Dense(14, activation='softmax', name='anomaly_type')(x)
general_anomaly = layers.Dense(1, activation='sigmoid', name='general_anomaly')(x)
violence = layers.Dense(1, activation='sigmoid', name='violence')(x)
property_crime = layers.Dense(1, activation='sigmoid', name='property_crime')(x)
# Output order matters: downstream cells index predictions positionally.
model = Model(inputs=base_model.input, outputs=[anomaly_type, general_anomaly, violence, property_crime])

# Compile with weighted losses
losses = {
    'anomaly_type': 'sparse_categorical_crossentropy',
    'general_anomaly': 'binary_crossentropy',
    'violence': 'binary_crossentropy',
    'property_crime': 'binary_crossentropy'
}
# The main task dominates; each auxiliary task contributes at half weight.
loss_weights = {'anomaly_type': 1.0, 'general_anomaly': 0.5, 'violence': 0.5, 'property_crime': 0.5}
# Spell the metrics out per output: a flat list is ambiguous for a
# multi-output model, a dict keyed by output name is not.
task_metrics = {task: ['accuracy'] for task in losses}
model.compile(optimizer='adam', loss=losses, loss_weights=loss_weights, metrics=task_metrics)

# Summary
model.summary()

# Stage 6: Multi-Task Training and Analysis

In [None]:
# Cell for Stage 6: Multi-Task Training and Analysis
# Compute gradient similarity
def compute_gradient_similarity(model, data):
    """Cosine similarity between the first task's gradient and each other task's.

    Each task's loss is computed from the model's prediction for that output
    and the matching entry of the label dict, then differentiated with respect
    to the trainable variables. Only variables that receive a gradient from
    every task (i.e. the shared layers) enter the comparison; head-specific
    weights are skipped because other tasks have no gradient for them.

    Args:
        model: Keras model whose outputs are named after the tasks. The first
            output is the reference task. Outputs with a last dimension of 1
            are scored with binary cross-entropy, all others with sparse
            categorical cross-entropy.
        data: Tuple (images, labels) for one batch, where `labels` is a dict
            keyed by the model's output names.

    Returns:
        List of floats, one per non-reference task in output order, each the
        cosine similarity in [-1, 1] between that task's shared-layer gradient
        and the reference task's. Empty for a single-output model.

    Raises:
        ValueError: If no trainable variable is shared by all tasks.
    """
    images, labels = data
    variables = model.trainable_variables
    task_names = list(model.output_names)

    # persistent=True: the tape is queried once per task below.
    with tf.GradientTape(persistent=True) as tape:
        # Inference mode keeps the analysis free of training-only randomness.
        predictions = model(images, training=False)
        if not isinstance(predictions, (list, tuple)):
            predictions = [predictions]
        task_losses = []
        for name, pred in zip(task_names, predictions):
            target = labels[name]
            if pred.shape[-1] == 1:
                # Align the label tensor with the (batch, 1) sigmoid output.
                target = tf.reshape(tf.cast(target, pred.dtype), tf.shape(pred))
                per_example = tf.keras.losses.binary_crossentropy(target, pred)
            else:
                per_example = tf.keras.losses.sparse_categorical_crossentropy(target, pred)
            task_losses.append(tf.reduce_mean(per_example))

    # One gradient list per task (tape.gradient on a list of targets would sum them).
    task_grads = [tape.gradient(loss, variables) for loss in task_losses]
    del tape

    shared = [
        idx for idx in range(len(variables))
        if all(grads[idx] is not None for grads in task_grads)
    ]
    if not shared:
        raise ValueError("No trainable variable receives a gradient from every task")

    def _flatten(grads):
        """Concatenate the shared-variable gradients into one flat vector."""
        return tf.concat([tf.reshape(grads[idx], [-1]) for idx in shared], axis=0)

    reference = _flatten(task_grads[0])
    similarities = []
    for grads in task_grads[1:]:
        other = _flatten(grads)
        # Small epsilon guards against division by zero for vanishing gradients.
        denom = tf.norm(reference) * tf.norm(other) + 1e-12
        similarities.append(float(tf.reduce_sum(reference * other) / denom))
    return similarities

# Fit the multi-task model on the shared datasets
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=10,
    verbose=1,
)

# Gradient similarity, measured on the first validation batch only
for img, labels in val_dataset.take(1):
    similarities = compute_gradient_similarity(model, (img, labels))
    print(f"Gradient similarities (anomaly_type vs. sub-tasks): {similarities}")

# Keep the per-task training loss curves around for Stage 9
loss_history = dict(
    (key, history.history[f'{key}_loss'])
    for key in ['anomaly_type', 'general_anomaly', 'violence', 'property_crime']
)

# Stage 7: Evaluation Metrics

In [None]:
# Cell for Stage 7: Evaluation Metrics
# Evaluate OvR AUC for anomaly_type and binary AUC for sub-tasks
# Task order must match the order of the model's output list, because
# model.predict returns one array per output in that order.
eval_tasks = ['anomaly_type', 'general_anomaly', 'violence', 'property_crime']
y_true = {task: [] for task in eval_tasks}
y_pred = {task: [] for task in eval_tasks}
for img, labels in val_dataset:
    preds = model.predict(img, verbose=0)
    for task, task_preds in zip(eval_tasks, preds):
        y_true[task].append(labels[task].numpy())
        y_pred[task].append(task_preds)

# Compute AUC
for task in eval_tasks:
    y_true[task] = np.concatenate(y_true[task])
    y_pred[task] = np.concatenate(y_pred[task])
    if task == 'anomaly_type':
        # One score column per class; pass the label set explicitly so columns
        # are matched to class indices rather than to the classes that happen
        # to occur in this validation subset.
        auc = roc_auc_score(
            y_true[task], y_pred[task],
            multi_class='ovr', labels=np.arange(y_pred[task].shape[1]),
        )
    else:
        # Binary heads emit (N, 1) scores; flatten to the 1-D vector expected
        # for binary AUC, and leave multi_class at its default (None is not an
        # accepted value).
        auc = roc_auc_score(y_true[task], y_pred[task].ravel())
    print(f"{task} AUC: {auc:.4f}")

# Stage 8: Ablation Study

In [None]:
# Cell for Stage 8: Ablation Study
# Define task combinations
combinations = [
    ['anomaly_type'],
    ['anomaly_type', 'general_anomaly'],
    ['anomaly_type', 'violence', 'property_crime'],
    ['anomaly_type', 'general_anomaly', 'violence', 'property_crime']
]

# parse_labels emits indices 0–12 for anomalies and 13 for normal -> 14 classes.
ABLATION_NUM_CLASSES = 14


def build_ablation_model(task_names):
    """Build and compile a frozen-ResNet50 model with one head per requested task.

    Args:
        task_names: Ordered list of task names; 'anomaly_type' gets a softmax
            head, every other task a single-unit sigmoid head.

    Returns:
        Compiled Keras model. With one task the model has a single (non-list)
        output; otherwise its outputs follow the order of `task_names`.
    """
    backbone = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    backbone.trainable = False
    shared = layers.GlobalAveragePooling2D()(backbone.output)
    shared = layers.Dense(512, activation='relu')(shared)

    heads = []
    for task in task_names:
        if task == 'anomaly_type':
            heads.append(layers.Dense(ABLATION_NUM_CLASSES, activation='softmax', name=task)(shared))
        else:
            heads.append(layers.Dense(1, activation='sigmoid', name=task)(shared))

    single_head = len(heads) == 1
    net = Model(inputs=backbone.input, outputs=heads[0] if single_head else heads)

    # Key losses and weights by task name: a head tensor's `.name` is not the
    # layer name, so it cannot be used to identify the task.
    task_losses = {
        task: 'sparse_categorical_crossentropy' if task == 'anomaly_type' else 'binary_crossentropy'
        for task in task_names
    }
    task_weights = {task: 1.0 if task == 'anomaly_type' else 0.5 for task in task_names}
    task_metrics = ['accuracy'] if single_head else {task: ['accuracy'] for task in task_names}
    net.compile(optimizer='adam', loss=task_losses, loss_weights=task_weights, metrics=task_metrics)
    return net


def select_task_labels(task_names):
    """Return a dataset map function that keeps only the targets in `task_names`.

    A single task yields a bare label tensor; several tasks yield a dict
    restricted to those task names.
    """
    task_names = tuple(task_names)

    def _select(img, labels):
        if len(task_names) == 1:
            return img, labels[task_names[0]]
        return img, {task: labels[task] for task in task_names}

    return _select


# Run ablation study
auc_results = {}
for combo in combinations:
    # A dedicated name keeps the multi-task `model` from earlier cells intact
    # for the cells that run after the ablation.
    ablation_model = build_ablation_model(combo)
    keep_labels = select_task_labels(combo)

    # Train
    ablation_model.fit(
        train_dataset.map(keep_labels),
        validation_data=val_dataset.map(keep_labels),
        epochs=5,
        verbose=0,
    )

    # Evaluate anomaly_type AUC
    type_index = combo.index('anomaly_type')
    y_true, y_pred = [], []
    for img, labels in val_dataset:
        preds = ablation_model.predict(img, verbose=0)
        y_true.append(labels['anomaly_type'].numpy())
        # A single-output model returns the score array itself (indexing it
        # would pick the first sample); multi-output returns one array per head.
        y_pred.append(preds if len(combo) == 1 else preds[type_index])
    y_true = np.concatenate(y_true)
    y_pred = np.concatenate(y_pred)
    auc = roc_auc_score(y_true, y_pred, multi_class='ovr', labels=np.arange(ABLATION_NUM_CLASSES))
    auc_results[str(combo)] = auc
    print(f"AUC for {combo}: {auc:.4f}")

print("Ablation study results:", auc_results)

# Stage 9: Task Relationship Visualization

In [None]:
# Cell for Stage 9: Task Relationship Visualization
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.manifold import TSNE

# 1. Gradient Similarity Heatmap
tasks = ['anomaly_type', 'general_anomaly', 'violence', 'property_crime']
# Only anomaly_type-vs-sub-task similarities are measured. Pairs that were not
# computed stay NaN so the heatmap leaves them blank instead of showing a
# misleading 0; the diagonal is 1 (a gradient is perfectly aligned with itself).
sim_matrix = np.full((len(tasks), len(tasks)), np.nan)
np.fill_diagonal(sim_matrix, 1.0)
for img, labels in val_dataset.take(1):
    similarities = compute_gradient_similarity(model, (img, labels))  # From Stage 6
    similarity_row = np.array([float(s) for s in similarities])
    # Cosine similarity is symmetric, so mirror the row into the first column.
    sim_matrix[0, 1:] = similarity_row
    sim_matrix[1:, 0] = similarity_row
plt.figure()  # fresh figure so the plots below never draw onto this one
sns.heatmap(sim_matrix, xticklabels=tasks, yticklabels=tasks, annot=True, cmap='Blues')
plt.title('Gradient Similarity Between Tasks')
plt.savefig('gradient_similarity.png')
plt.show()

# 2. Loss Correlation Matrix
loss_df = pd.DataFrame(loss_history)  # From Stage 6
corr_matrix = loss_df.corr()
plt.figure()
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')
plt.title('Task Loss Correlation')
plt.savefig('loss_correlation.png')
plt.show()

# 3. t-SNE Feature Visualization
# Take the shared representation as "whatever feeds the anomaly_type head"
# rather than looking a layer up by a fixed name: this keeps working even if
# `model` was rebound to a model whose shared dense layer is auto-named.
shared_features = model.get_layer('anomaly_type').input
activation_model = Model(inputs=model.input, outputs=shared_features)
features, y_true = [], []
for img, labels in val_dataset:
    features.append(activation_model.predict(img, verbose=0))
    y_true.append(labels['anomaly_type'].numpy())
features = np.concatenate(features)
y_true = np.concatenate(y_true)
tsne = TSNE(n_components=2, random_state=42).fit_transform(features)
plt.figure()
points = plt.scatter(tsne[:, 0], tsne[:, 1], c=y_true, cmap='viridis')
plt.colorbar(points, label='anomaly_type index')  # legend for the point colours
plt.title('t-SNE of Shared Layer Features by Anomaly Type')
plt.savefig('tsne_features.png')
plt.show()