# Question 4: Generalization with zero-shot transfer learning

In [None]:
import numpy as np
import matplotlib.pyplot as plt

from utils import import_data, get_baseline_performance, evaluate_on_task

## Settings

In [None]:
# Seed NumPy's global random generator so NumPy-based randomness is repeatable.
np.random.seed(42)

# Load the transfer results. import_data is called with "../",
# "intersection_speed" and False and returns nine values.
# NOTE(review): presumably a data root path, a task name and a flag -- confirm in utils.
(
    data_transfer,
    data_transfer_std,
    deltas,
    delta_min,
    delta_max,
    slope,
    lower_bound,
    upper_bound,
    unguided,
) = import_data("../", "intersection_speed", False)

# Flip the sign so that higher means better: the raw performance metric
# (average waiting time) is lower-is-better.
data_transfer = -data_transfer

## Question 4.1
Suppose your training budget is 5 models. First, let’s consider a simple strategy: a random selection of source tasks. Evaluate at least 10 independent random selections of source tasks and plot the mean and the spread across runs (the plotting cell below shades one standard deviation around the mean).

In [None]:
# Training budget for this question: five source tasks.
num_transfer_steps = 5
print(
    f"TODO: Please choose {num_transfer_steps} source tasks from {deltas}"
)

# Baseline curves that the plots below compare against.
(
    oracle_transfer,
    exhaustive_training,
    sequential_oracle_training,
) = get_baseline_performance(data_transfer, num_transfer_steps)

In [None]:
# TODO: Please choose five source tasks for each of the ten random repetitions!
# Every list below is one repetition; it is passed, together with `deltas` and
# `num_transfer_steps`, to evaluate_on_task in the next cell.
# NOTE(review): presumably each entry is a value taken from `deltas` (see the
# printed prompt above) -- confirm against utils.evaluate_on_task.
source_tasks_random_1 = []
source_tasks_random_2 = []
source_tasks_random_3 = []
source_tasks_random_4 = []
source_tasks_random_5 = []
source_tasks_random_6 = []
source_tasks_random_7 = []
source_tasks_random_8 = []
source_tasks_random_9 = []
source_tasks_random_10 = []

In [None]:
# Evaluate each of the ten random selections, in order, with the same
# `data_transfer`, `deltas` and `num_transfer_steps`, and bind every result to
# its own name so later cells can refer to the repetitions individually.
(
    performance_random_1,
    performance_random_2,
    performance_random_3,
    performance_random_4,
    performance_random_5,
    performance_random_6,
    performance_random_7,
    performance_random_8,
    performance_random_9,
    performance_random_10,
) = [
    evaluate_on_task(data_transfer, selection, deltas, num_transfer_steps)
    for selection in (
        source_tasks_random_1,
        source_tasks_random_2,
        source_tasks_random_3,
        source_tasks_random_4,
        source_tasks_random_5,
        source_tasks_random_6,
        source_tasks_random_7,
        source_tasks_random_8,
        source_tasks_random_9,
        source_tasks_random_10,
    )
]

In [None]:
# Stack the ten repetitions into a single array; the first axis indexes the
# random selections.
performance_random = np.array(
    [
        performance_random_1,
        performance_random_2,
        performance_random_3,
        performance_random_4,
        performance_random_5,
        performance_random_6,
        performance_random_7,
        performance_random_8,
        performance_random_9,
        performance_random_10,
    ]
)

# Mean and standard deviation across the repetitions (axis 0).
performance_random_mean = np.mean(performance_random, axis=0)
performance_random_std = np.std(performance_random, axis=0)

In [None]:
# Plot: baselines versus the mean performance of the random selections.

# Configure text styling up front (both settings are applied before any axes
# or text are created).
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams.update({'font.size': 12})

# Open a fresh figure directly. No plt.clf() beforehand: clf() acts on the
# *current* figure, so it would either create an extra empty figure (when none
# is open) or wipe a figure drawn earlier.
plt.figure(figsize=(8, 6))

# x positions 1..num_transfer_steps, shared by every curve.
step_axis = range(1, num_transfer_steps + 1)

# Baselines (dashed) and the random-selection mean (solid).
plt.plot(step_axis, oracle_transfer, '--r.', label='Oracle Transfer')
plt.plot(step_axis, exhaustive_training, '--g.', label='Exhaustive Training')
plt.plot(step_axis, sequential_oracle_training, '--b.', label='Sequential Oracle Training')
plt.plot(step_axis, performance_random_mean, '-k.', label='Random selected tasks')

# Shaded band: one standard deviation around the mean of the random runs.
plt.fill_between(
    step_axis,
    performance_random_mean - performance_random_std,
    performance_random_mean + performance_random_std,
    color='gray',
    alpha=0.3,
    label='Random selected tasks (std)',
)

# Fixed axis limits; values outside (-3.55, -3.2) on the y-axis are not visible.
plt.xlim((0, num_transfer_steps + 1))
plt.ylim((-3.55, -3.2))
plt.legend(loc="lower right", fontsize=10)
plt.ylabel("Average reward")
plt.xlabel("Transfer steps")
plt.grid(color='gray', linestyle='dashed', alpha=0.5)


## Question 4.2
Suppose again that your training budget is 5 models. Without cheating (!), that is, without having access to the training and transfer performance results in advance, which 5 models are most sensible to train, under the assumption of a linear generalization gap?

In [None]:
# Remind the reader how many source tasks to pick and which values are available.
print(
    f"TODO: Please choose {num_transfer_steps} source tasks from {deltas}"
)

In [None]:
# TODO: Please choose five source tasks!
# Fill in the selection made under the linear generalization gap assumption.
source_tasks_linear = []

# Evaluate that selection with the same data, `deltas` and budget as before.
performance_linear = evaluate_on_task(
    data_transfer,
    source_tasks_linear,
    deltas,
    num_transfer_steps,
)

In [None]:
# Plot: baselines, the random-selection mean/std, and the Q4.2 selection.

# Configure text styling up front (both settings are applied before any axes
# or text are created).
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams.update({'font.size': 12})

# Open a fresh figure directly. No plt.clf() beforehand: clf() acts on the
# *current* figure, so it would either create an extra empty figure (when none
# is open) or wipe a figure drawn earlier.
plt.figure(figsize=(8, 6))

# x positions 1..num_transfer_steps, shared by every curve.
step_axis = range(1, num_transfer_steps + 1)

# Baselines (dashed) and the random-selection mean (solid).
plt.plot(step_axis, oracle_transfer, '--r.', label='Oracle Transfer')
plt.plot(step_axis, exhaustive_training, '--g.', label='Exhaustive Training')
plt.plot(step_axis, sequential_oracle_training, '--b.', label='Sequential Oracle Training')
plt.plot(step_axis, performance_random_mean, '-k.', label='Random selected tasks')

# Shaded band: one standard deviation around the mean of the random runs.
plt.fill_between(
    step_axis,
    performance_random_mean - performance_random_std,
    performance_random_mean + performance_random_std,
    color='gray',
    alpha=0.3,
    label='Random selected tasks (std)',
)

# The selection chosen for Question 4.2.
plt.plot(step_axis, performance_linear, '-m.', label='Answer to Q4.2')

# Fixed axis limits; values outside (-3.55, -3.2) on the y-axis are not visible.
plt.xlim((0, num_transfer_steps + 1))
plt.ylim((-3.55, -3.2))
plt.legend(loc="lower right", fontsize=10)
plt.ylabel("Average reward")
plt.xlabel("Transfer steps")
plt.grid(color='gray', linestyle='dashed', alpha=0.5)


## Question 4.3
Finally, suppose that you don’t know your training budget in advance. Your boss just says “try not to blow the entire department’s budget on this.” For this, try out the “anytime” approach Temporal Transfer Learning (TTL) discussed in class, to provide a good order in which to train models. You are welcome to, but do not need to, implement the full algorithm in code; manually writing down the source task array is sufficient. Give at least the first 8 models you would train under the TTL strategy. Report the curve produced by this notebook.

In [None]:
# Question 4.3 uses a longer horizon: eight source tasks.
num_transfer_steps = 8
print(
    f"TODO: Please choose {num_transfer_steps} source tasks from {deltas}"
)

In [None]:
# TODO: Please choose eight source tasks you would train under the TTL strategy!
# List them in the order in which you would train them.
source_tasks_own = []

# Evaluate the ordered selection with the current `num_transfer_steps`.
transfer_design_your_own = evaluate_on_task(
    data_transfer,
    source_tasks_own,
    deltas,
    num_transfer_steps,
)

In [None]:
# Recompute the baseline curves for the current value of `num_transfer_steps`.
(
    oracle_transfer,
    exhaustive_training,
    sequential_oracle_training,
) = get_baseline_performance(data_transfer, num_transfer_steps)

In [None]:
# Plot: baselines versus the hand-designed (TTL) ordering.

# Configure text styling up front (both settings are applied before any axes
# or text are created).
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams.update({'font.size': 12})

# Open a fresh figure directly. No plt.clf() beforehand: clf() acts on the
# *current* figure, so it would either create an extra empty figure (when none
# is open) or wipe a figure drawn earlier.
plt.figure(figsize=(8, 6))

# x positions 1..num_transfer_steps, shared by every curve.
step_axis = range(1, num_transfer_steps + 1)

# Baselines (dashed) and the designed ordering (solid).
plt.plot(step_axis, oracle_transfer, '--r.', label='Oracle Transfer')
plt.plot(step_axis, exhaustive_training, '--g.', label='Exhaustive Training')
plt.plot(step_axis, sequential_oracle_training, '--b.', label='Sequential Oracle Training')
plt.plot(step_axis, transfer_design_your_own, '-k.', label='Design your own!')

# Fixed axis limits; values outside (-3.55, -3.2) on the y-axis are not visible.
plt.xlim((0, num_transfer_steps + 1))
plt.ylim((-3.55, -3.2))
plt.legend(loc="lower right", fontsize=10)
plt.ylabel("Average reward")
plt.xlabel("Transfer steps")
plt.grid(color='gray', linestyle='dashed', alpha=0.5)


## Question 4.4
Qualitatively compare and contrast the strategies. What are the pros and cons of the different source task selection strategies?