## Import Data

In [1]:
import numpy as np
import pandas as pd
import warnings
from datetime import datetime, timedelta
from matplotlib import pyplot as plt


warnings.filterwarnings('ignore')## Import Data## Import Data

## Import Data

In [2]:
telescopes = ['12-meter','alma','apex','aste','iram','jcmt','lmt','sma','smt','spt']

In [3]:
starttime = datetime(2019,10,3,6)
endtime = datetime(2019,10,14,0) # not included
timestamps = np.arange(starttime, endtime, 
                       timedelta(hours=6)).astype(datetime)
databook = {}
for ts in telescopes:
    databook[ts] = dict.fromkeys(timestamps)

In [4]:
for ts in telescopes:
    for t in timestamps:
        filepath = "data/"+ ts +"/"+ t.strftime("%Y%m%d_%H:%M:%S")
        try:
            df = pd.read_csv(filepath, delim_whitespace=True, skiprows = 1, header = None)
            df.columns = ["date", "tau225", "Tb[k]", "pwv[mm]", "lwp[kg*m^-2]","iwp[kg*m^-2]","o3[DU]"]
            df['date'] = pd.to_datetime(df['date'], format = "%Y%m%d_%H:%M:%S")
            databook[ts][t] = df
        except FileNotFoundError:
            databook[ts][t] = None
# databook is a dictionary of dictionaries of dataframes 
# keys: telescope names
# values: dictionaries of dataframes for one telescope
# databook[telescope_name] is a dictionary of dataframes for one telescope
# keys: timestamps when the forecast is made
# values: forecast dataframe (None if missing)

For the baseline model, we calculate the reward function for each telescope on each day using $$f(D, R) = -\frac{1}{T(R)}\sum_{t = 1}^{T(R)}{\tau_{225}(D, t)},$$ where $D$ is the date we are looking at for the weather forecast, $R$ is refers to the specific telescope, and $T(R)$ is the number of times forecasts being made in the telescope's observation timeframe. For each day, we calculate the day's reward by combining the telescopes' rewards in weighted average. $$F(D) = \sum_{i = 1}^N{W_{R_i}\times f(D, R_i)}.$$ Then we make decisions on whether to trigger the day based on the pure values of $F(D)$.

In the discounted model, we calculate $$F(D, r) = {\sum_{i = 1}^N{W_{R_i}\times f(D, R_i)}}\times{(1+r)^D},$$ where $r$ is the discount factor and then make decisions on $F(D,r)$. We multiply rather than divide here because the value is negative. We are going to experiment with different fixed value of $r$, and a function of $r$ depending on the variances in the forecasts, and compare the results with the ground-truth optimal path.

In [10]:
def day_reward(telescope_name, day_current_str, end_day_str, start_time, end_time, \
            use_as_evaluate = False):
    '''
    For the specified telescope, return a dataframe with two columns.
    The first column tells the day in the day window between 
        day_current_str and end_day_str (inclusive).
    The second column tells the average predicted tao225 given the day and the time window between
        start_time and end_time (inclusive).
    
    '''
    split_day_current = day_current_str.split('-')
    split_day_end = end_day_str.split('-') # include this day
    
    day_current = datetime(int(split_day_current[0]),int(split_day_current[1]),int(split_day_current[2]),0)
    day_end = datetime(int(split_day_end[0]),int(split_day_end[1]),int(split_day_end[2])+1,0)
    
    if not use_as_evaluate:
        mask = [t < day_current for t in databook[telescope_name]]
        t_valid = np.array([t for t in databook[telescope_name]])[mask]

        df_all = pd.concat([databook[telescope_name][t] for t in t_valid], axis =0)
    else:
        df_all = pd.concat([databook[telescope_name][t] for t in databook[telescope_name]], axis =0)
        
    df_tau_all = df_all.groupby('date').agg({'tau225':lambda x: list(x)}).reset_index()
    
    df_tau_all['latest'] = df_tau_all['tau225'].apply(lambda x: x[-1]) # baseline only use 
    
    
    df_tau_all = df_tau_all[(df_tau_all.date >= day_current) & (df_tau_all.date < day_end)]
    
    
    # calculate the reward for each day based on the schedule
    df_tau_all['day'] = df_tau_all.date.apply(lambda x: str(x).split(' ')[0])
    df_tau_all['time'] = df_tau_all.date.apply(lambda x: int(str(x).split(' ')[1][0:2]))
    
    df_tau_all = df_tau_all[(df_tau_all.time >= int(start_time)) & (df_tau_all.time <= int(end_time))]
    df_tau_day = pd.DataFrame(-df_tau_all.groupby('day')['latest'].mean())
    
    return df_tau_day

**2. Weighted sum the reward for each telescope according to the total Gbytes.** 
(so far we have not taken the telescopes '12-meter','aste','iram' into account as we haven't found corresponding schedule and weights)

In [12]:
# weight_telescope = [0, 22830.7, 26153.8, 0, 0, 12123.0, 22215.3, 12123.0, 18030.7, 26953.8]

# using the area (radius ** 2) of the telescope as weights 
weight_telescope = [12**2, 73**2, 12**2, 10**2, 30**2, 15**2, 32.5**2, 14.7**2, 10**2, 6**2]
schedule_telescope = [[0,1], [3,13], [3,15], [0,1], [0,1], [10,16], [6,16], [10,16], [8,16], [3,15]]


dict_schedule = dict(zip(telescopes, schedule_telescope))
dict_weight = dict(zip(telescopes, weight_telescope))

In [16]:
def all_day_reward(day_current_str, end_day_str):
    """
    calculate F(D) for D in range(day_current_str, end_day_str)
    """
    telescopes_day_reward = day_reward(telescopes[0], day_current_str, end_day_str, dict_schedule[telescopes[0]][0], dict_schedule[telescopes[0]][1]) * dict_weight[telescopes[0]]
    for i in telescopes[1:]:
        telescopes_day_reward += day_reward(i, day_current_str, end_day_str, dict_schedule[i][0], dict_schedule[i][1]) * dict_weight[i]
    return telescopes_day_reward

In [51]:
def decision_making_single_discount_factor(day_current_str, end_day_str, days_to_trigger, discount = 0):
    # day_current_str: YYYY-MM-DD (str) (included)
    # end_day_str: YYYY-MM-DD (str) (included)
    # days_to_trigger: days to trigger (int)
    each_day_reward = all_day_reward(day_current_str, end_day_str)
    
    # inflate the values on each day
    a = np.array([n * ((1 + 0.1) ** i) for i, n in enumerate(each_day_reward['latest'])])
    
    # select the 'days_to_trigger' number of days having maximum reward values
    days_to_trigger = np.array(each_day_reward.index)[np.argsort(a)[-1:-days_to_trigger-1:-1]]
    if day_current_str in days_to_trigger:
        print('We suggest triggering on today')
    else: 
        print('We DO NOT suggest triggering on today')
    print('And we suggest to trigger by the following sequence: {}'.format(np.array(sorted(days_to_trigger))))

In [35]:
each_day_reward = all_day_reward('2019-10-06', '2019-10-14')['latest']
a = np.array([n * ((1 + 0.1) ** i) for i, n in enumerate(each_day_reward)])
a[np.argsort(a)[::-1]]

array([-6.80036926e+02, -9.71353514e+02, -1.04245207e+03, -1.49160400e+03,
       -1.98844800e+03, -2.18286329e+03, -2.19553177e+03, -1.11348802e+05,
       -2.33361825e+06])

In [38]:
np.argsort(a)[-1:-5:-1]

array([1, 3, 4, 5])

In [50]:
np.array(each_day_reward.index)[np.argsort(a)[-1:-5:-1]]

array(['2019-10-07', '2019-10-09', '2019-10-10', '2019-10-11'],
      dtype=object)

In [59]:
decision_making_single_discount_factor('2019-10-05', '2019-10-14', 5, 0.2)

We suggest triggering on today
And we suggest to trigger by the following sequence: ['2019-10-05' '2019-10-06' '2019-10-07' '2019-10-10' '2019-10-14']


In [60]:
decision_making_single_discount_factor('2019-10-06', '2019-10-14', 4, 0.2)

We DO NOT suggest triggering on today
And we suggest to trigger by the following sequence: ['2019-10-07' '2019-10-09' '2019-10-10' '2019-10-11']


In [None]:
decision_making_single_discount_factor('2019-10-06', '2019-10-14', 4, 0.2)