In [None]:
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.backends.backend_pdf as backend_pdf
import seaborn as sns
%matplotlib inline

import numpy as np
import pandas as pd
import scipy.optimize

from datetime import datetime
import pytz

import gm2
import trfp

# Select seaborn's 'darkgrid' style for the figures drawn later in this notebook.
sns.set_style('darkgrid')


In [None]:
# Load the moment DataFrame stored under key 'run_3956_moment_df'.  Its station
# columns ('st<N>,m<M>') and 'tr_phi' column drive the footprint veto and the
# ring-average replacement performed in the cells below.
trolley_moment_df = pd.read_hdf(path_or_buf='60hr_trolley_runs_1.h5',
                                key='run_3956_moment_df')


In [None]:
def split_by_nan(input_array):
    """Split a 1-D array into its contiguous runs of valid samples.

    Entries that are NaN (or infinite, since `np.ma.masked_invalid` masks
    both) act as separators and are dropped from the output.

    Parameters
    ----------
    input_array : 1-D array-like of floats.

    Returns
    -------
    list
        One slice of `input_array` per unbroken run of valid values, in the
        order they occur.  Empty when no entry is valid.
    """
    invalid_masked = np.ma.masked_invalid(input_array)
    valid_runs = np.ma.clump_unmasked(invalid_masked)  # list of slice objects

    segments = []
    for run in valid_runs:
        segments.append(input_array[run])
    return segments


In [None]:
%%time

barcode = trfp.STATION_BARCODE_PHI
nomask_df = trolley_moment_df.copy()
mask_df = nomask_df.copy()
temp_avg_df = pd.DataFrame(index=mask_df.index)

veto_extent = 25
split_station = []
all_good_stations = np.arange(6,72)  # not using the inflector stations
no_ground_loop_stations = np.array(range(6,16)+range(64,72))  # vaid for 25 deg veto

# first need to mask when trolley is near each station
for st in range(72):
    stms = ['st' + str(st) + ',m' + str(m+1) for m in range(6)]

    veto_low = (barcode[st]-(veto_extent-3)/2)%360
    veto_high = (barcode[st]+3+(veto_extent-3)/2)%360

    if veto_low < veto_high:
        mask = (nomask_df['tr_phi']>veto_low) & (nomask_df['tr_phi']<veto_high)
    else:  # this happens when wrapping around 360 deg
        mask = (nomask_df['tr_phi']>veto_low) | (nomask_df['tr_phi']<veto_high)

    if mask.iloc[0] & mask.iloc[-1]: split_station += [True]
    else: split_station += [False]

    mask_df[stms] = nomask_df[stms].mask(mask)
    
    # next need to average all good stations that are not within 3 of current station
    if st not in range(16, 23):  # note that these ranged were chosen for 25 deg veto
        averaging_stations = np.delete(all_good_stations,
                                       np.argwhere((np.abs((all_good_stations - st)%72)<=3)
                                                  | (np.abs((all_good_stations - st)%72)>=69))
                                      )
    else:
        averaging_stations = np.delete(no_ground_loop_stations,
                                       np.argwhere((np.abs((no_ground_loop_stations - st)%72)<=3)
                                                  | (np.abs((no_ground_loop_stations - st)%72)>=69))
                                      )
    for m in range(6):  # this will need to go over all moments
        stm = 'st' + str(st) + ',m' + str(m+1)
        avg_stms = ['st'+str(avg_st)+',m'+str(m+1) for avg_st in averaging_stations]
        temp_avg_df[stm] = nomask_df[avg_stms].mean(axis=1).mask(~mask)

replaced_df = mask_df.copy()


# next need to remove the ring wide drift and replace with the station drift
#
# For every vetoed segment: replacement = ring_average - ring_trend + station_trend,
# where both trends are straight lines spanning the vetoed segment.

def _endpoint_trend(segment, num_endpts):
    """Line through the means of the first and last `num_endpts` samples of `segment`.

    Each endpoint mean is pushed outwards by half the averaging window so the
    returned line (one value per sample of `segment`) spans the whole segment.
    """
    first_avg = np.mean(segment[0:num_endpts])
    last_avg = np.mean(segment[-num_endpts:])
    delta_y = 0.5 * (last_avg - first_avg) / segment.size * (num_endpts - 1)
    return np.linspace(first_avg - delta_y, last_avg + delta_y, num=segment.size)


num_endpts = 5        # samples averaged at each segment edge (makes extrapolation easier)
dt = 1                # the time step, usually 1 sec, but might as well make it a variable
num_pts = 260 // dt   # 260 seconds of data: integer period of 130 sec signal

for st in range(72):
    num_moments = len(trfp.STATION_PROBE_ID[st])
    for m in range(num_moments):
        stm = 'st' + str(st) + ',m' + str(m+1)

        inner_splits = split_by_nan(temp_avg_df[stm].values)  # ring average inside the veto window(s)
        outer_splits = split_by_nan(mask_df[stm].values)      # station data outside the veto window(s)

        if not split_station[st]:
            # One vetoed segment with station data on both sides of it.
            inner = inner_splits[0]
            first_outer_avg = np.mean(outer_splits[0][-num_endpts:])
            last_outer_avg = np.mean(outer_splits[1][0:num_endpts])
            outer_delta_y = 0.5 * (last_outer_avg - first_outer_avg) / inner.size * (num_endpts - 1)
            # Station trend: line joining the station data just before and just after the gap.
            outer_lin_fit = np.linspace(first_outer_avg + outer_delta_y,
                                        last_outer_avg - outer_delta_y,
                                        num=inner.size)

            replacement_values = inner - _endpoint_trend(inner, num_endpts) + outer_lin_fit

        else:
            # Two vetoed segments (start and end of the run) around one stretch of
            # station data, so each station trend must be extrapolated from one side.
            first_inner, second_inner = inner_splits[0], inner_splits[1]

            # use 260 seconds of data after (or before) the vetoed window to make a
            # linear fit to approximate "station drift"
            # NOTE: np.polyfit raises if the unvetoed stretch is shorter than num_pts samples.
            fit_times = np.arange(num_pts) * dt
            first_outer_fit_coeffs = np.polyfit(fit_times, outer_splits[0][0:num_pts], deg=1)
            second_outer_fit_coeffs = np.polyfit(fit_times, outer_splits[0][-num_pts:], deg=1)

            # Backward extrapolation: the first_inner.size samples that precede fit time 0.
            first_outer_fit = np.polyval(first_outer_fit_coeffs,
                                         np.linspace(first_inner.size * -dt, -dt, num=first_inner.size))
            # Forward extrapolation: the second_inner.size samples that follow the last
            # fitted sample (fit time (num_pts-1)*dt), spaced by exactly dt.  The grid is
            # sized from the second segment; it previously took its end point from the
            # first segment's length, which distorted the spacing.
            second_outer_fit = np.polyval(second_outer_fit_coeffs,
                                          np.linspace(num_pts * dt,
                                                      (num_pts + second_inner.size - 1) * dt,
                                                      num=second_inner.size))

            first_replacement_values = first_inner - _endpoint_trend(first_inner, num_endpts) + first_outer_fit
            second_replacement_values = second_inner - _endpoint_trend(second_inner, num_endpts) + second_outer_fit
            replacement_values = np.append(first_replacement_values, second_replacement_values)

        # Single .loc assignment instead of chained indexing, which may write to a
        # temporary copy and leave replaced_df unchanged.
        replaced_df.loc[replaced_df[stm].isna(), stm] = np.asarray(replacement_values)

In [None]:
# Write one PDF page per station showing the four stages of the footprint
# replacement for a single moment: raw data, vetoed data, ring-average
# stand-in, and the final patched series.

m = 0  # zero-based moment index; the plotted columns are named 'm<m+1>'

# Plotted against the DataFrame index; use nomask_df['tr_phi'] here instead to
# plot against trolley azimuth.
x = mask_df.index.values

fig, ax = plt.subplots(4, 1)
fig.set_size_inches(12, 12)

# The context manager closes the PDF even if a page fails to render.
with backend_pdf.PdfPages("new_trolley_footprint_replacement_m1_time.pdf") as pdf:
    for st in range(72):
        stm = 'st' + str(st) + ',m' + str(m+1)

        # Title uses m+1 so it matches the plotted column and the file name.
        panels = [
            (nomask_df, 'station ' + str(st) + ', m' + str(m+1) + ' raw data'),
            (mask_df, 'vetoed window'),
            (temp_avg_df, 'replacement data from ring avg'),
            (replaced_df, 'replacement complete'),
        ]

        # The same figure is reused for every page, so clear each axis first.
        for axis, (frame, title) in zip(ax, panels):
            axis.cla()
            axis.plot(x, frame[stm], '.')
            axis.set_xlim((np.min(x), np.max(x)))
            axis.set_title(title)

        fig.tight_layout()
        pdf.savefig(fig, dpi=70)
