In [None]:
def df_drop(df, condition):
    """Return ``df`` without the rows selected by ``condition``.

    ``condition`` is used as a row selector (``df[condition]``), typically a
    boolean mask; the index labels of the selected rows are then handed to
    ``DataFrame.drop``. The input frame itself is left untouched, a new
    frame is returned.
    """
    labels_to_remove = df[condition].index
    return df.drop(labels_to_remove)


In [None]:
def regression_eq_text(df_x, df_y, regress, x_offset=-0.3,y_offset=+0.1):
    """Build an ``hv.Text`` annotation showing a regression equation.

    The text is anchored at ``(max(df_x) + x_offset, min(df_y) + y_offset)``.

    regress: indexable result where ``regress[2]`` is printed as the slope,
        ``regress[3]`` as the intercept and ``regress[1]`` after the ``R =``
        label (this is the list layout returned by ``regression_sklearn``).
    x_offset, y_offset: shifts applied to the anchor position.
    """
    anchor_x = max(df_x) + x_offset
    anchor_y = min(df_y) + y_offset
    label = f'y = {regress[2]:.3f} x + {regress[3]:.3f} \n R = {regress[1]:.3f}'
    return hv.Text(anchor_x, anchor_y, label)

def regression_sklearn(x,y, color='orange', overlay_values=True):
    """Fit a straight line ``y = slope * x + intercept`` with scikit-learn.

    x, y: pandas Series (e.g. DataFrame columns); their ``.values`` are
        reshaped to single-column 2-D arrays before fitting.
    color: colour option applied to the returned ``hv.Slope`` element.
    overlay_values: kept for backward compatibility. The text overlay it
        used to control is disabled, so it currently has no effect on the
        result; use ``regression_eq_text`` to build the annotation.

    Returns a list with:
    1- the ``hv.Slope`` element of the fitted line
    2- ``reg.score(x, y)``, i.e. scikit-learn's coefficient of
       determination (R squared, not the correlation coefficient R)
    3- slope value
    4- intercept value
    """
    x = x.values.reshape(-1, 1)
    y = y.values.reshape(-1, 1)
    reg = LinearRegression().fit(x, y)

    # y was reshaped to one target column, so coef_ is (1, 1) and
    # intercept_ is (1,).
    slope = reg.coef_[0][0]
    intercept = reg.intercept_[0]
    regress = hv.Slope(slope, intercept).opts(color=color)

    return [regress, reg.score(x, y), slope, intercept]

In [None]:
def Root_Mean_Square(col_1,col_2):
    """Return the root of the mean squared difference between two columns.

    Both arguments must support element-wise subtraction and the result
    must expose ``.mean()`` (pandas Series, numpy arrays).
    """
    squared_error = (col_1 - col_2) ** 2
    return squared_error.mean() ** .5

def Percentage_difference(col_1, col_2):
    """Return the difference of ``col_1`` relative to ``col_2`` in percent.

    Computed as ``col_1 / col_2 * 100 - 100``; works element-wise on pandas
    Series / numpy arrays as well as on plain numbers.
    """
    ratio = col_1 / col_2
    return ratio * 100. - 100.

def distance_from_station(df,lat=76.5145,lon=-68.7432, df_column_latitude='latitude',
                          df_column_longitude='longitude',drop_above=None):
    """
    Add the distance (kilometers) between a reference lat/lon point and the
    coordinates stored in two columns of a pandas dataframe.

    The result is written to a new ``diff_distance`` column. Note that this
    column is added to the dataframe passed in (the input is modified in
    place); when ``drop_above`` is given the returned dataframe is a new,
    filtered one.

    df: dataframe holding the coordinate columns
    lat,lon : decimal degrees of the reference station
    df_column_latitude, df_column_longitude: names of the columns of ``df``
        holding latitude and longitude
    drop_above: drop rows farther than X kilometers (float) from the
        station; ``None`` (default) keeps every row

    Returns the dataframe with the ``diff_distance`` column.
    """
    df['diff_distance'] = distance_on_unit_sphere(
        lat, lon,
        lat2=df[df_column_latitude],
        lon2=df[df_column_longitude],
    )
    # Identity check: ``None`` means "no filtering", while 0 is a valid limit.
    if drop_above is not None:
        df = df_drop(df, df['diff_distance'] > drop_above)
    return df

def distance_on_unit_sphere(lat,lon,lat2,lon2, radius=6371):
    """Great-circle distance between a reference point and one or more points.

    lat: latitude of the reference point (decimal degrees)
    lon: longitude of the reference point (decimal degrees)
    lat2: latitude(s) of the other point(s) in decimal degrees; a single
        number or an array-like of numbers such as a pandas Series
    lon2: longitude(s) of the other point(s), same form as ``lat2``
    radius: sphere radius; the default 6371 gives the result in kilometers

    Returns the distance in the units of ``radius``, with the same shape as
    ``lat2`` / ``lon2``.
    """

    # Convert latitude and longitude to
    # spherical coordinates in radians (phi = colatitude).
    phi1 = np.deg2rad(90.0 - lat2)
    phi2 = np.deg2rad(90.0 - lat)

    # theta = longitude
    theta1 = np.deg2rad(lon2)
    theta2 = np.deg2rad(lon)

    # Spherical law of cosines.
    cos_arc = (np.sin(phi1)*np.sin(phi2)*np.cos(theta1 - theta2) +
               np.cos(phi1)*np.cos(phi2))
    # Rounding can push the value marginally outside [-1, 1] (coincident or
    # antipodal points), which would make arccos return NaN.
    cos_arc = np.clip(cos_arc, -1.0, 1.0)
    arc = np.arccos(cos_arc)
    return arc*radius

In [None]:
def closest_in_time(df1,df2,var1,var2,delta_time='30m',on='time',lat = 76.5145, lon =-68.7432):
    """Pair each row of ``df1`` with the row of ``df2`` closest in time.

    df1, df2 : dataframes; each must provide the ``on`` column used for the
        as-of merge and a ``'Time'`` column, which is renamed to
        ``f'{on}_{var1}'`` / ``f'{on}_{var2}'`` so both timestamps survive
        the merge
    var1, var2 : strings, names of the columns (in ``df1`` and ``df2``
        respectively) whose values are compared
    delta_time : maximum time separation accepted for a match (anything
        ``pd.Timedelta`` understands)
    on : name of the column the as-of merge is performed on
    lat,lon : decimal degrees of the reference station (currently unused,
        kept so existing callers keep working)

    Returns the merged dataframe, indexed by both renamed time columns, with
    the added columns:
    - ``diff``: absolute difference between the two renamed time columns
    - ``diff_int``: the same difference in seconds (float)
    - ``diff_IWV``: ``var1 - var2``
    - ``diff_perc_IWV``: percentage difference of ``var1`` relative to ``var2``
    - ``RMSE_IWV``: root-mean-square difference over the retained rows
      (a single value repeated on every row)
    """
    time_col_1 = f'{on}_{var1}'
    time_col_2 = f'{on}_{var2}'

    df1 = df1.rename(columns={'Time': time_col_1})
    df2 = df2.rename(columns={'Time': time_col_2})
    newdf = pd.merge_asof(df1,df2,on=on,tolerance=pd.Timedelta(delta_time),
                          direction='nearest')

    newdf['diff'] = (newdf[time_col_1]-newdf[time_col_2]).abs()
    # total_seconds() does not depend on the platform integer width or on
    # the timedelta resolution, and yields NaN for the NaT of unmatched rows.
    newdf['diff_int'] = newdf['diff'].dt.total_seconds()
    newdf['diff_IWV'] = newdf[var1]-newdf[var2]
    newdf['diff_perc_IWV'] = Percentage_difference(newdf[var1], newdf[var2])

    # Sorting by separation first makes drop_duplicates keep, for every
    # df2 timestamp, only its closest df1 partner.
    newdf = newdf.sort_values('diff').drop_duplicates(time_col_2)
    # Rows without a partner inside the tolerance have no time difference.
    newdf = newdf.dropna(subset=['diff'])
    newdf = newdf.set_index([time_col_1, time_col_2],
                            drop=False).sort_index()
    newdf['RMSE_IWV'] = Root_Mean_Square(newdf[var1], newdf[var2])

    return newdf