In [None]:
def df_drop(df, condition):
    """Return ``df`` without the rows selected by ``condition``.

    df: dataframe to filter (it is not modified; a new object is returned)
    condition: anything accepted by ``df[...]`` as a row selector,
        e.g. a boolean mask

    Rows are removed by index label: every row whose label appears in
    ``df[condition].index`` is dropped.
    """
    unwanted_labels = df[condition].index
    return df.drop(unwanted_labels)

In [51]:
def regression_eq_text(df_x, df_y, regress, x_offset=-0.3,y_offset=+0.1):
    """Build a text annotation showing a fitted line equation and its score.

    df_x, df_y: iterables of the plotted x and y values; the label is
        anchored at (max(df_x) + x_offset, min(df_y) + y_offset)
    regress: indexable with the layout returned by regression_sklearn:
        regress[1] = score, regress[2] = slope, regress[3] = intercept
    x_offset, y_offset: shift of the label from the anchor point,
        in data units

    Return an hv.Text element.
    """
    slope = regress[2]
    intercept = regress[3]
    # regression_sklearn stores reg.score(x, y) at index 1, which is the
    # coefficient of determination (R squared), so it is labelled as such.
    r_squared = regress[1]
    label = f'y = {slope:.3f} x + {intercept:.3f} \n R² = {r_squared:.3f}'
    return hv.Text(max(df_x) + x_offset, min(df_y) + y_offset, label)

def regression_sklearn(x,y, color='orange', overlay_values=True):
    """Fit a straight line ``y = slope * x + intercept`` with LinearRegression.

    x, y: pandas dataframe columns (anything exposing ``.values``); both
        are reshaped to single-column 2-D arrays before fitting
    color: colour applied to the returned hv.Slope element
    overlay_values: kept for backward compatibility; it currently has no
        effect, the same list is returned either way. Use
        regression_eq_text to build the equation label.

    Return a list having:
    1- the hv.Slope element of the fitted line
    2- the value of reg.score(x, y) (for scikit-learn's LinearRegression
       this is the coefficient of determination R^2, not R)
    3- slope value
    4- intercept value
    """
    x = x.values.reshape(-1, 1)
    y = y.values.reshape(-1, 1)
    reg = LinearRegression().fit(x, y)

    # With 2-D targets coef_ has shape (1, 1) and intercept_ shape (1,).
    slope = reg.coef_[0][0]
    intercept = reg.intercept_[0]
    regress = hv.Slope(slope, intercept).opts(color=color)

    return [regress, reg.score(x, y), slope, intercept]

In [4]:
def Root_Mean_Square(col_1,col_2):
    """Return the square root of the mean squared difference of two columns.

    col_1, col_2: objects supporting element-wise ``-`` and ``**`` whose
        result has a ``.mean()`` method (e.g. dataframe columns or arrays)
    """
    squared_error = (col_1 - col_2) ** 2
    mean_squared_error = squared_error.mean()
    return mean_squared_error ** 0.5

def Percentage_difference(col_1, col_2):
    """Return the difference of col_1 relative to col_2, in percent.

    Computed as ``col_1 / col_2 * 100 - 100``: 0 means equal values,
    positive means col_1 is larger than col_2.
    """
    ratio = col_1 / col_2
    return ratio * 100.0 - 100.0
    
def distance_on_unit_sphere(df,point_latitude,point_longitude, df_column_latitude='latitude', 
                            df_column_longitude='longitude'):
    """Distance in kilometers between a series of points and a single lat/lon.

    The angular separation is computed with the spherical law of cosines
    and scaled by a sphere radius of 6371 km.

    df: dataframe having the series of points (coordinates in degrees)
    point_latitude: single latitude, in degrees
    point_longitude: single longitude, in degrees
    df_column_latitude: name of the df column holding the latitudes [string]
    df_column_longitude: name of the df column holding the longitudes [string]

    Return the distances in kilometers, one per row of df.
    """
    degrees_to_radians = np.pi / 180.0
    radius = 6371  # kilometers

    # Convert latitude and longitude to spherical coordinates in radians:
    # phi is the colatitude (90 - latitude), theta is the longitude.
    phi1 = (90.0 - df[df_column_latitude]) * degrees_to_radians
    phi2 = (90.0 - point_latitude) * degrees_to_radians
    theta1 = df[df_column_longitude] * degrees_to_radians
    theta2 = point_longitude * degrees_to_radians

    cos_arc = (np.sin(phi1) * np.sin(phi2) * np.cos(theta1 - theta2)
               + np.cos(phi1) * np.cos(phi2))
    # Rounding can push the cosine marginally outside [-1, 1] for
    # (nearly) coincident or antipodal points; arccos would then give NaN.
    arc = np.arccos(np.clip(cos_arc, -1.0, 1.0))
    return arc * radius

In [None]:
def closest_in_time(df1,df2,delta_time='30m', lat=76.5145, lon=-68.7432):
    """Pair each row of df1 with the df2 row closest in time and compare IWV.

    df1, df2: dataframes sorted by their 'time' column (required by
        pd.merge_asof). After the merge the columns 'time2', 'IWV_MODIS',
        'IWV_THAAO', 'latitude', 'longitude' and 'key' must be present.
    delta_time : string, maximum time separation accepted for a match
        (passed to pd.Timedelta)
    lat, lon : decimal degrees of the reference station

    Columns added to the returned dataframe:
    diff          : absolute time separation |time - time2|
    diff_int      : the same separation in seconds (NaN when unmatched)
    diff_IWV      : IWV_MODIS - IWV_THAAO
    diff_perc_IWV : IWV_MODIS relative to IWV_THAAO, in percent
    diff_distance : Euclidean distance from (lat, lon), in degrees
    RMSE_IWV      : root mean square of diff_IWV over the returned rows
                    (the same value on every row)

    For each 'key' only the row with the smallest time separation is
    kept. The result is indexed by ('Time', 'Time2') and a summary is
    printed with DataFrame.info().
    """
    newdf = pd.merge_asof(df1, df2, on='time',
                          tolerance=pd.Timedelta(delta_time),
                          direction='nearest')
    newdf['diff'] = (newdf['time'] - newdf['time2']).abs()
    # total_seconds() yields NaN for unmatched rows (NaT) and does not
    # depend on the platform integer size or on the timedelta unit.
    newdf['diff_int'] = newdf['diff'].dt.total_seconds()
    newdf['diff_IWV'] = newdf['IWV_MODIS'] - newdf['IWV_THAAO']
    newdf['diff_perc_IWV'] = (newdf['IWV_MODIS'] / newdf['IWV_THAAO']) * 100. - 100.
    newdf['diff_distance'] = np.sqrt((newdf['latitude'] - lat) ** 2
                                     + (newdf['longitude'] - lon) ** 2)

    # Sorting by separation first makes drop_duplicates keep, for every
    # key, the match that is closest in time.
    newdf = newdf.sort_values('diff').drop_duplicates('key')
    newdf = newdf.set_index(['time', 'time2'], drop=False).sort_index()
    newdf.index = newdf.index.rename(['Time', 'Time2'])

    # NOTE(review): the first row (earliest 'time') is discarded, as in the
    # original code; the reason is not visible here - confirm it is wanted.
    # .copy() avoids assigning a new column to a slice of the dataframe.
    newdf = newdf.iloc[1:].copy()
    newdf['RMSE_IWV'] = ((newdf['IWV_MODIS'] - newdf['IWV_THAAO']) ** 2).mean() ** .5
    newdf.info()
    return newdf

def closest_in_time2(df1,df2,var1,var2,delta_time='30m',on='time',lat = 76.5145, lon =-68.7432):
    """Pair each row of df1 with the df2 row closest in time and compare them.

    df1, df2 : dataframes sorted by the ``on`` column (required by
        pd.merge_asof). Each must also carry a 'Time' column, which is
        renamed to '<on>_<var1>' / '<on>_<var2>' so that both original
        timestamps survive the merge.
    var1, var2 : string, names of the columns to compare (var1 in df1,
        var2 in df2)
    delta_time : string, maximum time separation accepted for a match
        (passed to pd.Timedelta)
    on : string, name of the column used as merge key
    lat, lon : decimal degrees of the reference station

    Columns added to the returned dataframe:
    difference    : absolute separation between the two timestamps
    diff_int      : the same separation in seconds
    diff_perc_IWV : var1 relative to var2, in percent
    RMSE_IWV      : root mean square difference between var1 and var2
                    over the returned rows (the same value on every row)
    diff_distance : distance in km from (lat, lon), computed from the
                    'latitude' and 'longitude' columns

    Only one row per df2 timestamp is kept (the closest in time) and
    unmatched rows are dropped. The result is indexed by the two
    timestamp columns, which are also kept as regular columns.
    """
    time_col_1 = f'{on}_{var1}'
    time_col_2 = f'{on}_{var2}'

    df1 = df1.rename(columns={'Time': time_col_1})
    df2 = df2.rename(columns={'Time': time_col_2})
    newdf = pd.merge_asof(df1, df2, on=on, tolerance=pd.Timedelta(delta_time),
                          direction='nearest')

    newdf['difference'] = (newdf[time_col_1] - newdf[time_col_2]).abs()
    # total_seconds() copes with the NaT of rows still unmatched at this
    # point and does not depend on platform integer size or timedelta unit.
    newdf['diff_int'] = newdf['difference'].dt.total_seconds()
    newdf['diff_perc_IWV'] = Percentage_difference(newdf[var1], newdf[var2])

    # Sorting by separation first makes drop_duplicates keep, for every
    # df2 timestamp, the df1 row that is closest in time.
    newdf = newdf.sort_values('difference').drop_duplicates(time_col_2)
    # A list for ``subset`` also works on pandas versions that reject a
    # bare string.
    newdf = newdf.dropna(subset=['difference'])
    newdf = newdf.set_index([time_col_1, time_col_2],
                            drop=False).sort_index()

    newdf['RMSE_IWV'] = Root_Mean_Square(newdf[var1], newdf[var2])
    newdf['diff_distance'] = distance_on_unit_sphere(newdf, lat, lon)

    return newdf