# Export sensor bias
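- This notebook calculates the sensor bias (sensor minus supersite) for hourly temperature and relative humidity, averages it across the sensors, and exports it with `hour` and `month` columns so that the diurnal mean bias by month can be derived.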

In [1]:
import xarray as xr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Root of the project workspace; every other path in this notebook is built from it.
home_path = '/gws/nopw/j04/duicv/yuansun/'
# Directory that receives the exported mean-bias CSV files.
output_dir = f'{home_path}0_lcz_mcr/output_analysis/single_point/calibration/'

In [2]:
# Identifiers of the sensors to process; each has an hourly file named <id>.csv
# inside sensor_datapath.
sensor_list = [
    'MOD-PM-00454',
    'MOD-PM-00459',
    'MOD-PM-00461',
]
sensor_datapath = f'{home_path}manchester/sensor/analyse_variable/temperature/tem_rh_hourly/'

# Sensor metadata table, loaded here and previewed below.
meta_data_file = f'{home_path}manchester/sensor/meta_data/meta_data_final_for_analyse.csv'
meta_data = pd.read_csv(meta_data_file)
meta_data.head(2)

Unnamed: 0,sn,start_time,end_time,lat,lon,TB notes,Still active,data_start_time,data_end_time,analyse_start_time,analyse_end_time
0,MOD-PM-00097,2021-01-12 05:20:00,2024-01-15 11:56:00,53.48402,-2.270676,,No,2021-02-01 16:31:00,2024-01-15 11:56:00,2022-08-11 12:57:14,2024-01-15 11:56:00
1,MOD-PM-00342,2021-09-08 15:26:00,2024-06-07 20:36:00,53.504016,-2.217164,Delete data prioir to the 19th of April 2023,No,2021-09-08 20:59:00,2024-06-07 20:36:00,2023-04-19 12:27:34,2024-06-07 20:36:26


In [3]:
# Load the merged hourly supersite record, parse its 'Datetime' column and
# expose it under the name 'timestamp' (the column the sensor files are indexed by).
supersite_file = f'{home_path}manchester/supersite/hourly_data/final_supersite_merged.csv'
supersite_data = (
    pd.read_csv(supersite_file)
    .assign(Datetime=lambda frame: pd.to_datetime(frame['Datetime']))
    .rename(columns={'Datetime': 'timestamp'})
)
supersite_data.head(2)

Unnamed: 0,timestamp,RAIN,FSDS,PBOT,TBOT,RH,WIND
0,2019-07-19 09:00:00,,,,,,0.802263
1,2019-07-19 10:00:00,,,668.3695,9.719147,63.61148,1.116556


In [4]:
# Keep only the comparison period (both bounds inclusive) and the two variables
# of interest, renamed to the column names that the bias subtraction aligns on.
period_start = '2022-07-18 15:00:00'
period_end = '2024-06-07 20:00:00'
in_period = supersite_data['timestamp'].between(period_start, period_end)

sliced_supersite_data = (
    supersite_data.loc[in_period, ['timestamp', 'TBOT', 'RH']]
    .rename(columns={'TBOT': 'temp_hourly_avg', 'RH': 'rh_hourly_avg'})
    .set_index('timestamp')
)
sliced_supersite_data.head(2)

Unnamed: 0_level_0,temp_hourly_avg,rh_hourly_avg
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-07-18 15:00:00,36.353485,15.856182
2022-07-18 16:00:00,36.716064,16.948933


In [5]:
# Sanity check: print min and max of each supersite variable over the selected period.
supersite_temp = sliced_supersite_data['temp_hourly_avg']
supersite_rh = sliced_supersite_data['rh_hourly_avg']
print(supersite_temp.min(), supersite_temp.max(), supersite_rh.min(), supersite_rh.max())

-7.297149700000001 38.40674 15.856182 100.0


In [6]:
# Copy of the sliced supersite data with the hour and month of each timestamp
# added as columns; the original frame is left untouched.
sliced_supersite_data_datetime = sliced_supersite_data.assign(
    hour=lambda frame: frame.index.hour,
    month=lambda frame: frame.index.month,
)
sliced_supersite_data_datetime.head(2)

Unnamed: 0_level_0,temp_hourly_avg,rh_hourly_avg,hour,month
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-07-18 15:00:00,36.353485,15.856182,15,7
2022-07-18 16:00:00,36.716064,16.948933,16,7


In [10]:
def _mean_bias_frame(biases, variable):
    """Average one variable's bias across sensors and tag rows with hour and month.

    Parameters
    ----------
    biases : pandas.DataFrame
        Per-sensor biases concatenated column-wise, so ``variable`` occurs as a
        repeated column label (once per sensor). The index must be a
        DatetimeIndex, because its ``hour`` and ``month`` are read.
    variable : str
        Column label to average, e.g. ``'temp_hourly_avg'``.

    Returns
    -------
    pandas.DataFrame
        Columns ``mean_bias``, ``hour`` and ``month``, indexed like ``biases``.
    """
    # A boolean column mask always yields a DataFrame, even when only one
    # sensor contributed this variable; ``biases[variable]`` would return a
    # Series in that case and ``mean(axis=1)`` would raise.
    per_sensor = biases.loc[:, biases.columns == variable]
    frame = per_sensor.mean(axis=1).to_frame('mean_bias')
    frame['hour'] = frame.index.hour
    frame['month'] = frame.index.month
    return frame


# Bias of each sensor = sensor reading minus supersite reading. The subtraction
# aligns on the timestamp index and on the column names.
bias_list = []
for sensor in sensor_list:
    sensor_data = pd.read_csv(sensor_datapath + sensor + '.csv', parse_dates=['timestamp'])
    bias = sensor_data.set_index('timestamp') - sliced_supersite_data
    bias_list.append(bias)

# Side-by-side concatenation: each variable name appears once per sensor.
all_biases = pd.concat(bias_list, axis=1)

# Cross-sensor mean bias for temperature and relative humidity, exported as CSV.
mean_bias_tem = _mean_bias_frame(all_biases, 'temp_hourly_avg')
mean_bias_tem.to_csv(output_dir + 'mean_bias_tem.csv')
mean_bias_rh = _mean_bias_frame(all_biases, 'rh_hourly_avg')
mean_bias_rh.to_csv(output_dir + 'mean_bias_rh.csv')

"\nall_sensor_data = pd.concat(sensor_data_list, axis=1)\nmean_sensor_data_tem = pd.DataFrame(all_sensor_data['temp_hourly_avg'].mean(axis=1), columns=['mean_sensor_data'])\nmean_sensor_data_rh = pd.DataFrame(all_sensor_data['rh_hourly_avg'].mean(axis=1), columns=['mean_sensor_data'])\nmean_sensor_data_tem['hour'] = mean_sensor_data_tem.index.hour\nmean_sensor_data_rh['hour'] = mean_sensor_data_rh.index.hour\nmean_sensor_data_tem['month'] = mean_sensor_data_tem.index.month\nmean_sensor_data_rh['month'] = mean_sensor_data_rh.index.month\n"

In [9]:
# Preview the first two rows of the cross-sensor mean bias of `temp_hourly_avg`.
mean_bias_tem.head(2)

Unnamed: 0_level_0,mean_bias,hour,month
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-07-18 15:00:00,2.655237,15,7
2022-07-18 16:00:00,4.134547,16,7
