# Export air temperature and relative hudmity for validation
- This script is used to export air temperature and relative humidity from simulations in comparison with sensor data;
- Simulations: CNTL, TRAF at UK-Manchester. 

In [1]:
import os
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cftime
import string
import matplotlib.dates as mdates
home_path = '/gws/nopw/j04/duicv/yuansun/'

In [2]:
sensor_id = 'MOD-PM-00427'
df_sensor = pd.read_csv(f'{home_path}0_lcz_mcr/output_analysis/single_point/calibration/adjusted_sensor_data/{sensor_id}.csv')
df_sensor.head()

Unnamed: 0,timestamp,rh_hourly_avg,temp_hourly_avg,hour,month,std_temp_bias,std_rh_bias,rh_hourly_avg_corrected,temp_hourly_avg_temp,temp_hourly_avg_corrected
0,2022-02-21 14:00:00,43.61,17.06,14,2,0.196525,-0.182844,51.583847,13.707289,13.707289
1,2022-02-21 15:00:00,39.354386,18.73386,15,2,0.191343,-0.178718,46.387718,15.149258,15.149258
2,2022-02-21 16:00:00,,,16,2,0.17608,-0.158543,,,
3,2022-02-21 17:00:00,,,17,2,0.156802,-0.136305,,,
4,2022-02-21 18:00:00,,,18,2,0.148477,-0.124433,,,


In [3]:
df_sensor_clean = df_sensor[['timestamp', 'rh_hourly_avg_corrected', 'temp_hourly_avg_corrected']].copy()
df_sensor_clean['timestamp'] = pd.to_datetime(df_sensor_clean['timestamp'])
df_sensor_clean.rename(columns={'rh_hourly_avg_corrected': 'RH2M_obs', 
                                'temp_hourly_avg_corrected': 'TSA_U_obs',
                                'timestamp': 'time'}, inplace=True)
df_sensor_clean.head()

Unnamed: 0,time,RH2M_obs,TSA_U_obs
0,2022-02-21 14:00:00,51.583847,13.707289
1,2022-02-21 15:00:00,46.387718,15.149258
2,2022-02-21 16:00:00,,
3,2022-02-21 17:00:00,,
4,2022-02-21 18:00:00,,


In [4]:
archive = f'{home_path}0_urban_traffic/archive'
start_date = '2012-01-01T00:00:00'
end_date = '2012-12-31T23:00:00'
case_list = ['cntl', 'traffic'] 
GRIDNAME_single = 'UK-MCR' 
var_list = ['TSA_U', 'RH2M']                      

In [11]:
df_all = pd.DataFrame()
for case in case_list:
    ds_case = xr.open_dataset(f'{archive}/{GRIDNAME_single}_{case}/lnd/hist/{GRIDNAME_single}_{case}.clm2.h0.2022-01-01-03600.nc')  
    for var in var_list:
        if var == 'RH2M':
            ds_case_var = ds_case[var] 
        else:    
            ds_case_var = ds_case[var] - 273.15
        df_case_var=ds_case_var.to_dataframe().reset_index()[['time', var]]
        df_case_var.rename(columns={var: f'{var}_{case}'}, inplace=True)
        df_case_var['time'] = df_case_var['time'].dt.round('min').dt.ceil('min')
        df_case_var['time'] = pd.to_datetime(df_case_var['time'])
        if df_all.empty:
            df_all = df_case_var
        else:
            df_all = pd.merge(df_all, df_case_var, how='outer', on='time')    
df_merged = pd.merge(df_sensor_clean, df_all, how='outer', on='time') 
df_merged_clean = df_merged[df_merged['time'] < pd.to_datetime('2023-01-01')]  
df_merged_clean['time'] = df_merged_clean['time'] - pd.Timedelta(hours=1)   
df_merged_clean['hour'] = df_merged_clean['time'].dt.hour
df_merged_clean['minute'] = df_merged_clean['time'].dt.minute
df_merged_clean['diurnal'] = df_merged_clean['hour'].astype(str).str.zfill(2) + ':' + df_merged_clean['minute'].astype(str).str.zfill(2)
df_merged_clean.to_csv(f'./data_for_figure/UK-Manchester.csv', index=False)
df_merged_clean

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_merged_clean['time'] = df_merged_clean['time'] - pd.Timedelta(hours=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_merged_clean['hour'] = df_merged_clean['time'].dt.hour
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_merged_clean['minute'] = df_merged_clean['time'].dt.minute
A value is 

Unnamed: 0,time,RH2M_obs,TSA_U_obs,TSA_U_cntl,RH2M_cntl,TSA_U_traffic,RH2M_traffic,hour,minute,diurnal
0,2022-01-01 00:00:00,,,13.521698,87.045792,13.521698,87.045792,0,0,00:00
1,2022-01-01 01:00:00,,,13.719238,86.692062,13.762695,86.596939,1,0,01:00
2,2022-01-01 02:00:00,,,13.786896,86.060379,13.833862,85.941956,2,0,02:00
3,2022-01-01 03:00:00,,,13.437408,84.677002,13.467102,84.599998,3,0,03:00
4,2022-01-01 04:00:00,,,13.146088,83.592514,13.173218,83.525177,4,0,04:00
...,...,...,...,...,...,...,...,...,...,...
8754,2022-12-31 18:00:00,93.899324,9.459280,9.862640,98.107918,10.897675,95.677521,18,0,18:00
8755,2022-12-31 19:00:00,93.965868,9.120794,9.671295,97.848480,10.670898,95.657654,19,0,19:00
8756,2022-12-31 20:00:00,92.233187,8.714747,9.594543,97.927422,10.648438,95.513390,20,0,20:00
8757,2022-12-31 21:00:00,92.049609,8.348432,10.039795,95.563171,10.586060,93.842491,21,0,21:00
