# Cross-match L1 and L2 at the TCCONN site in Paris 

- Cross-match between L1 features and L2 $X_{co2}$ lables 

In [1]:
import numpy as np
import pandas as pd
import glob
import sys
import h5py
#from netCDF4 import Dataset
from datetime import datetime
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from scipy.spatial import cKDTree

import pyarrow as pa
import pyarrow.parquet as pq

from functools import reduce
import operator
import gc

In [2]:
# plot settings
plt.rc('font', family='serif') 
plt.rc('font', serif='Times New Roman') 
plt.rcParams.update({'font.size': 16})
plt.rcParams['mathtext.fontset'] = 'stix'

### Read DataFrames

In [3]:
onepdf = pd.read_parquet('/home/shong/work/oco2/data/paris_L1_isite_pandas.parquet.snappy')
twopdf = pd.read_parquet('/home/shong/work/oco2/data/paris_L2_iall_pandas.parquet.snappy')

In [4]:
onepdf.head()

Unnamed: 0,channel_ind,altitude,longitude,latitude,aspect,slope,sol_az,sol_zn,time_str,ipx1024nested
0,0,42.051094,2.3677,48.852226,189.712494,0.490401,207.242752,30.967089,2021-07-22T12:55:08.644Z,715921
1,1,42.971222,2.367236,48.854809,195.084808,0.10498,207.23999,30.96925,2021-07-22T12:55:08.646Z,715921
2,2,43.062939,2.36746,48.855442,188.615356,0.108523,207.239822,30.969864,2021-07-22T12:55:08.636Z,715921
3,0,51.605095,2.390275,48.850983,225.645187,1.106636,207.284897,30.973173,2021-07-22T12:55:08.950Z,715921
4,1,53.896103,2.389698,48.853352,245.7854,1.118489,207.28212,30.975107,2021-07-22T12:55:08.953Z,715921


In [5]:
np.unique(onepdf.ipx1024nested.values)

array([715921], dtype=int32)

In [6]:
twopdf.head()

Unnamed: 0,altitude,longitude,latitude,aspect,slope,sol_az,sol_zn,xco2,time_str,ipx1024nested
0,57.66209,2.33008,48.842953,357.957825,1.240499,206.971252,30.910488,0.000409,2021-07-22T12:54:39.029Z,715923
1,56.111057,2.311695,48.840572,326.19809,1.413464,206.941391,30.902916,0.000409,2021-07-22T12:54:39.056Z,715922
2,43.401897,2.293364,48.838219,301.024231,0.864409,206.911591,30.895386,0.00041,2021-07-22T12:54:39.083Z,715922
3,60.11515,2.424551,48.853153,185.509872,0.913878,207.128494,30.948095,0.000409,2021-07-22T12:54:39.228Z,715924
4,58.226757,2.406187,48.850311,165.121994,0.548199,207.099075,30.94009,0.000409,2021-07-22T12:54:39.255Z,715924


In [7]:
np.unique(twopdf.ipx1024nested.values)

array([715834, 715835, 715838, 715920, 715921, 715922, 715923, 715924,
       715926], dtype=int32)

- As explained before, L1 data at a `isite` tile will match L2's xco2 in `iall` 9 tiles
- `millisecond` value from `time_str` will be used to match $X_{co2}$ to `L1` data. 

### Datetime for L1 and L2

In [8]:
onepdf['time'] = pd.to_datetime(onepdf['time_str'])

In [9]:
twopdf['time'] = pd.to_datetime(twopdf['time_str'])

#### L1 day: hour: minutes

In [10]:
np.unique(onepdf['time'].dt.day)

array([22])

In [11]:
np.unique(onepdf['time'].dt.hour)

array([12])

In [12]:
np.unique(onepdf['time'].dt.minute)

array([51, 52, 53, 54, 55])

#### L2 day: hour: minutes

In [13]:
np.unique(twopdf['time'].dt.day)

array([22, 31])

In [14]:
np.unique(twopdf['time'].dt.hour)

array([12])

In [15]:
np.unique(twopdf['time'].dt.minute)

array([47, 52, 53, 54, 55])

#### Remove day == 31

In [16]:
len(twopdf.index)

4237

In [17]:
twopdf = twopdf[twopdf['time'].dt.day == 22]

In [18]:
len(twopdf.index)

4236

In [19]:
np.unique(twopdf['time'].dt.day)

array([22])

In [20]:
np.unique(twopdf['time'].dt.minute)

array([52, 53, 54, 55])

#### Match in MilliSecond Scale

In [21]:
onepdf['millisec'] = np.int64(onepdf['time'].dt.day * 24 * 60 * 60000 \
                              + onepdf['time'].dt.hour * 60 * 60000 \
                              + onepdf['time'].dt.minute * 60000 \
                              + onepdf['time'].dt.second * 1000 \
                              + onepdf['time'].dt.microsecond / 1000)

In [22]:
twopdf['millisec'] = np.int64(twopdf['time'].dt.day * 24 * 60 * 60000 \
                              + twopdf['time'].dt.hour * 60 * 60000 \
                              + twopdf['time'].dt.minute * 60000 \
                              + twopdf['time'].dt.second * 1000 \
                              + twopdf['time'].dt.microsecond / 1000)

In [23]:
onepdf[['time_str','millisec']].head()

Unnamed: 0,time_str,millisec
0,2021-07-22T12:55:08.644Z,1947308644
1,2021-07-22T12:55:08.646Z,1947308646
2,2021-07-22T12:55:08.636Z,1947308636
3,2021-07-22T12:55:08.950Z,1947308950
4,2021-07-22T12:55:08.953Z,1947308953


In [24]:
twopdf[['xco2','millisec']].head()

Unnamed: 0,xco2,millisec
0,0.000409,1947279029
1,0.000409,1947279056
2,0.00041,1947279083
3,0.000409,1947279228
4,0.000409,1947279255


In [30]:
def find_closest_row(df, inmillisec):
    inearest = (df['millisec']-inmillisec).abs().idxmin()
    timediff = np.abs(inmillisec - df.loc[inearest].millisec)
    return [inearest, inmillisec,timediff, df.loc[inearest].xco2]

In [31]:
onepdf.head()

Unnamed: 0,channel_ind,altitude,longitude,latitude,aspect,slope,sol_az,sol_zn,time_str,ipx1024nested,time,millisec
0,0,42.051094,2.3677,48.852226,189.712494,0.490401,207.242752,30.967089,2021-07-22T12:55:08.644Z,715921,2021-07-22 12:55:08.644000+00:00,1947308644
1,1,42.971222,2.367236,48.854809,195.084808,0.10498,207.23999,30.96925,2021-07-22T12:55:08.646Z,715921,2021-07-22 12:55:08.646000+00:00,1947308646
2,2,43.062939,2.36746,48.855442,188.615356,0.108523,207.239822,30.969864,2021-07-22T12:55:08.636Z,715921,2021-07-22 12:55:08.636000+00:00,1947308636
3,0,51.605095,2.390275,48.850983,225.645187,1.106636,207.284897,30.973173,2021-07-22T12:55:08.950Z,715921,2021-07-22 12:55:08.950000+00:00,1947308950
4,1,53.896103,2.389698,48.853352,245.7854,1.118489,207.28212,30.975107,2021-07-22T12:55:08.953Z,715921,2021-07-22 12:55:08.953000+00:00,1947308953


In [32]:
find_closest_row(twopdf,1947308644)

[913, 1947308644, 2, 0.0004071442]

In [33]:
numonepdf = len(onepdf.index)

In [34]:
for ione in range(numonepdf):
    eachmil = onepdf['millisec'].loc[ione]
    print([ione]+find_closest_row(twopdf,eachmil))

[0, 913, 1947308644, 2, 0.0004071442]
[1, 913, 1947308646, 4, 0.0004071442]
[2, 913, 1947308636, 6, 0.0004071442]
[3, 920, 1947308950, 2, 0.00040813148]
[4, 920, 1947308953, 5, 0.00040813148]
[5, 920, 1947308942, 6, 0.00040813148]
[6, 921, 1947308977, 2, 0.00040761568]
[7, 921, 1947308980, 5, 0.00040761568]
[8, 921, 1947308969, 6, 0.00040761568]
[9, 922, 1947309004, 2, 0.00040689882]
[10, 928, 1947309283, 1, 0.00040961147]
[11, 928, 1947309286, 4, 0.00040961147]
[12, 928, 1947309275, 7, 0.00040961147]
[13, 929, 1947309310, 1, 0.00040843885]
[14, 929, 1947309313, 4, 0.00040843885]
[15, 929, 1947309302, 7, 0.00040843885]
[16, 930, 1947309337, 1, 0.00040849496]
[17, 930, 1947309340, 4, 0.00040849496]
[18, 930, 1947309329, 7, 0.00040849496]
[19, 936, 1947309617, 2, 0.00040757415]
[20, 936, 1947309619, 4, 0.00040757415]
[21, 936, 1947309609, 6, 0.00040757415]
[22, 937, 1947309644, 2, 0.00040838568]
[23, 937, 1947309646, 4, 0.00040838568]
[24, 937, 1947309636, 6, 0.00040838568]
[25, 938, 194

[893, 3280, 1947224617, 2, 0.0004080714]
[894, 3280, 1947224619, 4, 0.0004080714]
[895, 3280, 1947224609, 6, 0.0004080714]
[896, 3285, 1947224897, 2, 0.00041076282]
[897, 3285, 1947224900, 5, 0.00041076282]
[898, 3285, 1947224888, 7, 0.00041076282]
[899, 3286, 1947224923, 2, 0.00040833987]
[900, 3286, 1947224926, 5, 0.00040833987]
[901, 3286, 1947224915, 6, 0.00040833987]
[902, 1062, 1947224950, 2, 0.00040880416]
[903, 1062, 1947224953, 5, 0.00040880416]
[904, 1062, 1947224942, 6, 0.00040880416]
[905, 1067, 1947225230, 2, 0.0004115674]
[906, 1067, 1947225233, 5, 0.0004115674]
[907, 1067, 1947225221, 7, 0.0004115674]
[908, 1068, 1947225256, 1, 0.00040941883]
[909, 1068, 1947225260, 5, 0.00040941883]
[910, 1068, 1947225248, 7, 0.00040941883]
[911, 1069, 1947225283, 1, 0.00040860524]
[912, 1069, 1947225286, 4, 0.00040860524]
[913, 1069, 1947225275, 7, 0.00040860524]
[914, 1074, 1947225563, 1, 0.00041175183]
[915, 1074, 1947225567, 5, 0.00041175183]
[916, 1074, 1947225555, 7, 0.00041175183

[1741, 3321, 1947248302, 7, 0.0004076665]
[1742, 3328, 1947248617, 2, 0.00040966028]
[1743, 3328, 1947248619, 4, 0.00040966028]
[1744, 3328, 1947248609, 6, 0.00040966028]
[1745, 3329, 1947248644, 2, 0.00040699338]
[1746, 3329, 1947248646, 4, 0.00040699338]
[1747, 3329, 1947248636, 6, 0.00040699338]
[1748, 3336, 1947248950, 2, 0.00040913516]
[1749, 3336, 1947248953, 5, 0.00040913516]
[1750, 3336, 1947248942, 6, 0.00040913516]
[1751, 3337, 1947248977, 2, 0.00040853553]
[1752, 3337, 1947248980, 5, 0.00040853553]
[1753, 3337, 1947248969, 6, 0.00040853553]
[1754, 3338, 1947249004, 2, 0.00040869365]
[1755, 3338, 1947249007, 5, 0.00040869365]
[1756, 3338, 1947248996, 6, 0.00040869365]
[1757, 3344, 1947249283, 1, 0.00040971013]
[1758, 3344, 1947249286, 4, 0.00040971013]
[1759, 3344, 1947249275, 7, 0.00040971013]
[1760, 3345, 1947249310, 1, 0.000408713]
[1761, 3345, 1947249313, 4, 0.000408713]
[1762, 3345, 1947249302, 7, 0.000408713]
[1763, 3346, 1947249337, 1, 0.00040950882]
[1764, 3346, 19472

[2673, 2648, 1947199953, 5, 0.00040965513]
[2674, 2648, 1947199942, 6, 0.00040965513]
[2675, 2653, 1947200230, 25, 0.00041047143]
[2676, 2653, 1947200233, 22, 0.00041047143]
[2677, 2653, 1947200221, 34, 0.00041047143]
[2678, 2653, 1947200256, 1, 0.00041047143]
[2679, 2653, 1947200260, 5, 0.00041047143]
[2680, 2653, 1947200248, 7, 0.00041047143]
[2681, 2654, 1947200283, 1, 0.0004087353]
[2682, 2654, 1947200286, 4, 0.0004087353]
[2683, 2654, 1947200275, 7, 0.0004087353]
[2684, 2659, 1947200563, 25, 0.0004080318]
[2685, 2659, 1947200567, 21, 0.0004080318]
[2686, 2659, 1947200555, 33, 0.0004080318]
[2687, 2659, 1947200590, 2, 0.0004080318]
[2688, 2659, 1947200593, 5, 0.0004080318]
[2689, 2659, 1947200582, 6, 0.0004080318]
[2690, 2660, 1947200617, 2, 0.00040974462]
[2691, 2660, 1947200619, 4, 0.00040974462]
[2692, 2660, 1947200609, 6, 0.00040974462]
[2693, 2665, 1947200897, 2, 0.00040956]
[2694, 2665, 1947200900, 5, 0.00040956]
[2695, 2665, 1947200888, 7, 0.00040956]
[2696, 2666, 1947200923

In [39]:
twopdf.loc[4137]

altitude                                40.958088
longitude                                2.371136
latitude                                48.850342
aspect                                 210.118637
slope                                    0.402633
sol_az                                 207.468231
sol_zn                                  31.004734
xco2                                     0.000408
time_str                 2021-07-22T12:55:38.975Z
ipx1024nested                              715921
time             2021-07-22 12:55:38.975000+00:00
millisec                               1947338975
Name: 4137, dtype: object

In [40]:
onepdf.loc[3026:3028]

Unnamed: 0,channel_ind,altitude,longitude,latitude,aspect,slope,sol_az,sol_zn,time_str,ipx1024nested,time,millisec
3026,0,39.190475,2.371677,48.847618,231.136429,0.393372,207.471268,31.002483,2021-07-22T12:55:38.977Z,715921,2021-07-22 12:55:38.977000+00:00,1947338977
3027,1,41.480953,2.370752,48.851307,198.480225,0.450054,207.466858,31.00548,2021-07-22T12:55:38.980Z,715921,2021-07-22 12:55:38.980000+00:00,1947338980
3028,2,42.202831,2.370981,48.8521,200.739258,0.364473,207.466568,31.006239,2021-07-22T12:55:38.969Z,715921,2021-07-22 12:55:38.969000+00:00,1947338969


In [41]:
twopdf.describe()

Unnamed: 0,altitude,longitude,latitude,aspect,slope,sol_az,sol_zn,xco2,ipx1024nested,millisec
count,4236.0,4236.0,4236.0,4236.0,4236.0,4236.0,4236.0,4236.0,4236.0,4236.0
mean,56.776127,2.358214,48.844494,189.494919,1.888441,206.756409,30.87579,0.000409,715920.575779,1947243000.0
std,19.899004,0.048171,0.040322,107.175545,4.423931,0.422832,0.078647,1e-06,12.783315,56823.88
min,29.466667,2.249182,48.743355,0.213229,0.045934,205.859238,30.673103,0.000398,715834.0,1947136000.0
25%,41.463314,2.32029,48.808303,86.21907,0.528287,206.421947,30.818332,0.000408,715921.0,1947196000.0
50%,52.162025,2.35838,48.845816,204.867706,0.975038,206.764824,30.877131,0.000409,715922.0,1947244000.0
75%,66.526728,2.395922,48.880304,284.799088,1.810062,207.094391,30.933205,0.00041,715924.0,1947291000.0
max,121.429054,2.484542,48.92812,359.987976,89.395584,207.753464,31.055525,0.000418,715926.0,1947344000.0
