# Concatenate feature files

In [1]:
import numpy as np
import pandas as pd
from pyhere import here
import glob
import os

In [2]:
def get_merged_files(flist, meta=False, **kwargs):
    """Read several partial files and stack them into one month-sorted frame.

    Parameters
    ----------
    flist : iterable of path-like
        Files to read. Every file must contain a ``month`` column.
    meta : bool, default False
        If True the files are read with ``pd.read_csv``; otherwise they are
        read with ``pd.read_feather``.
    **kwargs
        Forwarded unchanged to the selected reader.

    Returns
    -------
    pandas.DataFrame
        Rows of all files concatenated, sorted by ``month``, with a fresh
        ``0..n-1`` index.

    Raises
    ------
    ValueError
        If ``flist`` is empty (typically a glob pattern that matched nothing).
    """
    # Materialize once so generators work and emptiness can be checked.
    paths = list(flist)
    if not paths:
        # pd.concat would also raise ValueError here, but with the opaque
        # message "No objects to concatenate".
        raise ValueError(
            "No input files to merge: `flist` is empty "
            "(check the glob pattern used to build it)."
        )

    reader = pd.read_csv if meta else pd.read_feather
    frames = [reader(path, **kwargs) for path in paths]

    return (
        pd.concat(frames, axis=0)
        # A stable sort keeps rows of the same month in input order, so the
        # result is reproducible for a given file order.
        .sort_values(by=['month'], kind='mergesort')
        .reset_index(drop=True)
    )

## Monthly files to yearly

In [6]:
# Imagery collection identifier used in the file names.
# Other values used before: "landsat-8-c2-l2", "sentinel-2-l2a"
satellite = "landsat-c2-l2"

# Country code used in the file names. Other values used before: 'TZA', 'NGA'
country_code = 'ZMB'

# Appears as "<points>k-points" in the file names. Other value used before: 15
points = 20

# Appears as "<num_features>-features" in the feature file names.
num_features = 1024

# Band list encoded in the feature file names. Other values used before:
#   "2-3-4", "2-3-4-8", "1-2-3-4-5-6-7", "2-3-4-5-6-7-8-11-12"
bands_short = "r-g-b-nir-swir16-swir22"

year = 2018

# Base names (without the per-month suffix and extension) of the partial files.
feat_name = (
    f'{satellite}_bands-{bands_short}_{country_code}_{points}'
    f'k-points_{num_features}-features_{year}'
)
meta_name = f'{satellite}_{country_code}_{points}k-points_meta_{year}'

# Key columns shared by the feature and metadata tables.
join_cols = ['lon', 'lat', 'year', 'month']

print('\n'.join([feat_name, meta_name]))

landsat-c2-l2_bands-r-g-b-nir-swir16-swir22_ZMB_20k-points_1024-features_2018
landsat-c2-l2_ZMB_20k-points_meta_2018


In [7]:
# Collect every partial feature file for this configuration.
feat_path = str(here('data', 'random_features', 'partial_features', f'{feat_name}_*.feather'))
# glob returns matches in arbitrary order; sort so the merge is reproducible.
feat_files = sorted(glob.glob(pathname=feat_path))
if not feat_files:
    # Fail here with the offending pattern instead of deep inside pandas.
    raise FileNotFoundError(f'No feature files match: {feat_path}')

feat_df = get_merged_files(feat_files)
# Round coordinates to 5 decimals before they become part of the index
# (presumably so float noise does not break the later join -- confirm).
feat_df[['lon', 'lat']] = feat_df[['lon', 'lat']].round(5)
feat_df = feat_df.set_index(join_cols)

feat_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,0,1,2,3,4,5,6,7,8,9,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
lon,lat,year,month,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
29.23488,-9.80423,2018,1,0.064830,0.000000,0.002341,2.888232,0.0,1.151326,0.439680,0.000000,0.0,0.000000,...,0.027418,1.735626,5.631755,0.060360,0.531012,0.000000,0.000000,0.444373,2.381027,0.049525
24.29488,-17.42423,2018,1,0.327958,0.000016,0.234384,3.639338,0.0,1.751913,2.795835,0.000000,0.0,0.000000,...,0.089877,2.864153,7.481530,0.018524,0.043519,0.000000,0.000000,0.722523,3.611938,0.003769
24.29488,-17.43423,2018,1,0.307732,0.000000,0.183957,3.584555,0.0,1.741960,2.801121,0.000000,0.0,0.000000,...,0.071243,2.838546,7.438651,0.017723,0.051386,0.000000,0.000000,0.718692,3.569894,0.001564
24.30488,-17.43423,2018,1,0.210404,0.000000,0.129663,3.316901,0.0,1.562108,2.214587,0.000000,0.0,0.000000,...,0.061547,2.564873,6.880312,0.028524,0.018540,0.000000,0.000000,0.776915,3.289055,0.004134
24.30488,-17.44423,2018,1,0.168752,0.000000,0.105511,3.422660,0.0,1.553662,2.032877,0.000000,0.0,0.000000,...,0.062709,2.481069,6.892368,0.027314,0.014062,0.000000,0.000000,0.808258,3.243153,0.002771
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27.78488,-14.93423,2018,12,5.524834,6.960501,0.000000,4.882538,0.0,1.716046,0.000000,4.313737,0.0,4.615990,...,0.000000,2.024141,9.139206,0.000000,0.055296,0.000000,0.000000,0.000000,1.640254,0.000000
27.79488,-14.92423,2018,12,5.956071,7.636426,0.000000,5.273995,0.0,1.920931,0.000000,4.598118,0.0,5.147659,...,0.000000,2.124624,9.741798,0.000000,0.020162,0.000000,0.000000,0.000000,1.645400,0.000000
27.77488,-14.91423,2018,12,6.413458,8.261296,0.000000,5.643005,0.0,2.117810,0.000000,5.211589,0.0,5.597459,...,0.000000,2.068165,10.132668,0.000000,0.029051,0.000000,0.000000,0.000000,1.551868,0.000000
27.71488,-14.92423,2018,12,6.701088,8.358335,0.000000,5.665072,0.0,1.969882,0.000000,5.623494,0.0,5.789176,...,0.000000,2.033867,10.097422,0.000000,0.014325,0.000000,0.000000,0.000000,1.408275,0.000000


In [8]:
# Collect every partial metadata file for this configuration.
meta_path = str(here('data', 'random_features', 'partial_meta', f'{meta_name}_*.csv'))
# glob returns matches in arbitrary order; sort so the merge is reproducible.
meta_files = sorted(glob.glob(pathname=meta_path))
if not meta_files:
    # Fail here with the offending pattern instead of deep inside pandas.
    raise FileNotFoundError(f'No metadata files match: {meta_path}')

meta_df = get_merged_files(meta_files, meta=True)
# Round coordinates to 5 decimals before they become part of the index
# (presumably so float noise does not break the later join -- confirm).
meta_df[['lon', 'lat']] = meta_df[['lon', 'lat']].round(5)
meta_df = meta_df.set_index(join_cols)

meta_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,crop_perc,stac_id,platform,cloud_cover,na_percent
lon,lat,year,month,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
25.87488,-13.49423,2018,1,0.374777,LE07_L2SP_173069_20180108_02_T1,landsat-7,0.00,0.372372
28.80488,-14.31423,2018,1,0.472338,LC08_L2SP_171070_20180102_02_T1,landsat-8,1.12,0.000000
28.79488,-14.33423,2018,1,0.431291,LC08_L2SP_171070_20180102_02_T1,landsat-8,1.12,0.000000
28.74488,-14.34423,2018,1,0.575253,LE07_L2SP_172070_20180101_02_T1,landsat-7,2.00,0.335586
28.75488,-14.33423,2018,1,0.488400,LE07_L2SP_172070_20180101_02_T1,landsat-7,2.00,0.290541
...,...,...,...,...,...,...,...,...
31.51488,-14.31423,2018,12,0.797145,LE07_L2SP_170070_20181205_02_T1,landsat-7,10.00,0.140390
31.51488,-14.33423,2018,12,0.839381,LE07_L2SP_170070_20181205_02_T1,landsat-7,10.00,0.238610
31.54488,-14.32423,2018,12,0.795360,LE07_L2SP_170070_20181205_02_T1,landsat-7,10.00,0.153153
31.50488,-14.28423,2018,12,0.788221,LE07_L2SP_170070_20181205_02_T1,landsat-7,10.00,0.132883


In [9]:
# Attach the feature columns to each metadata row. Both frames are indexed by
# join_cols, so this is an index-on-index left join that keeps every metadata row.
joined_df = meta_df.join(feat_df, how="left")
joined_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,crop_perc,stac_id,platform,cloud_cover,na_percent,0,1,2,3,4,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
lon,lat,year,month,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
25.87488,-13.49423,2018,1,0.374777,LE07_L2SP_173069_20180108_02_T1,landsat-7,0.00,0.372372,0.284885,0.000130,0.103816,4.450399,0.000000,...,0.002692,2.478036,8.021198,0.003448,0.083846,0.000000,0.000000,0.382466,3.364400,0.001285
28.80488,-14.31423,2018,1,0.472338,LC08_L2SP_171070_20180102_02_T1,landsat-8,1.12,0.000000,0.506425,0.002581,0.189124,3.825187,0.000000,...,0.056768,3.060266,7.890287,0.005841,0.015724,0.000000,0.000000,0.448032,3.711165,0.002807
28.79488,-14.33423,2018,1,0.431291,LC08_L2SP_171070_20180102_02_T1,landsat-8,1.12,0.000000,0.447196,0.002112,0.176575,3.693271,0.000000,...,0.049733,2.904177,7.600479,0.009633,0.037608,0.000000,0.000000,0.475585,3.543696,0.002157
28.74488,-14.34423,2018,1,0.575253,LE07_L2SP_172070_20180101_02_T1,landsat-7,2.00,0.335586,0.443745,0.005282,0.333076,3.323510,0.000000,...,0.146860,3.271196,7.497014,0.041419,0.020703,0.000000,0.000000,0.728725,3.881253,0.017481
28.75488,-14.33423,2018,1,0.488400,LE07_L2SP_172070_20180101_02_T1,landsat-7,2.00,0.290541,0.481257,0.006160,0.357946,3.152600,0.000000,...,0.186016,3.264683,7.355201,0.069825,0.035903,0.000000,0.000000,0.780721,3.854131,0.011891
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31.51488,-14.31423,2018,12,0.797145,LE07_L2SP_170070_20181205_02_T1,landsat-7,10.00,0.140390,0.714019,0.001313,0.249782,3.675405,0.000000,...,0.271612,4.146795,8.907051,0.005857,0.000000,0.000000,0.000000,1.056700,4.815702,0.000000
31.51488,-14.33423,2018,12,0.839381,LE07_L2SP_170070_20181205_02_T1,landsat-7,10.00,0.238610,0.578265,0.000000,0.373564,3.563783,0.000000,...,0.359935,4.337062,8.870835,0.006620,0.000000,0.000000,0.000000,1.197495,4.954018,0.000000
31.54488,-14.32423,2018,12,0.795360,LE07_L2SP_170070_20181205_02_T1,landsat-7,10.00,0.153153,0.687540,0.000481,0.320257,3.861844,0.000000,...,0.236630,4.233677,9.139064,0.004412,0.000000,0.000000,0.000000,1.067584,4.895113,0.000000
31.50488,-14.28423,2018,12,0.788221,LE07_L2SP_170070_20181205_02_T1,landsat-7,10.00,0.132883,0.721225,0.000000,0.181556,3.923308,0.000000,...,0.183117,4.073356,9.127764,0.002757,0.000000,0.000000,0.000000,1.037979,4.825410,0.000245


In [11]:
# Rows with at least one missing value in any column; an empty result means
# the joined table has no NaNs.
na = joined_df.loc[lambda frame: frame.isna().any(axis="columns")]
na

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,crop_perc,stac_id,platform,cloud_cover,na_percent,0,1,2,3,4,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
lon,lat,year,month,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1


In [12]:
# Write the joined table for the whole year as a single feather file.
year_file = str(here('data', 'random_features', satellite, f'{feat_name}.feather'))
# to_feather does not create missing parent directories.
os.makedirs(os.path.dirname(year_file), exist_ok=True)
# Feather requires a default index, so move the index levels back into columns.
joined_df.reset_index().to_feather(year_file)

In [93]:
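# Optional visual check (disabled): heatmap of the columns from position 5
# onward, i.e. everything after the five metadata columns. Requires seaborn.
# import seaborn as sns
# p = sns.heatmap(joined_df.iloc[:,5:])
# p.set(yticklabels=[])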