# Concatenate feature files

In [5]:
import numpy as np
import pandas as pd
from pyhere import here
import glob
import os

In [6]:
# Switch tested by the cells below: when truthy they also load the metadata
# CSVs, join them to the features and write the joined frame; when falsy only
# the feature frame is written.
meta_files = False
def get_merged_files(flist, meta=False, **kwargs):
    """Read every file in ``flist`` and stack them into one month-sorted frame.

    Parameters
    ----------
    flist : iterable of path-like
        Files to read and concatenate row-wise.
    meta : bool, default False
        If True the files are read with ``pd.read_csv``; otherwise with
        ``pd.read_feather``.
    **kwargs
        Forwarded unchanged to the selected reader.

    Returns
    -------
    pandas.DataFrame
        All rows concatenated, sorted by the ``month`` column, with a fresh
        ``0..n-1`` index.

    Raises
    ------
    ValueError
        If ``flist`` is empty (typically a glob pattern that matched nothing).
    """
    paths = list(flist)
    if not paths:
        # pd.concat would raise an opaque "No objects to concatenate" here.
        raise ValueError(
            "No input files to merge: `flist` is empty "
            "(check that the glob pattern matches existing files)."
        )

    reader = pd.read_csv if meta else pd.read_feather
    frames = [reader(path, **kwargs) for path in paths]

    return (
        pd.concat(frames, axis=0)
        # Stable sort: rows sharing a month keep their input order, so the
        # result is reproducible for a given file order.
        .sort_values(by=['month'], kind='mergesort')
        .reset_index(drop=True)
    )

## Monthly files to yearly

In [22]:
# ---- Run configuration -----------------------------------------------------
# Every value below is a component of the feature / metadata file names.

# Satellite identifier.
# Alternatives: "landsat-8-c2-l2", "sentinel-2-l2a"
satellite = "landsat-c2-l2"

# Country code.
# Alternatives: 'TZA', 'NGA'
country_code = 'ZMB'

# Appears in the names as "<points>k-points".
# Alternative: 15
points = 20

# Appears in the names as "<num_features>-features".
num_features = 1024

# Band list, dash-separated.
# Alternatives: "2-3-4", "2-3-4-8", "1-2-3-4-5-6-7", "2-3-4-5-6-7-8-11-12"
bands_short = "r-g-b-nir-swir16-swir22"

year = 2021

# Base names (no directory, no extension) of the feature and metadata files.
feat_name = "_".join([
    satellite,
    f"bands-{bands_short}",
    country_code,
    f"{points}k-points",
    f"{num_features}-features",
    str(year),
])
meta_name = "_".join([
    satellite,
    country_code,
    f"{points}k-points",
    "meta",
    str(year),
])

# Columns used as the index of the feature and metadata frames.
join_cols = [
    'lon',
    'lat',
    'year',
    'month',
]

print(f"{feat_name}\n{meta_name}")

landsat-c2-l2_bands-r-g-b-nir-swir16-swir22_ZMB_20k-points_1024-features_2021
landsat-c2-l2_ZMB_20k-points_meta_2021


In [24]:
# Glob pattern matching every partial feature file for this configuration.
feat_path = str(
    here(
        'data',
        'random_features',
        'partial_features',
        f'{feat_name}_*.feather',
    )
)
feat_files = glob.glob(feat_path)

# Merge the partial files into a single frame sorted by month.
feat_df = get_merged_files(feat_files)

# Round coordinates to 5 decimals (presumably so they compare equal when used
# as join keys), then index the frame by the join columns.
feat_df['lon'] = feat_df['lon'].round(5)
feat_df['lat'] = feat_df['lat'].round(5)
feat_df = feat_df.set_index(join_cols)

feat_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,0,1,2,3,4,5,6,7,8,9,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
lon,lat,year,month,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
31.34488,-14.49423,2021,1,0.127524,0.000000,0.000161,2.848424,0.0,1.205260,1.041520,0.0,0.0,0.000000,...,0.045027,2.012277,5.972591,0.013006,0.245866,0.000000,0.0,0.543145,2.575003,0.006265
32.13488,-13.81423,2021,1,0.049180,0.000005,0.002875,2.969396,0.0,1.281286,0.754469,0.0,0.0,0.000000,...,0.019333,1.976736,6.018360,0.024457,0.169425,0.000000,0.0,0.461495,2.596760,0.004910
32.13488,-13.82423,2021,1,0.062512,0.000000,0.003520,3.010204,0.0,1.276403,0.820515,0.0,0.0,0.000000,...,0.016645,2.004431,6.058318,0.022184,0.156408,0.000000,0.0,0.468380,2.641966,0.002555
32.15488,-13.82423,2021,1,0.001220,0.000000,0.006351,2.433468,0.0,0.889715,0.018460,0.0,0.0,0.000000,...,0.023241,1.481977,4.872440,0.166377,0.757449,0.000000,0.0,0.467851,2.045657,0.104362
32.17488,-13.79423,2021,1,0.002279,0.000000,0.002500,2.255415,0.0,0.821169,0.146269,0.0,0.0,0.000000,...,0.034988,1.632132,4.893525,0.116620,0.447644,0.000000,0.0,0.511642,2.146146,0.043881
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32.58488,-13.59423,2021,12,0.404651,0.000727,0.135616,3.466141,0.0,1.763182,4.410008,0.0,0.0,0.000000,...,0.189042,3.468359,7.993387,0.006603,0.021967,0.000000,0.0,0.836961,4.144438,0.000697
32.59488,-13.59423,2021,12,0.457252,0.000920,0.238713,3.805799,0.0,2.074978,5.350632,0.0,0.0,0.000000,...,0.145418,3.742657,8.701932,0.000339,0.000000,0.000000,0.0,0.839071,4.556325,0.000074
32.65488,-13.55423,2021,12,0.755815,0.000297,0.157565,3.893312,0.0,2.105067,6.210566,0.0,0.0,0.000000,...,0.200583,4.193387,9.238509,0.000005,0.000000,0.000000,0.0,0.935708,4.878699,0.000000
32.56488,-13.55423,2021,12,2.236133,1.798890,0.082081,4.381640,0.0,2.053108,4.245147,0.0,0.0,0.212878,...,0.000577,3.888698,9.630892,0.000000,0.000000,0.000121,0.0,0.000000,4.228803,0.000000


In [None]:
if meta_files:
    # Glob pattern matching every partial metadata CSV for this configuration.
    meta_path = str(here('data', 'random_features', 'partial_meta', f'{meta_name}_*.csv'))
    # Keep the matched paths in their own name: assigning them to `meta_files`
    # would overwrite the switch that the following cells test, so an empty
    # match would silently flip those cells to the features-only path.
    meta_file_list = glob.glob(pathname=meta_path)
    meta_df = get_merged_files(meta_file_list, meta=True)
    # Round coordinates the same way as the feature frame so the index keys match.
    meta_df['lon'] = meta_df['lon'].round(5)
    meta_df['lat'] = meta_df['lat'].round(5)
    meta_df = meta_df.set_index(join_cols)
    # A bare expression nested inside `if` is not auto-displayed by Jupyter
    # (only a cell's last top-level expression is), so print explicitly.
    print(meta_df)

In [None]:
if meta_files:
    # Index-on-index join: both frames are indexed by `join_cols`. The default
    # left join keeps every metadata row, leaving NaN where no features match.
    joined_df = meta_df.join(feat_df)
    # A bare expression nested inside `if` is not auto-displayed by Jupyter,
    # so print explicitly.
    print(joined_df)

In [None]:
if meta_files:
    # Rows of the joined frame that contain at least one missing value.
    na = joined_df[joined_df.isna().any(axis = 1)]
    # A bare expression nested inside `if` is not auto-displayed by Jupyter,
    # so print explicitly.
    print(na)

In [25]:
# Destination of the merged yearly file, under the satellite's directory.
year_file = str(
    here('data', 'random_features', satellite, f'{feat_name}.feather')
)

# Write the joined frame when metadata was loaded, otherwise the features
# alone. The index is reset first so the index levels are stored as columns.
(joined_df if meta_files else feat_df).reset_index().to_feather(year_file)

In [93]:
# import seaborn as sns
# p = sns.heatmap(joined_df.iloc[:,5:])
# p.set(yticklabels=[])