In [1]:
import pandas as pd

In [2]:
# Zones whose per-zone CSVs are combined below. Each identifier is used verbatim
# as a directory / file-name component when building the CSV paths.
# NOTE(review): the identifiers look like UTM zone number + latitude band — confirm.
zone_list = [
    # zone 42
    "42S", "42T",
    # zone 43
    "43S",
    # zone 44
    "44R",
    # zone 45
    "45Q", "45R",
    # zone 46
    "46Q", "46R",
    # zone 50
    "50N", "50P", "50Q",
    # zone 51
    "51L", "51N", "51P", "51Q",
    # zone 52
    "52L", "52N", "52P",
]

In [3]:
# Get full meta data
# Read each zone's metadata CSV into a list and concatenate once at the end.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and calling
# it inside a loop re-copies the accumulated frame on every iteration.
meta_data_frames = []

for zone in zone_list:
    meta_data = pd.read_csv("/root/tiles/" + zone + "/meta_data_" + zone + ".csv")
    meta_data_frames.append(meta_data)

# pd.concat raises on an empty list, so fall back to an empty frame to keep the
# result of the original loop when zone_list is empty. The per-file row index is
# kept as-is (no ignore_index), matching the original append behaviour.
meta_data_full = pd.concat(meta_data_frames) if meta_data_frames else pd.DataFrame()

In [4]:
# Show (rows, columns) of the combined metadata frame
meta_data_full.shape

(156656, 7)

In [5]:
# Number of metadata rows per value of the "zone" column
meta_data_full.groupby("zone").size()

zone
42S    16890
42T     2125
43S    10242
44R    16287
45Q     5672
45R    21683
46Q     9874
46R     5629
50N      287
50P     3489
50Q      208
51L     1967
51N    12112
51P    35083
51Q    11260
52L     1285
52N     1545
52P     1018
dtype: int64

In [6]:
# Save the combined metadata as a single CSV in the tiles directory.
# index=False leaves the DataFrame index out of the file.
meta_data_full.to_csv(
    path_or_buf="/root/tiles/meta_data_full.csv",
    index=False,
)

In [7]:
# Combine partial csvs
# Read each zone's partial-indicator CSV into a list and concatenate once.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and calling
# it inside a loop re-copies the accumulated frame on every iteration.
partial_indicator_frames = []

for zone in zone_list:
    partial_meta_data = pd.read_csv("/root/partial_indicator_csvs/meta_data_" + zone + ".csv")
    partial_indicator_frames.append(partial_meta_data)

# pd.concat raises on an empty list, so fall back to an empty frame to keep the
# result of the original loop when zone_list is empty. The per-file row index is
# kept as-is (no ignore_index), matching the original append behaviour.
partial_indicator_full = (
    pd.concat(partial_indicator_frames) if partial_indicator_frames else pd.DataFrame()
)

In [8]:
# Show (rows, columns) of the combined partial-indicator frame; the row count
# can be compared against meta_data_full.shape, which it is later merged into
partial_indicator_full.shape

(156656, 3)

In [9]:
# Preview the first rows of the combined partial-indicator frame
partial_indicator_full.head()

Unnamed: 0,filename,zone,partial
0,sentinel2_composite/transformed_data/42S/341-0...,42S,False
1,sentinel2_composite/transformed_data/42S/341-0...,42S,False
2,sentinel2_composite/transformed_data/42S/341-0...,42S,False
3,sentinel2_composite/transformed_data/42S/341-0...,42S,False
4,sentinel2_composite/transformed_data/42S/341-0...,42S,False


In [10]:
# Rename only the flag column; "filename" and "zone" keep their names because
# they are the keys used when this frame is merged into meta_data_full.
# The merged frame later has a "partial" column dropped, so the new name
# presumably avoids a clash with that existing column.
partial_indicator_full = partial_indicator_full.rename(columns={"partial": "partial_updated"})

In [11]:
# Preview again to confirm the flag column is now named "partial_updated"
partial_indicator_full.head()

Unnamed: 0,filename,zone,partial_updated
0,sentinel2_composite/transformed_data/42S/341-0...,42S,False
1,sentinel2_composite/transformed_data/42S/341-0...,42S,False
2,sentinel2_composite/transformed_data/42S/341-0...,42S,False
3,sentinel2_composite/transformed_data/42S/341-0...,42S,False
4,sentinel2_composite/transformed_data/42S/341-0...,42S,False


In [12]:
# Count rows for each value of the "partial_updated" flag
partial_indicator_full.groupby("partial_updated").size()

partial_updated
False    156432
True        224
dtype: int64

In [13]:
# Keep only the rows whose partial_updated flag equals True
partials = partial_indicator_full.loc[
    partial_indicator_full["partial_updated"].eq(True)
]

In [14]:
# Save the flagged rows to their own CSV so they can be checked separately.
# index=False leaves the DataFrame index out of the file.
partials.to_csv(
    path_or_buf="/root/partial_indicator_csvs/partials.csv",
    index=False,
)

In [15]:
# Merge full partial data with full meta data
# The left join keeps every metadata row and attaches its partial_updated flag,
# matched on (filename, zone).
# validate="many_to_one" makes pandas raise MergeError if a (filename, zone)
# key occurs more than once in partial_indicator_full, instead of silently
# duplicating metadata rows in the output.
meta_data_full_updated = meta_data_full.merge(
    partial_indicator_full,
    how="left",
    on=["filename", "zone"],
    validate="many_to_one",
)

In [17]:
# Remove the original "partial" column now that "partial_updated" has been merged in
meta_data_full_updated = meta_data_full_updated.drop("partial", axis=1)

In [18]:
# Save the updated metadata to the local tiles directory (/root/tiles).
# index=False leaves the DataFrame index out of the file.
meta_data_full_updated.to_csv(
    path_or_buf="/root/tiles/meta_data_full_updated.csv",
    index=False,
)

In [22]:
# Upload the same updated metadata CSV to the S3 bucket.
# NOTE(review): pandas writes s3:// URLs through the optional s3fs package and
# needs AWS credentials available in the environment — confirm both are set up.
s3_path = "s3://w210-poverty-mapper/sentinel2_composite/transformed_data/meta_data_full_updated.csv"
meta_data_full_updated.to_csv(
    path_or_buf=s3_path,
    index=False,
)