# GeoTIFF Infos
## Imports

In [None]:
import os

import numpy as np
import pandas as pd
from tqdm.auto import tqdm

from asos import settings

%load_ext autoreload
%autoreload 2

## Get Files

In [None]:
# Load the file-info table; only_subset=False asks for all files rather than
# just the subset.
csv = settings.load_file_infos(only_subset=False)
n_files = len(csv.df)
print(n_files)

# Optional filter, disabled by default: keep only the 's2_summer' tiles.
#csv.df = csv.df[csv.df.index.str.contains('s2_summer')]

# Persist the table, then show it as the cell output.
csv.save_csv()
csv.df

## Centroids

In [None]:
# centroids
# NOTE(review): get_centroids() is defined in the asos package and is not
# visible here; presumably it computes one centroid per tile and stores it
# in csv.df -- confirm against its implementation.
csv.get_centroids()

## Area ID

In [None]:
# area id: the file name (last '/'-separated component of the index) with its
# final '_'-separated token removed
file_names = csv.df.index.str.split('/').str[-1]
name_tokens = file_names.str.split('_')
csv.df['area_id'] = name_tokens.str[:-1].str.join('_')

# persist the new column, then show the table as the cell output
csv.save_csv()
csv.df

## Season

In [None]:
# season: last '_'-separated token of the second '/'-separated component of
# the index
season_folders = csv.df.index.str.split('/').str[1]
csv.df['season'] = season_folders.str.split('_').str[-1]

# persist the new column, then show the table as the cell output
csv.save_csv()
csv.df

## Category

In [None]:
# category: first '/'-separated component of the index
path_parts = csv.df.index.str.split('/')
csv.df['category'] = path_parts.str[0]

# persist the new column, then plot how the tiles spread over the categories
csv.save_csv()
csv.plot_column('category')

## Labels

In [None]:
# Derive the binary label from the category string.
# Rows whose category is neither 'False' nor 'True' are not assigned here.

# anthropo tiles (category 'False') have label 0
is_anthropo = csv.df['category'] == 'False'
csv.df.loc[is_anthropo, 'label'] = 0

# wdpa tiles of categories Ia, Ib and II (category 'True') have label 1
is_wdpa = csv.df['category'] == 'True'
csv.df.loc[is_wdpa, 'label'] = 1

# persist the new column, then plot the label distribution
csv.save_csv()
csv.plot_column('label')

## Quality Score, Single Temporal Subset, Datasplit

### Quality Score

In [None]:
# Column renames, applied in two passes exactly as listed:
# '<Season>_SCORE' -> '<season>' and then '<season>' -> 'qs_<season>'.
score_to_season = {
    'Autumn_SCORE': 'autumn',
    'Spring_SCORE': 'spring',
    'Summer_SCORE': 'summer',
    'Winter_SCORE': 'winter',
}
season_to_qs = {
    'spring': 'qs_spring',
    'winter': 'qs_winter',
    'summer': 'qs_summer',
    'autumn': 'qs_autumn',
}

# read the original quality score file, index it by 'imagePath' and rename
quality_path = os.path.join(settings.infos_folder, 'quality_scores.csv')
df_quality = (
    pd.read_csv(quality_path)
    .set_index('imagePath')
    .rename(columns=score_to_season)
    .rename(columns=season_to_qs)
)
df_quality

### Single Temporal Subset

In [None]:
# capitalised season names used in the file -> lower-case names used here
season_names = {'Autumn': 'autumn', 'Spring': 'spring', 'Summer': 'summer', 'Winter': 'winter'}

# read the original single temporal subset file, index it by 'imagePath' and
# call the subset column 'season'
subset_path = os.path.join(settings.infos_folder, 'single_temporal_subset.csv')
df_subset = (
    pd.read_csv(subset_path)
    .set_index('imagePath')
    .rename(columns={'single_temporal_subset': 'season'})
)

# normalise the entries; values outside the mapping are left untouched
df_subset['season'] = df_subset['season'].replace(season_names)

df_subset

### Datasplit

In [None]:
# Build a lookup table "area id -> data split" from the original split file.

# read original datasplit file; it holds one column of ids per split
# ('train', 'validation', 'test')
df_dataset = pd.read_csv(os.path.join(settings.infos_folder, 'split_IDs.csv'))

# ids per split: dropna() removes the missing entries (presumably padding,
# because the splits differ in length) and astype(int) restores integer ids.
# This replaces a hand-written `~np.isnan(...)` filter, which used the bitwise
# operator as a logical "not" and only worked for NumPy scalars.
indices_train = df_dataset['train'].dropna().astype(int).tolist()
indices_val = df_dataset['validation'].dropna().astype(int).tolist()
indices_test = df_dataset['test'].dropna().astype(int).tolist()

# one frame per split, indexed by id, with the split name in column 'dataset'
# (note that the 'validation' column is stored under the shorter name 'val')
df_train = pd.DataFrame({'index': indices_train, 'dataset': 'train'}).set_index('index')
df_val = pd.DataFrame({'index': indices_val, 'dataset': 'val'}).set_index('index')
df_test = pd.DataFrame({'index': indices_test, 'dataset': 'test'}).set_index('index')

# stack the three splits into a single lookup table
df_dataset = pd.concat([df_train, df_val, df_test])

df_dataset

## Combine dataframes

In [None]:
# Place the quality scores, the single-temporal-subset season and the data
# split side by side; concatenating along the columns aligns the three frames
# on their index.
param_frames = [df_quality, df_subset, df_dataset]
df_params = pd.concat(param_frames, axis='columns')
df_params

### Add to CSV Dataframe

In [None]:
# Attach three per-tile columns taken from df_params:
#   quality - the 'qs_<season>' score of the tile's area for the tile's season
#   subset  - True when the tile's season equals the 'season' entry of its area
#   dataset - the 'dataset' entry of the tile's area
#
# The lookup is one index alignment instead of a Python loop that re-scanned
# both dataframes for every tile.

# one parameter row per area id; for a duplicated id keep the first row, which
# is the row a positional "first match" lookup would return
params_by_area = df_params[~df_params.index.duplicated(keep='first')]

tile_area_ids = csv.df['area_id'].astype(int).to_numpy()
tile_seasons = csv.df['season'].to_numpy()

# row position of every tile's area id inside params_by_area (-1 = not found)
param_rows = params_by_area.index.get_indexer(tile_area_ids)
not_found = param_rows < 0
if not_found.any():
    # fail loudly and name the offending ids instead of writing partial data
    missing_ids = sorted(set(tile_area_ids[not_found].tolist()))
    raise KeyError('area ids without an entry in df_params: {}'.format(missing_ids))

# parameter table expanded to one row per tile, in the row order of csv.df
tile_params = params_by_area.iloc[param_rows]

# every tile reads the score column of its own season; looping over the
# distinct seasons keeps this to a handful of vectorised assignments
quality = np.empty(len(csv.df), dtype=object)
for season in pd.unique(tile_seasons):
    in_season = tile_seasons == season
    quality[in_season] = tile_params['qs_' + season].to_numpy()[in_season]

# plain arrays are assigned so the values are written by position, not by label
csv.df['quality'] = quality
csv.df['subset'] = tile_params['season'].to_numpy() == tile_seasons
csv.df['dataset'] = tile_params['dataset'].to_numpy()

csv.save_csv()

csv.df

In [None]:
# plot the 'dataset' column (plot_column is defined in the asos package;
# presumably it shows how the tiles are distributed over the data splits)
csv.plot_column('dataset')