## PrepareFeatureSet

In order to run machine learning algorithms, we need to prepare feature vectors (tensors) that will be signature extracts.
For this, we need to reformat the data in the database so that, for each parcel, we have the full signature in one row.

We save the result to a .csv file.

In [6]:
import psycopg2
import pandas as pd

In [None]:
# Connect to the database
db_params = {
    "host": "localhost",
    "database": "postgres",
    "user": "postgres",
    "password": "",
}
conn = psycopg2.connect(**db_params)
# Cursor on the same connection; it is closed again right after the data is loaded.
cur = conn.cursor()

In [7]:
# Parcel table
parcels_table = "aoi2020"
crop_type_column = ""  # Set the crop type column

# Signature and extract tables
bs_extracts = "aoi2020_bs_extracts"
bs_signatures = "aoi2020_bs_signatures"
s2_signatures = "aoi2020_s2_signatures"
c6_signatures = "aoi2020_c6_signatures"

# Date range settings
date_start, date_end = "2019-01-01", "2019-10-15"

# Set the folder to store the data
data_folder = ""

The value for table 'parcels_1' is: 'cat2018'.
The value for table 'bs_signatures' is: 'es2018_bs_signatures'.
The value for table 's2_signatures' is: 'es2018_s2_signatures'.
The value for table 'c6_signatures' is: 'es2018_c6_signatures'.
The value for table 'bs_extracts' is: 'es2018_bs_extracts'.
The value for 'date_start' is: '2018-01-01'.
The value for 'date_end' is: '2019-01-01'.


HBox(children=(Label(value='Select the checkbox to change the values.'), Checkbox(value=False, description='Ch…

VBox()

VBox()

In [8]:

# Self-join the extracts table so that the VV and VH means of the same
# (pid, period) end up side by side in one record.
selectSql = f"""
    SELECT e1.pid::int as pid, e1.period as period, e1.mean as vv, e2.mean as vh
    FROM {bs_extracts} e1, {bs_extracts} e2
    WHERE e1.pid = e2.pid
    And e1.period = e2.period
    And e1.band = 'VV'
    And e2.band = 'VH';
    """

df = pd.read_sql_query(sql=selectSql, con=conn)

# The result is held in `df` from here on, so the database handles are released.
cur.close()
conn.close()

df.head(10)

Unnamed: 0,pid,period,vv,vh
0,1531,0,0.149679,0.043552
1,1795,0,0.17566,0.032558
2,1804,0,0.121643,0.024553
3,1852,0,0.076359,0.012212
4,2387,0,0.075648,0.011244
5,2847,0,0.152997,0.032659
6,6246,0,0.084058,0.0121
7,8173,0,0.095772,0.020748
8,8246,0,0.082384,0.01717
9,8988,0,0.102454,0.019043


In [9]:
# Index the rows by parcel id so that the per-period frames can be joined on it.
# The guard keeps the cell re-runnable: once 'pid' is the index it is no longer
# a column, and calling set_index('pid') a second time would raise a KeyError.
if df.index.name != 'pid':
    df = df.set_index('pid')

In [10]:
# Number of (pid, period) records loaded
df.shape[0]

8046689

In [11]:
# Distinct period numbers present in the data, in ascending order
unique_periods = df['period'].unique()
periods = sorted(unique_periods)

periods

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51]

In [12]:
# Reshape the long (pid, period) table into one row per parcel with a
# vv_<period>/vh_<period> column pair for every selected period.
startperiod = 11  # first period to include
endperiod = 54    # exclusive upper bound; a value past the last period means "up to the end"

# Select the remaining periods by value rather than by list position, so that
# a gap in `periods` cannot shift the window relative to `startperiod`.
later_periods = [p for p in periods if startperiod < p < endperiod]

# The first period defines the set of parcels (rows) of the feature table.
p0 = df.loc[df['period'] == startperiod].drop(columns='period')
p0.columns = [f'vv_{startperiod}', f'vh_{startperiod}']

for p in later_periods:
    pN = df.loc[df['period'] == p].drop(columns='period')
    pN.columns = [f'vv_{p}', f'vh_{p}']

    # Default left join on the index: parcels absent in period `p` get NaN there,
    # parcels absent in `startperiod` are not added.
    p0 = p0.join(pN)

p0.head(10)

Unnamed: 0_level_0,vv_11,vh_11,vv_12,vh_12,vv_13,vh_13,vv_14,vh_14,vv_15,vh_15,...,vv_47,vh_47,vv_48,vh_48,vv_49,vh_49,vv_50,vh_50,vv_51,vh_51
pid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2118,0.115742,0.019568,0.106761,0.022802,0.073332,0.017256,0.093596,0.026564,0.082563,0.022998,...,0.137677,0.024401,0.146635,0.024775,0.152747,0.024763,0.11837,0.023887,0.121044,0.022854
2276,0.163081,0.025636,0.1415,0.028902,0.11895,0.022615,0.146834,0.03493,0.151481,0.030692,...,0.119875,0.030097,0.142815,0.0347,0.196909,0.041987,0.154181,0.031371,0.168172,0.035143
2539,0.088371,0.020628,0.07727,0.016876,0.061039,0.017164,0.074199,0.020374,0.052222,0.020704,...,0.182922,0.02475,0.188327,0.022737,0.174195,0.022811,0.158066,0.020885,0.158181,0.020106
8167,0.124542,0.021086,0.1324,0.025999,0.104094,0.022668,0.132402,0.034207,0.072899,0.024272,...,0.144444,0.018198,0.120079,0.017969,0.141106,0.024328,0.109907,0.017431,0.124694,0.018385
8393,0.116159,0.019968,0.115563,0.015892,0.113459,0.017711,0.142631,0.029332,0.073274,0.020184,...,0.107778,0.018758,0.094248,0.017166,0.094289,0.016339,0.084443,0.013367,0.118864,0.020999
8576,0.091986,0.017822,0.094425,0.018385,0.085235,0.018905,0.111509,0.030461,0.066752,0.020738,...,0.131598,0.016376,0.123264,0.016353,0.168696,0.022347,0.105408,0.012325,0.10696,0.012571
8999,0.126829,0.027815,0.119964,0.025726,0.119701,0.026935,0.139851,0.033535,0.082965,0.025518,...,0.143442,0.023673,0.120766,0.023203,0.132021,0.025522,0.104224,0.019331,0.116216,0.016434
16575,0.096109,0.024463,0.09116,0.020455,0.08619,0.021545,0.104874,0.032239,0.076265,0.026608,...,0.085896,0.01724,0.081434,0.017209,0.087872,0.018846,0.072275,0.013753,0.071198,0.013603
16831,0.141385,0.021356,0.119944,0.018747,0.139212,0.019787,0.265533,0.029006,0.110561,0.017217,...,0.078307,0.019822,0.091036,0.021049,0.095035,0.01755,0.076225,0.016328,0.079961,0.02451
18145,0.131293,0.020672,0.12609,0.019701,0.156516,0.026141,0.314202,0.047127,0.137088,0.01888,...,0.169658,0.028367,0.13814,0.02447,0.145175,0.023973,0.120577,0.021175,0.116564,0.020711


In [13]:
# Save the per-parcel feature table as CSV.
# `data_folder` is prepended as-is, so it must be empty or end with a path separator.
p0.to_csv(f"{data_folder}{parcels_table}_bs_features.csv")

In [10]:
# Close database connection
# `conn.closed` is 0 while the connection is open; the guard makes this cell a
# harmless no-op when the connection has already been closed earlier.
if not conn.closed:
    conn.close()

The connection to the database is now closed.
