# 1. Process the Soil Moisture Data

<div class="alert alert-block alert-info">
    <b>About:</b>
    This notebook refers to the studies presented in <b>Chapter 5.3</b> of the Ph.D. thesis [3].
    We cannot guarantee the completeness or correctness of the code.
    If you find bugs or if you have suggestions on how to improve the code, we encourage you to post your ideas as <a href="https://github.com/felixriese/alpaca-processing/issues">GitHub issue</a>.
</div>

In [None]:
import os

import geopandas
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd

## Data processing

In [None]:
# Read the soil-moisture measurements; the first CSV column becomes the index
# and the "datetime" strings are parsed into pandas timestamps.
df = pd.read_csv("peru_soilmoisture.csv", index_col=0).assign(
    datetime=lambda frame: pd.to_datetime(
        frame["datetime"], format="%Y-%m-%d %H:%M:%S"
    )
)

In [None]:
# Identifiers of the measurement areas (1 to 5); the export and plotting cells
# iterate over this list.
areas = list(range(1, 6))

In [None]:
# Build a GeoDataFrame with one point per row from the "long"/"lat" columns.
# The CRS is given as an authority string: the {'init': 'epsg:...'} dict form
# is deprecated in pyproj/geopandas and raises a FutureWarning.
crs = "EPSG:4326"
gdf = geopandas.GeoDataFrame(df, geometry=geopandas.points_from_xy(df.long, df.lat), crs=crs)
gdf.head()

In [None]:
# Reproject the points to EPSG:3857 (Web Mercator), which is the coordinate
# system used for the exported shapefiles. The authority string replaces the
# deprecated {'init': ...} dict form.
gdf = gdf.to_crs("EPSG:3857")
gdf.head()

In [None]:
# Export the data into shapefiles: one file with all points and one per area.
# Note: the ESRI Shapefile format limits attribute names to 10 characters, so
# "soilmoisture_perc" is shortened in the written files.
export_vars = ["geometry", "soilmoisture_perc"]
export_path = "shapes/"

# Create the output directory first; writing into a missing directory fails.
os.makedirs(export_path, exist_ok=True)

gdf[export_vars].to_file(export_path+"peru_soilmoisture_full.shp")
for a in areas:
    gdf[gdf["area"]==a][export_vars].to_file(export_path+"peru_soilmoisture_area"+str(a)+".shp")

## Data plotting

In [None]:
# Colour scale for the soil moisture: from the smallest measured value up to 40.
# Both names are assigned again in the per-area plotting cell further down.
cmap = "viridis_r"
norm = mpl.colors.Normalize(vmax=40, vmin=df["soilmoisture_perc"].min())

In [None]:
# NOTE: geopandas.datasets was deprecated in geopandas 0.14 and removed in 1.0;
# this cell needs geopandas < 1.0 (or a local copy of the Natural Earth data).
world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))

# Restrict the base map to Peru and reproject it to the CRS of the measurements.
# The Natural Earth layer is in geographic coordinates while `gdf` has been
# reprojected, so without this step outline and points would not line up.
peru = world[world.name == 'Peru'].to_crs(gdf.crs)
ax = peru.plot(color='white', edgecolor='black')

# Overlay the measurement points on the outline.
gdf.plot(ax=ax, color='red')

# Zoom to the measurements with a small relative margin. abs() makes the
# margin widen the view regardless of the sign of the coordinates.
minx, miny, maxx, maxy = gdf.total_bounds
factor = 0.001
ax.set_xlim(minx - abs(minx)*factor, maxx + abs(maxx)*factor)
ax.set_ylim(miny - abs(miny)*factor, maxy + abs(maxy)*factor)

plt.show()

In [None]:
# Separate GeoDataFrame in geographic coordinates (EPSG:4326) for the per-area
# maps, whose axes are labelled in degrees. The authority string replaces the
# deprecated {'init': ...} dict form.
plot_crs = "EPSG:4326"
plot_gdf = geopandas.GeoDataFrame(df, geometry=geopandas.points_from_xy(df.long, df.lat), crs=plot_crs)
plot_gdf.head()

In [None]:
fontsize = 18

# Upper end of the colour scale in percent; measurements above it are not plotted.
max_sm = 40.
cmap = "viridis_r"

# One shared normalisation so that colours are comparable across all areas.
norm = plt.Normalize(vmin=0., vmax=max_sm)

# Make sure the output directory exists before any figure is saved.
os.makedirs("plots", exist_ok=True)

for area in areas:

    curr_gdf = plot_gdf[(plot_gdf["area"]==area) & (plot_gdf["soilmoisture_perc"]<=max_sm)]

    if area == 5:
        # Area 5: keep only points north of -12.0368 deg latitude and
        # west of -76.38035 deg longitude.
        curr_gdf = curr_gdf[(curr_gdf["lat"]>-12.0368) & (curr_gdf["long"] <-76.38035)]

    print(area, curr_gdf.shape)

    if curr_gdf.empty:
        # Nothing to draw; the axis limits below cannot be computed.
        print("Area", area, "has no points to plot - skipped.")
        continue

    # Area 2 gets a wider figure than the other areas.
    figsize = (7.5, 5) if area == 2 else (6, 6)
    fig, ax = plt.subplots(1, 1, figsize=figsize)

    curr_gdf.plot(column="soilmoisture_perc", cmap=cmap, ax=ax, norm=norm)

    # Zoom to the selected points with a small margin (in degrees).
    offset = 0.0001
    ax.set_xlim(curr_gdf["long"].min()-offset, curr_gdf["long"].max()+offset)
    ax.set_ylim(curr_gdf["lat"].min()-offset, curr_gdf["lat"].max()+offset)

    ax.set_xlabel("Longitude in degrees", fontsize=fontsize, labelpad=12)
    ax.set_ylabel("Latitude in degrees", fontsize=fontsize, labelpad=12)

    # tick_params replaces the per-tick `tick.label` access, which was
    # deprecated in Matplotlib 3.1 and removed in 3.8.
    ax.tick_params(axis="both", which="major", labelsize=fontsize)
    ax.tick_params(axis="x", labelrotation=90)

    # Show full coordinate values instead of an offset notation.
    ax.ticklabel_format(useOffset=False)

    # Dedicated colour-bar axes to the right of the map; the ScalarMappable
    # carries the shared colour map and normalisation.
    cax = fig.add_axes([0.9, 0.125, 0.04, 0.755])
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
    cbar = fig.colorbar(sm, cax=cax)
    cbar.ax.tick_params(labelsize=fontsize)
    cbar.ax.set_ylabel('Soil moisture in %', fontsize=fontsize, labelpad=12)

    fig.savefig("plots/area"+str(area)+"_map_soilmoisture.pdf", bbox_inches="tight")