In [1]:
%matplotlib widget

import configparser
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
import pandas as pd

from tathu.io import spatialite, pgis
from tathu import visualizer
from tathu.utils import extractPeriods, file2timestamp, array2raster, getGeoT, geo2grid

In [2]:
# File paths
gld_path = "/home/camilacl/git/amazon-storms-aerosols/data/lightning/GLD_mod/"
radar_path = "/data2/GOAMAZON/radar/sipam_manaus/cptec_cappi/"
init_path = "/home/camilacl/git/amazon-storms-aerosols/data/general/"
clusters_path = "/home/camilacl/git/amazon-storms-aerosols/data/general/clusters_aero_systems_25km.csv"

# Load family
db = pgis.Loader(
    "localhost", "goamazon_geo", "postgres", "postgres", "systems_filtered"
)
# Get systems
names = db.loadNames()
# print(len(names))
# Get dates
dates = db.loadDates()
print(len(dates))

19199


In [72]:
# Get clusters data
clusters_25 = pd.read_csv(clusters_path).set_index('name')
cluster1_25 = clusters_25.loc[clusters_25['clust'] == 1].drop(columns="clust")
cluster2_25 = clusters_25.loc[clusters_25['clust'] == 2].drop(columns="clust")
cluster4_25 = clusters_25.loc[clusters_25['clust'] == 4].drop(columns="clust")
cluster5_25 = clusters_25.loc[clusters_25['clust'] == 5].drop(columns="clust")

In [65]:
cluster1_25.describe()

Unnamed: 0,max reflectivity,max echotop 0 dBZ,max echotop 20 dBZ,max echotop 40 dBZ,max VIL,max VII,max VIWL,GLD strokes,CAPE,CIN,bl relative humidity,v-wind shear,warm cloud depth,total aerosols,ultrafine aerosols,total CCNs
count,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0
mean,54.036364,11.636364,10.318182,6.227273,0.255031,0.12245,0.177635,13.045455,2748.761872,-32.878984,76.305609,-3.602967,3.97235,2723.373212,987.434258,1159.630258
std,2.084545,2.172237,2.437531,1.066004,0.057221,0.077308,0.011735,22.712441,1509.603659,40.132566,6.37384,2.133954,0.383712,1271.860446,835.966885,604.699296
min,49.33,7.0,6.0,4.0,0.145503,0.0,0.145503,0.0,0.001041,-117.464745,61.802906,-6.395021,2.898325,610.8198,24.492088,272.7569
25%,52.5425,10.25,9.0,6.0,0.217081,0.075223,0.173067,0.0,1544.470325,-60.069769,74.398296,-5.095919,3.932626,1551.6969,297.645575,713.094475
50%,54.4,11.5,10.5,6.0,0.255835,0.116083,0.181478,1.5,2433.0117,-6.072368,76.19854,-4.070611,4.075035,3005.3899,660.89661,1001.10085
75%,55.79,13.0,11.0,7.0,0.291471,0.167379,0.18602,13.75,4030.28635,0.0,79.75077,-1.66621,4.175682,3475.7622,1702.743775,1512.65725
max,56.859997,15.0,15.0,8.0,0.368179,0.269407,0.192237,90.0,5302.3003,0.0,86.315575,2.091281,4.48913,5567.075,2325.0552,2897.4177


In [5]:
# -- CS name, duration
query = (
    "SELECT name, elapsed_time FROM (SELECT name, EXTRACT(epoch FROM"
    " (max(date_time) - min(date_time))/60) AS elapsed_time FROM"
    " systems_filtered GROUP BY name) AS duration"
)
names = [q[0] for q in db.query(query)]
durations = [q[1] for q in db.query(query)]

# -- Single params
query = (
    "SELECT DISTINCT ON (name) date_init25 FROM systems_filtered"
    " ORDER BY name, date_time ASC"
)
date_init = [q[0] for q in db.query(query)]

# -- Max, sum params
query = (
    "SELECT MAX(count) FROM systems_filtered"
    " GROUP BY name ORDER BY name ASC"
)
maxarea = [q[0] for q in db.query(query)]

In [66]:
systems = pd.DataFrame(
    {
        'name': names,
        'duration': [float(d) for d in durations],
        'date_init': date_init,
        'max_area': maxarea,
    }
).set_index('name')
systems['hour_init'] = systems.date_init.dt.hour
systems['month_init'] = systems.date_init.dt.month

systems = systems.reindex(
    columns=['duration', 'date_init', 'hour_init', 'month_init', 'max_area'])
systems.columns = [
    'duration', 
    'date_init', 
    'init hour', 
    'init month', 
    'max area', 
]

In [55]:
systems

Unnamed: 0_level_0,duration,date_init,init hour,init month,max area
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
4cf98e6c-970d-478e-b402-7fc3b8c10e2b,36.0,NaT,,,819.0
ac592abe-af33-46b5-b145-c1de80073666,12.0,NaT,,,593.0
457531a7-9b7a-4afa-8e82-ce8fbcfc0831,60.0,2014-08-04 20:12:00,20.0,8.0,775.0
51613f72-38d5-4571-bfe7-858f33935e1b,48.0,NaT,,,303.0
276778fa-5e23-4293-93d2-290e14cc3a7e,12.0,NaT,,,458.0
...,...,...,...,...,...
226fbae5-7f99-440d-9c5d-cf26e46a7073,48.0,NaT,,,190.0
64ff44b3-a809-4b6f-97b9-f28448ac7c16,120.0,NaT,,,307.0
630bed79-3acb-4ade-b78f-b5e0b6077866,12.0,2014-03-05 20:12:00,20.0,3.0,328.0
07865dad-489f-4cbd-b1a2-4da3363cf86a,48.0,NaT,,,348.0


In [67]:
cluster1_25 = cluster1_25.join(systems.loc[cluster1_25.index.values], how="left")
cluster1_25 = cluster1_25[
    [
        'date_init', 
        'duration', 
        'sys duration', 
        'init hour', 
        'time of day',
        'init month', 
        'season', 
        'max area', 
        'area', 
        'lifespan', 
        'max reflectivity', 
        'reflectivity', 
        'max echotop 0 dBZ', 
        'max echotop 20 dBZ', 
        'max echotop 40 dBZ', 
        'max VIL', 
        'max VII', 
        'max VIWL',
        'GLD strokes',
        'electrical activity',
        'CAPE', 
        'CIN', 
        'bl relative humidity', 
        'v-wind shear', 
        'warm cloud depth',
        'total aerosols', 
        'ultrafine aerosols', 
        'total CCNs', 
    ]
]

In [69]:
systems.loc[cluster1_25.index.values]

Unnamed: 0_level_0,duration,date_init,init hour,init month,max area
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
457531a7-9b7a-4afa-8e82-ce8fbcfc0831,60.0,2014-08-04 20:12:00,20.0,8.0,775.0
3c579c35-1bf2-4e72-b0b7-bfc9a7bf259e,132.0,2014-10-06 05:24:00,5.0,10.0,182.0
d6f9701e-5ebc-4bd8-990f-5702937553f6,12.0,2014-08-23 01:36:00,1.0,8.0,179.0
587d7e6f-1827-4478-9433-68707c3011c6,12.0,2014-08-12 07:36:00,7.0,8.0,235.0
399ec594-7f49-4228-b57c-7aab5374561f,36.0,2014-09-21 13:00:00,13.0,9.0,138.0
971e3b71-d062-4ed2-bc43-e9bd16d535cb,0.0,2014-08-08 06:36:00,6.0,8.0,110.0
194735a6-c3af-4249-9053-2bbd33b9fc83,144.0,2014-12-30 19:36:00,19.0,12.0,374.0
0c1c27da-73ea-450c-bf6d-e2f1f3a33dd8,36.0,2014-08-13 19:48:00,19.0,8.0,270.0
31fac6af-7374-4ea4-bfd7-c4f076ef1714,144.0,2014-09-24 16:24:00,16.0,9.0,238.0
1fce2618-6f90-487c-b370-d6cd0c309482,60.0,2014-08-27 18:36:00,18.0,8.0,120.0


In [70]:
cluster1_25.describe()

Unnamed: 0,duration,init hour,init month,max area,max reflectivity,max echotop 0 dBZ,max echotop 20 dBZ,max echotop 40 dBZ,max VIL,max VII,max VIWL,GLD strokes,CAPE,CIN,bl relative humidity,v-wind shear,warm cloud depth,total aerosols,ultrafine aerosols,total CCNs
count,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0
mean,74.181818,11.818182,8.636364,270.318182,54.036364,11.636364,10.318182,6.227273,0.255031,0.12245,0.177635,13.045455,2748.761872,-32.878984,76.305609,-3.602967,3.97235,2723.373212,987.434258,1159.630258
std,61.705627,7.261355,1.002162,164.116215,2.084545,2.172237,2.437531,1.066004,0.057221,0.077308,0.011735,22.712441,1509.603659,40.132566,6.37384,2.133954,0.383712,1271.860446,835.966885,604.699296
min,0.0,0.0,8.0,110.0,49.33,7.0,6.0,4.0,0.145503,0.0,0.145503,0.0,0.001041,-117.464745,61.802906,-6.395021,2.898325,610.8198,24.492088,272.7569
25%,27.0,6.0,8.0,166.5,52.5425,10.25,9.0,6.0,0.217081,0.075223,0.173067,0.0,1544.470325,-60.069769,74.398296,-5.095919,3.932626,1551.6969,297.645575,713.094475
50%,54.0,12.5,8.0,216.5,54.4,11.5,10.5,6.0,0.255835,0.116083,0.181478,1.5,2433.0117,-6.072368,76.19854,-4.070611,4.075035,3005.3899,660.89661,1001.10085
75%,132.0,19.0,9.0,314.25,55.79,13.0,11.0,7.0,0.291471,0.167379,0.18602,13.75,4030.28635,0.0,79.75077,-1.66621,4.175682,3475.7622,1702.743775,1512.65725
max,192.0,20.0,12.0,775.0,56.859997,15.0,15.0,8.0,0.368179,0.269407,0.192237,90.0,5302.3003,0.0,86.315575,2.091281,4.48913,5567.075,2325.0552,2897.4177


In [42]:
cluster1_25.corr()

Unnamed: 0,duration,init hour,init month,max area,max reflectivity,max echotop 0 dBZ,max echotop 20 dBZ,max echotop 40 dBZ,max VIL,max VII,max VIWL,GLD strokes,CAPE,CIN,bl relative humidity,v-wind shear,warm cloud depth,total aerosols,ultrafine aerosols,total CCNs
duration,1.0,0.018942,0.458121,0.030055,-0.013498,0.052699,-0.117968,-0.106961,-0.016159,0.043203,-0.014022,-0.058382,0.100646,-0.144759,-0.031187,0.146451,0.252961,-0.14182,-0.179686,-0.034508
init hour,0.018942,1.0,0.090373,0.296471,0.340227,-0.228519,-0.370613,-0.325383,-0.261321,-0.30809,-0.313908,0.134133,0.436813,0.667793,-0.390193,0.036626,-0.514744,-0.268002,0.085965,-0.302093
init month,0.458121,0.090373,1.0,0.042801,-0.194321,0.524103,0.362747,0.243368,0.420184,0.47994,0.291203,0.0206,0.189027,0.114457,-0.155349,0.190925,-0.027493,-0.480011,-0.496532,-0.324209
max area,0.030055,0.296471,0.042801,1.0,0.250031,-0.210978,-0.227662,0.162986,-0.198535,-0.146038,-0.26389,0.572702,0.262012,0.182941,0.021972,-0.056827,0.114916,-0.245303,0.114021,-0.334432
max reflectivity,-0.013498,0.340227,-0.194321,0.250031,1.0,-0.016396,0.054032,0.074107,0.051034,-0.000365,0.094657,0.272622,0.122804,-0.03316,0.108701,0.461291,-0.075844,-0.364187,-0.08219,-0.297679
max echotop 0 dBZ,0.052699,-0.228519,0.524103,-0.210978,-0.016396,1.0,0.904244,0.551498,0.92852,0.965024,0.748993,0.054401,-0.089136,-0.071722,-0.206276,0.252671,-0.16017,-0.197919,-0.245687,-0.027574
max echotop 20 dBZ,-0.117968,-0.370613,0.362747,-0.227662,0.054032,0.904244,1.0,0.648913,0.964726,0.956673,0.757075,0.204438,-0.208755,-0.113157,-0.050148,0.282542,-0.148934,-0.126481,-0.318153,0.049924
max echotop 40 dBZ,-0.106961,-0.325383,0.243368,0.162986,0.074107,0.551498,0.648913,1.0,0.694209,0.657138,0.677756,0.333907,0.16424,0.075839,0.254554,0.3079,-0.039838,0.12937,-0.120961,0.325064
max VIL,-0.016159,-0.261321,0.420184,-0.198535,0.051034,0.92852,0.964726,0.694209,1.0,0.964397,0.837498,0.154715,-0.100728,-0.051204,-0.132751,0.317108,-0.193894,-0.097444,-0.269404,0.098118
max VII,0.043203,-0.30809,0.47994,-0.146038,-0.000365,0.965024,0.956673,0.657138,0.964397,1.0,0.737223,0.226159,-0.114624,-0.071793,-0.160333,0.252841,-0.147803,-0.116666,-0.276144,0.057542


In [71]:
import seaborn as sns

plt.clf()
sns.heatmap(cluster1_25.dropna().corr(), vmin=-1, vmax=1, cmap='PiYG').set_title("Cluster 1, Initiation within 25 km - " + str(cluster1_25.dropna().shape[0]) + " cases")
plt.savefig(
    "/home/camilacl/git/tathu/sipam-tracking/out/goamazon/figs/corr_map_clust1_init25.png",
    dpi=300,
    facecolor="none",
    bbox_inches="tight"
)

In [73]:
cluster2_25 = cluster2_25.join(systems.loc[cluster2_25.index.values], how="left")
cluster2_25 = cluster2_25[
    [
        'date_init', 
        'duration', 
        'sys duration', 
        'init hour', 
        'time of day',
        'init month', 
        'season', 
        'max area', 
        'area', 
        'lifespan', 
        'max reflectivity', 
        'reflectivity', 
        'max echotop 0 dBZ', 
        'max echotop 20 dBZ', 
        'max echotop 40 dBZ', 
        'max VIL', 
        'max VII', 
        'max VIWL',
        'GLD strokes',
        'electrical activity',
        'CAPE', 
        'CIN', 
        'bl relative humidity', 
        'v-wind shear', 
        'warm cloud depth',
        'total aerosols', 
        'ultrafine aerosols', 
        'total CCNs', 
    ]
]

In [74]:
import seaborn as sns

plt.clf()
sns.heatmap(cluster2_25.dropna().corr(), vmin=-1, vmax=1, cmap='PiYG').set_title("Cluster 2, Initiation within 25 km - " + str(cluster2_25.dropna().shape[0]) + " cases")
plt.savefig(
    "/home/camilacl/git/tathu/sipam-tracking/out/goamazon/figs/corr_map_clust2_init25.png",
    dpi=300,
    facecolor="none",
    bbox_inches="tight"
)

In [75]:
cluster4_25 = cluster4_25.join(systems.loc[cluster4_25.index.values], how="left")
cluster4_25 = cluster4_25[
    [
        'date_init', 
        'duration', 
        'sys duration', 
        'init hour', 
        'time of day',
        'init month', 
        'season', 
        'max area', 
        'area', 
        'lifespan', 
        'max reflectivity', 
        'reflectivity', 
        'max echotop 0 dBZ', 
        'max echotop 20 dBZ', 
        'max echotop 40 dBZ', 
        'max VIL', 
        'max VII', 
        'max VIWL',
        'GLD strokes',
        'electrical activity',
        'CAPE', 
        'CIN', 
        'bl relative humidity', 
        'v-wind shear', 
        'warm cloud depth',
        'total aerosols', 
        'ultrafine aerosols', 
        'total CCNs', 
    ]
]

In [76]:
import seaborn as sns

plt.clf()
sns.heatmap(cluster4_25.dropna().corr(), vmin=-1, vmax=1, cmap='PiYG').set_title("Cluster 4, Initiation within 25 km - " + str(cluster4_25.dropna().shape[0]) + " cases")
plt.savefig(
    "/home/camilacl/git/tathu/sipam-tracking/out/goamazon/figs/corr_map_clust4_init25.png",
    dpi=300,
    facecolor="none",
    bbox_inches="tight"
)

In [77]:
cluster5_25 = cluster5_25.join(systems.loc[cluster5_25.index.values], how="left")
cluster5_25 = cluster5_25[
    [
        'date_init', 
        'duration', 
        'sys duration', 
        'init hour', 
        'time of day',
        'init month', 
        'season', 
        'max area', 
        'area', 
        'lifespan', 
        'max reflectivity', 
        'reflectivity', 
        'max echotop 0 dBZ', 
        'max echotop 20 dBZ', 
        'max echotop 40 dBZ', 
        'max VIL', 
        'max VII', 
        'max VIWL',
        'GLD strokes',
        'electrical activity',
        'CAPE', 
        'CIN', 
        'bl relative humidity', 
        'v-wind shear', 
        'warm cloud depth',
        'total aerosols', 
        'ultrafine aerosols', 
        'total CCNs', 
    ]
]

In [78]:
import seaborn as sns

plt.clf()
sns.heatmap(cluster5_25.dropna().corr(), vmin=-1, vmax=1, cmap='PiYG').set_title("Cluster 5, Initiation within 25 km - " + str(cluster5_25.dropna().shape[0]) + " cases")
plt.savefig(
    "/home/camilacl/git/tathu/sipam-tracking/out/goamazon/figs/corr_map_clust5_init25.png",
    dpi=300,
    facecolor="none",
    bbox_inches="tight"
)