In [1]:
#import sys
#sys.path.append('../model_projections/')

from MapProjectionsFns import *

In [2]:
import netCDF4 as nc
# Open the 2009 environmental-predictor file and pull out the three entries that the
# projection helpers take as inputs.
# NOTE(review): the file is opened "r+" (read/write) although only reads are visible in
# this notebook — confirm whether plain "r" would do.
projections = nc.Dataset("envforprojections_2009.nc", mode="r+", format="NETCDF4")
contemp, monthly, annual = (
    projections[entry] for entry in ("contemporary", "monthly", "annual")
)

In [3]:
# Feature names, grouped by source. `feat_order` concatenates the groups in the order
# general -> satellite -> monthly -> annual -> intercept.
general_feats = [
    'latitude',
    'longitude',
    'depth_sampled',
]

satellite_feats = [
    'chl_satellite',
    'sst_satellite',
    'par_satellite',
    'pic_satellite',
    'poc_satellite',
    'npp_satellite',
]

monthly_feats = [
    'chla_monthly_historical',
    'cloudfraction_monthly_historical',
    'daylength_monthly_historical',
    'dustflux_monthly_historical',
    'solarinsolation_monthly_historical',
    'pycnoclinedepth_monthly_historical',
    'thermoclinedepth_monthly_historical',
    'nitrate_monthly_historical',
    'npratio_monthly_historical',
    'oxygendissolved_monthly_historical',
    'oxygensaturation_monthly_historical',
    'oxygenutilization_monthly_historical',
    'phosphate_monthly_historical',
    'salinity_monthly_historical',
    'silicate_monthly_historical',
    'oceantemp_monthly_historical',
]

annual_feats = [
    'chla_annual_historical',
    'chla_annualrange_historical',
    'cloudfraction_annual_historical',
    'cloudfraction_annualstdev_historical',
    'diffuseattenuation_annual_historical',
    'par_annual_historical',
    'salinity_annual_historical',
    'thermoclinedepth_annualstdev_historical',
    'nitrate_annual_historical',
    'solarinsolation_annual_historical',
    'distfromland_annual_historical',
    'oxygendissolved_annual_historical',
    'sst_annual_historical',
    'pycnoclinedepth_annualstdev_historical',
    'solarinsolation_annualstdev_historical',
    'oceandepth_historical',
    'dustflux_annual_historical',
    'oxygensaturation_annual_historical',
    'dustflux_annualstdev_historical',
    'oxygenutilization_annual_historical',
    'phosphate_annual_historical',
    'silicate_annual_historical',
    'calcite_annual_historical',
    'oceantemp_annual_historical',
    'ph_annual_historical',
]

# Full feature order, with the intercept term last.
feat_order = [*general_feats, *satellite_feats, *monthly_feats, *annual_feats, 'intercept']

# create map projections

In [4]:
# Output dataset that add_projection writes its groups into. Mode "w" starts a new file;
# with netCDF4's default clobber=True an existing file of the same name is overwritten.
projnc = nc.Dataset("predictiveness_clusters_projections.nc", mode="w", format="NETCDF4")
# Alternative kept for reference: reopen an earlier output file for in-place updates.
#projnc = nc.Dataset("predictiveclusters_projections_reruns.nc", "r+", format="NETCDF4")

In [14]:
def add_projection(contemp, monthly, annual, 
                   group_name, centroids, 
                   depth_sampled=-0.556543241, satellite_month=0, monthly_month=0, 
                   satellite_feats=satellite_feats, monthly_feats=monthly_feats, annual_feats=annual_feats):
    """Score every centroid column on the global grid and store the sigmoid maps in ``projnc``.

    A group named ``group_name`` is created in the notebook-level output dataset
    ``projnc`` (or reused if it already exists). The group receives ``lat``/``lon``
    dimensions and coordinate variables copied from ``contemp``, plus one
    ``(lat, lon)`` float64 variable per column of ``centroids``.

    The function is safe to call again for the same ``group_name``: dimensions,
    coordinate variables and cluster variables that already exist are reused and
    their data overwritten instead of being re-created.

    Parameters
    ----------
    contemp, monthly, annual
        Inputs forwarded to ``get_feat_matrices``. ``contemp`` must also provide
        ``lat`` and ``lon`` entries; their values define the output grid.
    group_name : str
        Name of the netCDF group to write into.
    centroids : DataFrame-like
        One column per cluster; each column is passed as ``weights`` to
        ``get_score_matrix`` and its name (via ``str``) becomes the variable name.
    depth_sampled, satellite_month, monthly_month
        Forwarded unchanged to ``get_feat_matrices``.
        NOTE(review): presumably a pre-scaled depth value and month indices with
        0 = January 2009 — confirm against ``get_feat_matrices``.
    satellite_feats, monthly_feats, annual_feats
        Feature-name lists forwarded to ``get_feat_matrices``. The defaults are the
        notebook-level lists as they were when this function was defined.

    Returns
    -------
    None
    """
    print("creating netcdf group",group_name,"...")
    # createGroup does not raise for a group that already exists.
    kGroup = projnc.createGroup(group_name)

    # Grid coordinates come from the contemporary inputs (9 km resolution in the 2009 file);
    # dimension sizes are taken from them rather than hard-coded.
    coords = (("lat", contemp['lat'][:]), ("lon", contemp['lon'][:]))
    for axis, values in coords:
        # Creating a dimension/variable that already exists raises, so only create
        # what is missing — the same guard the cluster variables use below.
        if axis not in kGroup.dimensions:
            kGroup.createDimension(axis, len(values))
        if axis not in kGroup.variables:
            kGroup.createVariable(axis, "f8", (axis,))
        kGroup[axis][:] = values

    # The feature matrix does not depend on the cluster, so build it once.
    print("creating feature matrices...")
    feat_matrix = get_feat_matrices(contemp, monthly, annual, 
                                    depth_sampled=depth_sampled, satellite_month=satellite_month, 
                                    monthly_month=monthly_month, satellite_feats=satellite_feats, 
                                    monthly_feats=monthly_feats, annual_feats=annual_feats)

    # NOTE(review): each weight column is passed in the row order of `centroids`; the
    # notebook defines `feat_order` but never applies it — confirm that
    # get_score_matrix aligns weights with the feature matrix as intended.
    for clust in centroids.columns:
        var_name = str(clust)
        print('---processing', clust, '---')
        print("scoring the matrix...")
        testgene_scores = get_score_matrix(feat_matrix, weights=centroids[clust])
        print("squishing through the sigmoid...")
        sigmoids = get_sigmoid_matrix(testgene_scores)
        print("Eureka!")

        # Create the variable on first use; on a re-run just overwrite its data.
        if var_name not in kGroup.variables:
            kGroup.createVariable(var_name, "f8", ("lat", "lon"))
        kGroup[var_name][:] = sigmoids

## k6

In [9]:
# Centroid weights for k=6, indexed by the "feature" column of the CSV.
centroids6 = pd.read_csv("centroidsk6.csv", index_col="feature")

# Project every k=6 cluster and store the maps in group "Jan2009_k6".
add_projection(
    contemp, monthly, annual,
    group_name="Jan2009_k6",
    centroids=centroids6,
    depth_sampled=-0.556543241,
    satellite_month=0,
    monthly_month=0,
    satellite_feats=satellite_feats,
    monthly_feats=monthly_feats,
    annual_feats=annual_feats,
)

creating netcdf group Jan2009_k6 ...
---processing cluster1 ---
creating feature matrices...
getting general feats
getting satellite feats
getting monthly historical feats
getting annual historical feats
adding intercept
DONE!
scoring the matrix...
squishing through the sigmoid...
making predictions...
Eureka!


  prediction_matrix[prediction_matrix>=0.5] = 1
  prediction_matrix[prediction_matrix<0.5] = 0


---processing cluster2 ---
creating feature matrices...
getting general feats
getting satellite feats
getting monthly historical feats
getting annual historical feats
adding intercept
DONE!
scoring the matrix...
squishing through the sigmoid...
making predictions...
Eureka!
---processing cluster3 ---
creating feature matrices...
getting general feats
getting satellite feats
getting monthly historical feats
getting annual historical feats
adding intercept
DONE!
scoring the matrix...
squishing through the sigmoid...
making predictions...
Eureka!
---processing cluster4 ---
creating feature matrices...
getting general feats
getting satellite feats
getting monthly historical feats
getting annual historical feats
adding intercept
DONE!
scoring the matrix...
squishing through the sigmoid...
making predictions...
Eureka!
---processing cluster5 ---
creating feature matrices...
getting general feats
getting satellite feats
getting monthly historical feats
getting annual historical feats
adding i

## k4

In [10]:
# Centroid weights for k=4, indexed by the "feature" column of the CSV.
centroids4 = pd.read_csv("centroidsk4.csv", index_col="feature")

# Project every k=4 cluster and store the maps in group "Jan2009_k4".
add_projection(
    contemp, monthly, annual,
    group_name="Jan2009_k4",
    centroids=centroids4,
    depth_sampled=-0.556543241,
    satellite_month=0,
    monthly_month=0,
    satellite_feats=satellite_feats,
    monthly_feats=monthly_feats,
    annual_feats=annual_feats,
)

creating netcdf group Jan2009_k4 ...
---processing cluster1 ---
creating feature matrices...
getting general feats
getting satellite feats
getting monthly historical feats
getting annual historical feats
adding intercept
DONE!
scoring the matrix...
squishing through the sigmoid...
making predictions...
Eureka!


  prediction_matrix[prediction_matrix>=0.5] = 1
  prediction_matrix[prediction_matrix<0.5] = 0


---processing cluster2 ---
creating feature matrices...
getting general feats
getting satellite feats
getting monthly historical feats
getting annual historical feats
adding intercept
DONE!
scoring the matrix...
squishing through the sigmoid...
making predictions...
Eureka!
---processing cluster3 ---
creating feature matrices...
getting general feats
getting satellite feats
getting monthly historical feats
getting annual historical feats
adding intercept
DONE!
scoring the matrix...
squishing through the sigmoid...
making predictions...
Eureka!
---processing cluster4 ---
creating feature matrices...
getting general feats
getting satellite feats
getting monthly historical feats
getting annual historical feats
adding intercept
DONE!
scoring the matrix...
squishing through the sigmoid...
making predictions...
Eureka!


## k5

In [11]:
# Centroid weights for k=5, indexed by the "feature" column of the CSV.
centroids5 = pd.read_csv("centroidsk5.csv", index_col="feature")

# Project every k=5 cluster and store the maps in group "Jan2009_k5".
add_projection(
    contemp, monthly, annual,
    group_name="Jan2009_k5",
    centroids=centroids5,
    depth_sampled=-0.556543241,
    satellite_month=0,
    monthly_month=0,
    satellite_feats=satellite_feats,
    monthly_feats=monthly_feats,
    annual_feats=annual_feats,
)

creating netcdf group Jan2009_k5 ...
---processing cluster1 ---
creating feature matrices...
getting general feats
getting satellite feats
getting monthly historical feats
getting annual historical feats
adding intercept
DONE!
scoring the matrix...
squishing through the sigmoid...
making predictions...
Eureka!


  prediction_matrix[prediction_matrix>=0.5] = 1
  prediction_matrix[prediction_matrix<0.5] = 0


---processing cluster2 ---
creating feature matrices...
getting general feats
getting satellite feats
getting monthly historical feats
getting annual historical feats
adding intercept
DONE!
scoring the matrix...
squishing through the sigmoid...
making predictions...
Eureka!
---processing cluster3 ---
creating feature matrices...
getting general feats
getting satellite feats
getting monthly historical feats
getting annual historical feats
adding intercept
DONE!
scoring the matrix...
squishing through the sigmoid...
making predictions...
Eureka!
---processing cluster4 ---
creating feature matrices...
getting general feats
getting satellite feats
getting monthly historical feats
getting annual historical feats
adding intercept
DONE!
scoring the matrix...
squishing through the sigmoid...
making predictions...
Eureka!
---processing cluster5 ---
creating feature matrices...
getting general feats
getting satellite feats
getting monthly historical feats
getting annual historical feats
adding i

## k7

In [12]:
# Centroid weights for k=7, indexed by the "feature" column of the CSV.
centroids7 = pd.read_csv("centroidsk7.csv", index_col="feature")

# Project every k=7 cluster and store the maps in group "Jan2009_k7".
add_projection(
    contemp, monthly, annual,
    group_name="Jan2009_k7",
    centroids=centroids7,
    depth_sampled=-0.556543241,
    satellite_month=0,
    monthly_month=0,
    satellite_feats=satellite_feats,
    monthly_feats=monthly_feats,
    annual_feats=annual_feats,
)

creating netcdf group Jan2009_k7 ...
---processing cluster1 ---
creating feature matrices...
getting general feats
getting satellite feats
getting monthly historical feats
getting annual historical feats
adding intercept
DONE!
scoring the matrix...
squishing through the sigmoid...
making predictions...
Eureka!


  prediction_matrix[prediction_matrix>=0.5] = 1
  prediction_matrix[prediction_matrix<0.5] = 0


---processing cluster2 ---
creating feature matrices...
getting general feats
getting satellite feats
getting monthly historical feats
getting annual historical feats
adding intercept
DONE!
scoring the matrix...
squishing through the sigmoid...
making predictions...
Eureka!
---processing cluster3 ---
creating feature matrices...
getting general feats
getting satellite feats
getting monthly historical feats
getting annual historical feats
adding intercept
DONE!
scoring the matrix...
squishing through the sigmoid...
making predictions...
Eureka!
---processing cluster4 ---
creating feature matrices...
getting general feats
getting satellite feats
getting monthly historical feats
getting annual historical feats
adding intercept
DONE!
scoring the matrix...
squishing through the sigmoid...
making predictions...
Eureka!
---processing cluster5 ---
creating feature matrices...
getting general feats
getting satellite feats
getting monthly historical feats
getting annual historical feats
adding i

## k9

In [13]:
# Centroid weights for k=9, indexed by the "feature" column of the CSV.
centroids9 = pd.read_csv("centroidsk9.csv", index_col="feature")

# Project every k=9 cluster and store the maps in group "Jan2009_k9".
add_projection(
    contemp, monthly, annual,
    group_name="Jan2009_k9",
    centroids=centroids9,
    depth_sampled=-0.556543241,
    satellite_month=0,
    monthly_month=0,
    satellite_feats=satellite_feats,
    monthly_feats=monthly_feats,
    annual_feats=annual_feats,
)

creating netcdf group Jan2009_k9 ...
---processing cluster1 ---
creating feature matrices...
getting general feats
getting satellite feats
getting monthly historical feats
getting annual historical feats
adding intercept
DONE!
scoring the matrix...
squishing through the sigmoid...
making predictions...


  prediction_matrix[prediction_matrix>=0.5] = 1
  prediction_matrix[prediction_matrix<0.5] = 0


Eureka!
---processing cluster2 ---
creating feature matrices...
getting general feats
getting satellite feats
getting monthly historical feats
getting annual historical feats
adding intercept
DONE!
scoring the matrix...
squishing through the sigmoid...
making predictions...
Eureka!
---processing cluster3 ---
creating feature matrices...
getting general feats
getting satellite feats
getting monthly historical feats
getting annual historical feats
adding intercept
DONE!
scoring the matrix...
squishing through the sigmoid...
making predictions...
Eureka!
---processing cluster4 ---
creating feature matrices...
getting general feats
getting satellite feats
getting monthly historical feats
getting annual historical feats
adding intercept
DONE!
scoring the matrix...
squishing through the sigmoid...
making predictions...
Eureka!
---processing cluster5 ---
creating feature matrices...
getting general feats
getting satellite feats
getting monthly historical feats
getting annual historical feats


## close and save

In [15]:
# Close the output dataset so the groups written by add_projection are saved to disk.
# NOTE(review): the input dataset `projections` (opened "r+" near the top) is never
# closed in the visible cells — confirm whether it should be closed here as well.
projnc.close()

In [None]:
# NOTE(review): `centroids` is not assigned in any cell visible in this notebook (only
# centroids4/5/6/7/9 are) and this cell has no execution count — presumably leftover
# scratch. Unless the star-import at the top provides the name, this raises NameError
# on a fresh "Run All"; confirm and either remove the cell or name the intended frame.
centroids

In [20]:
# For each k=6 cluster, print the five features with the largest absolute centroid weight
# (signed values are shown).
# The ranking uses a temporary array instead of writing an 'abs' helper column into
# centroids6: such a column stays in the shared frame, so re-running this cell — or calling
# add_projection with centroids6 afterwards — would treat 'abs' as an extra cluster.
for clust in centroids6.columns:
    print(clust)
    weights = centroids6[clust]
    # Negate so an ascending argsort puts the largest magnitude first; mergesort is stable,
    # so tied features keep their original order.
    by_magnitude = np.argsort(-np.abs(weights.values), kind="mergesort")
    print(weights.iloc[by_magnitude[0:5]])

cluster1
feature
sst_annual_historical          1.129485
dustflux_monthly_historical   -0.494388
longitude                      0.490295
ph_annual_historical          -0.410091
par_annual_historical         -0.401255
Name: cluster1, dtype: float64
cluster2
feature
dustflux_monthly_historical          -1.144733
oceantemp_monthly_historical         -0.997688
phosphate_annual_historical          -0.859224
sst_annual_historical                 0.559622
pycnoclinedepth_monthly_historical    0.488912
Name: cluster2, dtype: float64
cluster3
feature
intercept                            -0.497136
oceantemp_monthly_historical         -0.484080
pycnoclinedepth_monthly_historical    0.381975
salinity_monthly_historical           0.345350
pic_satellite                        -0.345302
Name: cluster3, dtype: float64
cluster4
feature
sst_annual_historical                    -1.251535
intercept                                -1.002843
solarinsolation_annualstdev_historical    0.759041
cloudfraction_an

In [25]:
# For each k=5 cluster, print the ten features with the largest absolute centroid weight
# (signed values are shown).
# The ranking uses a temporary array instead of writing an 'abs' helper column into
# centroids5: such a column stays in the shared frame, so re-running this cell — or calling
# add_projection with centroids5 afterwards — would treat 'abs' as an extra cluster.
for clust in centroids5.columns:
    print(clust)
    weights = centroids5[clust]
    # Negate so an ascending argsort puts the largest magnitude first; mergesort is stable,
    # so tied features keep their original order.
    by_magnitude = np.argsort(-np.abs(weights.values), kind="mergesort")
    print(weights.iloc[by_magnitude[0:10]])

cluster1
feature
oceantemp_monthly_historical         -1.229621
dustflux_monthly_historical          -0.749405
phosphate_annual_historical          -0.662722
pycnoclinedepth_monthly_historical    0.502970
silicate_annual_historical           -0.402657
intercept                            -0.395469
sst_annual_historical                 0.359480
nitrate_annual_historical            -0.356527
salinity_monthly_historical           0.328024
ph_annual_historical                 -0.296881
Name: cluster1, dtype: float64
cluster2
feature
longitude                                -1.006533
intercept                                -0.860165
par_annual_historical                     0.837770
cloudfraction_monthly_historical         -0.716344
solarinsolation_annualstdev_historical   -0.400064
oceandepth_historical                    -0.357778
chla_annualrange_historical              -0.212551
dustflux_annualstdev_historical          -0.189754
latitude                                 -0.173854
salini

In [23]:
# For each k=7 cluster, print the five features with the largest absolute centroid weight
# (signed values are shown).
# The ranking uses a temporary array instead of writing an 'abs' helper column into
# centroids7: such a column stays in the shared frame, so re-running this cell — or calling
# add_projection with centroids7 afterwards — would treat 'abs' as an extra cluster.
for clust in centroids7.columns:
    print(clust)
    weights = centroids7[clust]
    # Negate so an ascending argsort puts the largest magnitude first; mergesort is stable,
    # so tied features keep their original order.
    by_magnitude = np.argsort(-np.abs(weights.values), kind="mergesort")
    print(weights.iloc[by_magnitude[0:5]])

cluster1
feature
oceantemp_monthly_historical           -1.659877
nitrate_annual_historical              -1.134866
diffuseattenuation_annual_historical    0.681379
silicate_annual_historical             -0.674376
intercept                              -0.552039
Name: cluster1, dtype: float64
cluster2
feature
dustflux_monthly_historical          -1.146993
oceantemp_monthly_historical         -1.000573
phosphate_annual_historical          -0.873037
sst_annual_historical                 0.563133
pycnoclinedepth_monthly_historical    0.490135
Name: cluster2, dtype: float64
cluster3
feature
sst_annual_historical          1.269904
dustflux_monthly_historical   -0.530354
longitude                      0.472052
par_annual_historical         -0.456367
ph_annual_historical          -0.438842
Name: cluster3, dtype: float64
cluster4
feature
sst_annual_historical                    -1.237516
intercept                                -1.000791
solarinsolation_annualstdev_historical    0.775252
cloudf

In [24]:
# For each k=4 cluster, print the five features with the largest absolute centroid weight
# (signed values are shown).
# The ranking uses a temporary array instead of writing an 'abs' helper column into
# centroids4: such a column stays in the shared frame, so re-running this cell — or calling
# add_projection with centroids4 afterwards — would treat 'abs' as an extra cluster.
for clust in centroids4.columns:
    print(clust)
    weights = centroids4[clust]
    # Negate so an ascending argsort puts the largest magnitude first; mergesort is stable,
    # so tied features keep their original order.
    by_magnitude = np.argsort(-np.abs(weights.values), kind="mergesort")
    print(weights.iloc[by_magnitude[0:5]])

cluster1
feature
sst_annual_historical                      0.835441
dustflux_monthly_historical               -0.700363
ph_annual_historical                      -0.397605
longitude                                  0.363905
thermoclinedepth_annualstdev_historical   -0.330468
Name: cluster1, dtype: float64
cluster2
feature
longitude                                -1.004915
intercept                                -0.859995
par_annual_historical                     0.837923
cloudfraction_monthly_historical         -0.716320
solarinsolation_annualstdev_historical   -0.400537
Name: cluster2, dtype: float64
cluster3
feature
diffuseattenuation_annual_historical   -2.325409
longitude                               0.908290
sst_annual_historical                   0.889108
dustflux_monthly_historical            -0.388986
intercept                              -0.352971
Name: cluster3, dtype: float64
cluster4
feature
intercept                                -0.702086
solarinsolation_annualstdev_