In [1]:
#for Netcdf manipulation
import xarray as xr
from netCDF4 import Dataset
import netCDF4

#for array manipulation
import numpy as np
import pandas as pd

from scipy import stats

In [2]:
# Open the NetCDF version of the model output as an xarray Dataset.
#
# The .nc file is produced beforehand from the GrADS binary/.ctl pair
# with CDO (run in a terminal, not in this notebook):
#   cdo -f nc import_binary wrfout_d02.ctl wrfout_d02.nc

dataDIR = 'data/grid/wrfout_d02.nc'
# decode_times=False: keep the time coordinate as stored, without decoding it
Data = xr.open_dataset(filename_or_obj=dataDIR, decode_times=False)
#Data

In [3]:
# Perturbation potential temperature ('t'): take the grid points nearest to a
# 0.25-spaced lon/lat mesh (lon 270..289.75, lat 17..26.75) at the lev_2 value
# nearest to 1000, then drop every time step that contains a missing value.
dataTemp = (
    Data['t']
    .sel(
        lon=np.arange(270, 290, 0.25),
        lat=np.arange(17, 27, 0.25),
        lev_2=1000,
        method='nearest',
    )
    .dropna(dim='time')
)

# Flatten to 1-D so the field can be used as a DataFrame column.
air_temperature = dataTemp.values.flatten()
#air_temperature.shape

In [4]:
# Temperature at 2 m ('t2'): take the grid points nearest to a 0.25-spaced
# lon/lat mesh (lon 270..289.75, lat 17..26.75) at the lev value nearest to
# 1000, then drop every time step that contains a missing value.
dataTemp = (
    Data['t2']
    .sel(
        lon=np.arange(270, 290, 0.25),
        lat=np.arange(17, 27, 0.25),
        lev=1000,
        method='nearest',
    )
    .dropna(dim='time')
)

# Flatten to 1-D so the field can be used as a DataFrame column.
# (The former mid-cell `.shape` expression was a leftover debugging probe:
# its value was discarded and never displayed, so it has been removed.)
air_temperature_2m = dataTemp.values.flatten()
#air_temperature_2m.shape

In [5]:
# Surface pressure ('psfc'): take the grid points nearest to a 0.25-spaced
# lon/lat mesh (lon 270..289.75, lat 17..26.75) at the lev value nearest to
# 1000, then drop every time step that contains a missing value.
dataTemp = (
    Data['psfc']
    .sel(
        lon=np.arange(270, 290, 0.25),
        lat=np.arange(17, 27, 0.25),
        lev=1000,
        method='nearest',
    )
    .dropna(dim='time')
)

# Flatten to 1-D so the field can be used as a DataFrame column.
sfc_pressure = dataTemp.values.flatten()
#sfc_pressure.shape

In [6]:
# Accumulated total cumulus precipitation ('rainc'): take the grid points
# nearest to a 0.25-spaced lon/lat mesh (lon 270..289.75, lat 17..26.75) at
# the lev value nearest to 1000, then drop every time step that contains a
# missing value.
dataTemp = (
    Data['rainc']
    .sel(
        lon=np.arange(270, 290, 0.25),
        lat=np.arange(17, 27, 0.25),
        lev=1000,
        method='nearest',
    )
    .dropna(dim='time')
)

# Flatten to 1-D so the field can be used as a DataFrame column.
total_c_precipitation = dataTemp.values.flatten()
#total_c_precipitation.shape

In [7]:
# Accumulated total grid-scale precipitation ('rainnc'): take the grid points
# nearest to a 0.25-spaced lon/lat mesh (lon 270..289.75, lat 17..26.75) at
# the lev value nearest to 1000, then drop every time step that contains a
# missing value.
dataTemp = (
    Data['rainnc']
    .sel(
        lon=np.arange(270, 290, 0.25),
        lat=np.arange(17, 27, 0.25),
        lev=1000,
        method='nearest',
    )
    .dropna(dim='time')
)

# Flatten to 1-D so the field can be used as a DataFrame column.
total_g_s_precipitation = dataTemp.values.flatten()
#total_g_s_precipitation.shape

In [76]:
# Assemble the flattened fields into one table: one column per variable,
# one row per flattened sample. All five arrays must have the same length.
values = {
    "Perturbation p. temperature": air_temperature,
    "Temperature 2m": air_temperature_2m,
    "Surface pressure": sfc_pressure,
    "Total c. precipitation": total_c_precipitation,
    "Total g. scale precipitation": total_g_s_precipitation,
}

df_data = pd.DataFrame(data=values)
#df_data

Unnamed: 0,Perturbation p. temperature,Temperature 2m,Surface pressure,Total c. precipitation,Total g. scale precipitation
0,-0.392204,299.706024,101238.914062,0.0,0.0
1,-0.392204,299.706024,101238.914062,0.0,0.0
2,-0.392204,299.706024,101238.914062,0.0,0.0
3,-0.392204,299.706024,101238.914062,0.0,0.0
4,-0.392204,299.706024,101238.914062,0.0,0.0
...,...,...,...,...,...
233595,-1.034394,300.883545,101868.335938,0.0,0.0
233596,-0.844698,301.030701,101863.734375,0.0,0.0
233597,-0.854419,301.023499,101863.546875,0.0,0.0
233598,-0.854419,301.023499,101863.546875,0.0,0.0


In [77]:
# Data normalization: divide every column by its own maximum so that the
# largest value in each column becomes 1. Signs are preserved and minima are
# not mapped to 0 (this is max-scaling, not min-max scaling).
for column in (
    "Perturbation p. temperature",
    "Temperature 2m",
    "Surface pressure",
    "Total c. precipitation",
    "Total g. scale precipitation",
):
    df_data[column] = df_data[column] / df_data[column].max()

df_data

Unnamed: 0,Perturbation p. temperature,Temperature 2m,Surface pressure,Total c. precipitation,Total g. scale precipitation
0,-0.039051,0.961218,0.987797,0.0,0.0
1,-0.039051,0.961218,0.987797,0.0,0.0
2,-0.039051,0.961218,0.987797,0.0,0.0
3,-0.039051,0.961218,0.987797,0.0,0.0
4,-0.039051,0.961218,0.987797,0.0,0.0
...,...,...,...,...,...
233595,-0.102993,0.964995,0.993938,0.0,0.0
233596,-0.084105,0.965467,0.993893,0.0,0.0
233597,-0.085073,0.965444,0.993891,0.0,0.0
233598,-0.085073,0.965444,0.993891,0.0,0.0


In [78]:
# Preliminary statistics: per-column count, mean, std, min, quartiles and max
# of the normalized table (df_data was max-scaled in the previous cell).

df_data.describe()

Unnamed: 0,Perturbation p. temperature,Temperature 2m,Surface pressure,Total c. precipitation,Total g. scale precipitation
count,233600.0,233600.0,233600.0,233600.0,233600.0
mean,0.072326,0.968615,0.989447,0.011775,0.004108166
std,0.103319,0.004893,0.011792,0.039977,0.02814724
min,-0.340076,0.928363,0.802003,0.0,0.0
25%,0.028586,0.967729,0.990039,0.0,0.0
50%,0.070112,0.96914,0.991531,0.0,1.933211e-24
75%,0.104886,0.97056,0.99286,0.001365,1.008027e-05
max,1.0,1.0,1.0,1.0,1.0


In [79]:
# Data selection (filtering): keep only the rows where BOTH precipitation
# columns are at or above their own column mean. The means are computed on
# the unfiltered frame, before any row is dropped.
#
# NOTE: df_data is overwritten here, so re-running this cell on its own
# recomputes the means on the already-filtered frame and filters again.

mean = df_data["Total c. precipitation"].mean()
mean_s_prep = df_data["Total g. scale precipitation"].mean()

at_or_above_mean = (
    (df_data["Total c. precipitation"] >= mean)
    & (df_data["Total g. scale precipitation"] >= mean_s_prep)
)

# reset_index() returns a new frame, so the result has to be assigned.
# Previously it was only displayed, and df_data kept the sparse pre-filter
# index while the cell output showed 0..n-1.
df_data = df_data.loc[at_or_above_mean].reset_index(drop=True)

df_data

Unnamed: 0,Perturbation p. temperature,Temperature 2m,Surface pressure,Total c. precipitation,Total g. scale precipitation
0,0.038655,0.967985,0.990588,0.114816,0.032454
1,-0.119912,0.962894,0.993561,0.130743,0.058843
2,0.034724,0.967945,0.993150,0.032445,0.008107
3,0.022878,0.967611,0.993261,0.029689,0.005642
4,0.028237,0.968222,0.994026,0.052087,0.006148
...,...,...,...,...,...
20995,-0.048760,0.966333,0.993763,0.068912,0.033627
20996,-0.068598,0.965960,0.993794,0.105259,0.023739
20997,-0.099264,0.965296,0.993897,0.120073,0.018344
20998,-0.093625,0.965333,0.993926,0.222439,0.137232


In [80]:
# Data correlation: pairwise correlation matrix between the columns of
# df_data, using DataFrame.corr() with its default arguments (pandas'
# default method is Pearson). At this point df_data holds only the rows
# kept by the filtering cell above.

df_data.corr()

Unnamed: 0,Perturbation p. temperature,Temperature 2m,Surface pressure,Total c. precipitation,Total g. scale precipitation
Perturbation p. temperature,1.0,0.793627,-0.099362,-0.003558,-0.028956
Temperature 2m,0.793627,1.0,0.368352,-0.073163,-0.077287
Surface pressure,-0.099362,0.368352,1.0,-0.05748,-0.095137
Total c. precipitation,-0.003558,-0.073163,-0.05748,1.0,0.734707
Total g. scale precipitation,-0.028956,-0.077287,-0.095137,0.734707,1.0


In [81]:
# Write the current df_data to "output.csv" in the working directory;
# index=False leaves the row index out of the file.
df_data.to_csv("output.csv", index=False)