In [1]:
from netCDF4 import Dataset
import numpy as np
import dateutil.parser
import matplotlib.pyplot as plt
import os
from glob import glob
import tqdm
import pandas as pd
import datetime

sonde_dir = '../data/MOSAiC/sondes/'

result = [y for x in os.walk(sonde_dir) for y in glob(os.path.join(x[0], '*.nc'))]

In [2]:
list_of_dicts = []

for f in tqdm.tqdm(result[:]):
    
    d = Dataset(f)
    datestring = d['time'].units.split(' ')[-1]
    dt0 = dateutil.parser.parse(datestring)
    lon0, lat0 = float(d['lon'][0]), float(d['lat'][0])

    alt = np.array(d['alt'])
    temp = np.array(d['temp'])-273.15
    wind = np.array(d['wspeed'])
    
    # Calculate inversion heights and strengths
    
    t0 = np.nanmedian(temp[(alt<10) & (alt>-10)])
    inversion_height = alt[np.argmax(temp)]
    inversion_strength = np.max(temp) - t0
    
    # Calculate low-level stability (850-2m tempreture)
    
    press = np.array(d['press'])
    t850hpa = temp[np.argmin(np.abs(press-850))]
    t2m = temp[np.argmin(np.abs(alt-2))]
    lls = t850hpa - t2m
    
    rh = np.asarray(d['rh'])/100
    rh_sub_850 = np.nanmean(rh[press<851])
    
    w0 = np.nanmedian(wind[(alt<10) & (alt>-10)])
    w850 = wind[np.argmin(np.abs(press-850))]
    

    dic = {'t0':t0,
           'dt0':dt0,
           'month':dt0.month,
           'lon':lon0,
           'lat':lat0,
           'inversion_strength':inversion_strength,
           'inversion_height':inversion_height,
           'low_level_stability':lls,
           't_850hpa':t850hpa,
           'rh_sub_850':rh_sub_850,
           'surf_wind_velocity':w0,
           'wind_shear':w850-w0,
          }
    
    list_of_dicts.append(dic)
    
df = pd.DataFrame(list_of_dicts)

  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
100%|█████████████████████████████████████████| 624/624 [01:18<00:00,  7.92it/s]


In [3]:
df

Unnamed: 0,t0,dt0,month,lon,lat,inversion_strength,inversion_height,low_level_stability,t_850hpa,rh_sub_850,surf_wind_velocity,wind_shear
0,-27.561951,2020-02-16 22:43:34.370000+00:00,2,79.627572,88.069349,6.222931,579.749268,5.231018,-22.340195,0.273186,6.142340,1.432100
1,-23.267990,2020-02-24 16:52:23.852000+00:00,2,52.539957,88.584139,,17379.167969,5.955933,-17.312057,0.339253,12.822152,6.246299
2,-24.137451,2020-02-25 16:51:36.069000+00:00,2,44.385008,88.549541,6.794998,570.027283,4.590500,-19.546951,0.224312,12.269664,9.219295
3,,2020-02-15 22:44:46.693000+00:00,2,79.744262,88.067856,,494.141846,10.335236,-26.076447,0.228418,,
4,-29.049454,2020-02-26 16:49:32.343000+00:00,2,39.222764,88.473025,8.806351,890.785461,8.317902,-20.591187,0.274485,8.542025,4.615234
...,...,...,...,...,...,...,...,...,...,...,...,...
619,,2020-01-03 22:50:04.390000+00:00,1,115.546444,86.954808,,12420.911133,6.965759,-17.073944,0.278028,,
620,-26.243912,2020-01-26 10:54:35.932000+00:00,1,93.679801,87.415841,6.904388,513.987671,4.768631,-21.475281,0.253483,5.623881,4.731204
621,-25.514267,2020-01-04 22:49:24.713000+00:00,1,115.339948,87.040306,6.743668,1154.372803,6.662201,-18.852066,0.207381,5.384744,1.662308
622,,2020-01-24 04:55:36.050000+00:00,1,93.267425,87.424958,,434.193237,10.026321,-21.034790,0.289284,,


In [4]:
df_c = df.dropna(subset = ['dt0', 'low_level_stability'])

df_c['dt_'] = [x.to_datetime64() for x in df_c['dt0']]

df_c = df_c[df_c['dt_'] < datetime.datetime(2020,7,1)]
df_c = df_c[df_c['dt_'] > datetime.datetime(2019,10,15)]

df_c.to_csv('../data/tables/MOSAiC.csv')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c['dt_'] = [x.to_datetime64() for x in df_c['dt0']]


In [9]:
df_c.sort_values('dt0')[200:250]


Unnamed: 0,t0,dt0,month,lon,lat,inversion_strength,inversion_height,low_level_stability,t_850hpa,rh_sub_850,surf_wind_velocity,wind_shear,dt_
384,-28.317108,2019-12-22 04:52:12.753000+00:00,12,112.538072,86.678483,,14050.96582,12.149902,-16.167206,0.230034,3.7843,4.433898,2019-12-22 04:52:12.753
373,-26.828644,2019-12-22 10:52:05.608000+00:00,12,112.635715,86.672247,12.057709,1204.779785,11.629974,-15.198669,0.242225,2.732876,7.23046,2019-12-22 10:52:05.608
362,,2019-12-22 16:57:02.403000+00:00,12,112.728182,86.66776,,362.41394,1.885986,-16.444427,0.277037,,,2019-12-22 16:57:02.403
461,-17.835197,2019-12-22 22:53:12.429000+00:00,12,112.882442,86.657757,3.981743,351.186218,0.671585,-17.007019,0.249053,5.809114,1.862307,2019-12-22 22:53:12.429
379,-21.990891,2019-12-23 04:54:44.285000+00:00,12,113.005715,86.649269,7.264938,933.570862,6.233139,-15.757751,0.252526,4.548305,2.329622,2019-12-23 04:54:44.285
376,-25.568382,2019-12-23 10:59:40.152000+00:00,12,113.15141,86.641628,,23255.822266,10.78273,-14.737183,0.228403,3.645938,-1.191385,2019-12-23 10:59:40.152
357,-26.707382,2019-12-23 16:50:49.751000+00:00,12,113.26006,86.635734,12.852188,474.588348,10.361572,-16.325958,0.226327,4.998038,1.994176,2019-12-23 16:50:49.751
434,-27.588959,2019-12-23 22:53:33.324000+00:00,12,113.445072,86.630625,13.932281,772.398071,10.945923,-16.643036,0.249527,7.120088,2.305793,2019-12-23 22:53:33.324
389,-27.398445,2019-12-24 04:53:48.666000+00:00,12,113.642326,86.627605,12.167244,749.745056,10.962158,-16.650421,0.241729,5.245157,3.519503,2019-12-24 04:53:48.666
398,-28.225166,2019-12-24 10:52:53.510000+00:00,12,113.890729,86.624768,12.507118,710.252197,10.21933,-17.856232,0.394938,3.633289,6.605672,2019-12-24 10:52:53.510
