# lasted edited by Claire Valva, June 14, 2018

# 1979 to 2016 300hPa at 40.5 deg data processing - Winter

Will look at winter spectra (Dec/Jan/Feb)

### file details
will be using file: 1979-2016-300hPa-40.5N-z.nc 
which is currently located in ~/uncategorized data

This file contains a single 2D array called “z”, whose dimension is (240, 55520).  The first dimension is longitude, 1.5 degree interval from 0 to 358.5 (0 is Greenwich Mean).  The second dimension is time, 6 hours apart, from 00 UTC January 1, 1979 to 18 UTC December 31, 2016.  The unit is meters^2/sec^2.  Divide by g = 9.81 m/s^2 to obtain geopotential height, the altitude at which you find 300 hPa.

In [2]:
#import packages, for now everything I think I'll need
import numpy as np
from netCDF4 import Dataset, num2date # This is to read .nc files and time array
from scipy.signal import get_window, csd
from scipy.fftpack import fft, ifft, fftshift, fftfreq
import matplotlib.pyplot as plt
from cartopy.util import add_cyclic_point
import cartopy.crs as ccrs
import pandas as pd
import datetime
from math import pi
import csv

In [2]:
filepath = '/home/clairev/uncategorized-data/1979-2016-300hPa-40.5N-z.nc' # Location of the file
fileobj = Dataset(filepath, mode='r')

# Check what's in there
fileobj

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF3_CLASSIC data model, file format NETCDF3):
    dimensions(sizes): longitude(240), time(55520)
    variables(dimensions): float32 [4mz[0m(time,longitude)
    groups: 

In [3]:
#load coordinates
#so height[i] is the geopotential height at a given time
height = fileobj.variables['z'][:]/9.81

#create time array
time_list = []
for i in range(0,55520):
    time_list.append(i*6)
tunit = "hours since 1979-01-01T00:00:00Z"
tarray = num2date(time_list,units = tunit,calendar = 'gregorian')

#create longitude array
lon_list = []
for i in range(240):
    lon_list.append(i*1.5)

In [4]:
#find and index the winter dates
winter_index = []

for i in range(0,55520):
    if tarray[i].month == 1:
        winter_index.append(i)
    if tarray[i].month == 2:
        winter_index.append(i)
    if tarray[i].month == 12:
        winter_index.append(i)

### apply zonal fft to all winter dates

In [5]:
#xf will be always the same
N = 240
T = 1./N
xfzone = fftfreq(N, d=(T))

### apply csd to zonal data

##### to do: compare fft and csd function

In [6]:
#ck will change with date
new_ckzone_listed = []


for i in range(0,55520):
    if i in winter_index: #change index to something smaller if need to check!
        snapshot = height[i]
        window_length = 240
        freqzonei, powerspec_zonei = csd(height[i],height[i], fs=4, 
                           window='hann', 
                           nperseg=window_length, scaling='spectrum')
        new_ckzone_listed.append(powerspec_zonei)
        
freqzone, powerspec_zonei = csd(height[2],height[2], fs=4, 
                           window='hann', 
                           nperseg=window_length, scaling='spectrum')

ckzonesize = len(new_ckzone_listed)

### save zonal transform to csv using pandas

In [81]:
#make dataframe and name columns for the spectra
zonal_df = pd.DataFrame()
zonal_df = zonal_df.append(pd.Series(freqzone), ignore_index = True)
zonal_df = zonal_df.append(pd.Series(new_ckzone_listed[0]), ignore_index = True)

for i in range(1, ckzonesize):
    zonal_df = zonal_df.append(pd.Series(new_ckzone_listed[i]), ignore_index = True)
zonal_df = zonal_df.rename(columns = lambda x : 'spec_' + str(x))

In [82]:
#make the dataframe for index and dates
new_winter = np.insert(winter_index, 0 , -1)
new_dates = np.insert(tarray[winter_index], 0, np.nan)
new_data = {'index' : new_winter, 'date' : new_dates}
new_header = pd.DataFrame(new_data, columns = ['index', 'date'])

In [87]:
# join the tags dataframe back to the original dataframe
full_zonal_dataset = pd.concat([new_header, zonal_df], axis=1)
full_zonal_dataset.to_csv('zonal_winter.csv')

### apply hanning window
transform each year separately, and then take the average spectra

In [89]:
#create indexing for each year, so can find the dates
year_number = 2016 - 1979

year_index = [] #year_index[i] will be index of dates of year 1979 + i 

for j in range(0,year_number): #1979 left out because no december for that winter, as is 2016
    yearj_index = []
    for i in range(0,55520):
        if tarray[i].year == 1979 + j:
            yearj_index.append(i)
    year_index.append(yearj_index)

In [90]:
#find and index the winter dates by month
dec_index = []
jan_index = []
feb_index = []

for i in range(0,55520):
    if tarray[i].month == 1:
        jan_index.append(i)
    if tarray[i].month == 2:
        feb_index.append(i)
    if tarray[i].month == 12:
        dec_index.append(i)

In [4]:
?csd
?fft

In [91]:
#the winter of the year, will be the winter that began in december of the year noted
freq_timebyyr = []
power_timebyyr = []

#choose a window length
window_length = 300

#then transform each year individually 
for i in range(0,N): #for each longitude
    freqi_byyr = [] #will append the fft for each year to this
    poweri_byyr = [] #will append the fft for each year to this, then add to the entire list
    
    for k in range(0,(year_number-1)): #go over each year - so can run and check other things
        
        lonik = []
        for j in range(0,55520):
            if j in dec_index and year_index[k]:
                lonik.append(fileobj.variables['z'][j,i]/9.81)
            
            if j in jan_index and year_index[k+1]:
                lonik.append(fileobj.variables['z'][j,i]/9.81)
            
            if j in feb_index and year_index[k + 1]:
                lonik.append(fileobj.variables['z'][j,i]/9.81)
    
        freqi_yeark, power_speci_yeark = csd(lonik,lonik, fs=4, 
                           window='hann', 
                           nperseg=window_length, scaling='spectrum')
        freqi_byyr.append(freqi_yeark)
        poweri_byyr.append(power_speci_yeark)

    freq_timebyyr.append(freqi_byyr)
    power_timebyyr.append(poweri_byyr)    

### write results to a pandas dataframe and save as csv

In [96]:
#make dataframe and name columns for the spectra
time_df = pd.DataFrame()
time_df = time_df.append(pd.Series(freq_timebyyr[0][0]), ignore_index = True)
time_df = time_df.append(pd.Series(power_timebyyr[0][0]), ignore_index = True)

for i in range(0,N):
    for k in range((year_number-1)):
        time_df = time_df.append(pd.Series(power_timebyyr[i][k]), ignore_index = True)

time_df = time_df.rename(columns = lambda x : 'spec_' + str(x))

Unnamed: 0,spec_0,spec_1,spec_2,spec_3,spec_4,spec_5,spec_6,spec_7,spec_8,spec_9,...,spec_141,spec_142,spec_143,spec_144,spec_145,spec_146,spec_147,spec_148,spec_149,spec_150
0,0.0,0.013333,0.026667,0.04,0.053333,0.066667,0.08,0.093333,0.106667,0.12,...,1.88,1.893333,1.906667,1.92,1.933333,1.946667,1.96,1.973333,1.986667,2.0
1,1290.577834,5804.810111,5354.247482,4274.6113,2918.721072,2308.220844,2084.76163,1600.507634,1471.52552,954.355206,...,0.702072,0.721559,0.726366,0.915309,0.84568,0.739804,0.870587,0.969588,3.155281,5.344319
2,1290.577834,5804.810111,5354.247482,4274.6113,2918.721072,2308.220844,2084.76163,1600.507634,1471.52552,954.355206,...,0.702072,0.721559,0.726366,0.915309,0.84568,0.739804,0.870587,0.969588,3.155281,5.344319
3,1290.577834,5804.810111,5354.247482,4274.6113,2918.721072,2308.220844,2084.76163,1600.507634,1471.52552,954.355206,...,0.702072,0.721559,0.726366,0.915309,0.84568,0.739804,0.870587,0.969588,3.155281,5.344319
4,1290.577834,5804.810111,5354.247482,4274.6113,2918.721072,2308.220844,2084.76163,1600.507634,1471.52552,954.355206,...,0.702072,0.721559,0.726366,0.915309,0.84568,0.739804,0.870587,0.969588,3.155281,5.344319
5,1313.893161,5933.475053,5489.713969,4347.400697,2891.391781,2286.533538,2031.823365,1596.334592,1480.581327,929.553636,...,0.703338,0.777942,0.796124,0.94344,0.871145,0.770022,0.861479,0.964481,3.02341,5.125266
6,1313.893161,5933.475053,5489.713969,4347.400697,2891.391781,2286.533538,2031.823365,1596.334592,1480.581327,929.553636,...,0.703338,0.777942,0.796124,0.94344,0.871145,0.770022,0.861479,0.964481,3.02341,5.125266
7,1313.893161,5933.475053,5489.713969,4347.400697,2891.391781,2286.533538,2031.823365,1596.334592,1480.581327,929.553636,...,0.703338,0.777942,0.796124,0.94344,0.871145,0.770022,0.861479,0.964481,3.02341,5.125266
8,1325.729116,6031.567375,5628.2371,4403.799444,2847.845579,2249.168247,1973.859948,1588.897356,1474.171943,910.834954,...,0.825332,0.894414,0.958276,0.978152,0.889071,0.792346,0.808996,0.89464,2.83398,4.837895
9,1325.729116,6031.567375,5628.2371,4403.799444,2847.845579,2249.168247,1973.859948,1588.897356,1474.171943,910.834954,...,0.825332,0.894414,0.958276,0.978152,0.889071,0.792346,0.808996,0.89464,2.83398,4.837895


In [117]:
#make the dataframe for index and dates
year_number = 2016 - 1979
years_list = []
longitude = []

for i in range(year_number-1):
    years_list.append(i + 1979)

for i in range(0,N):
    longitude.append(i*1.5)

In [123]:
#order the years correctly
header_years = [np.nan]

for i in range(0,N):
    for k in range((year_number-1)):
        header_years.append(years_list[k])

header_long = [np.nan]
for i in range(0,N):
    for k in range((year_number-1)):
        header_long.append(longitude[i])

In [124]:
#merge the datasets
header_data = {'year' : header_years, 'longitude' : header_long}
full_header = pd.DataFrame(header_data, columns = ['year', 'longitude'])

In [127]:
# join the tags dataframe back to the original dataframe
full_time_dataset = pd.concat([full_header, time_df], axis=1)
full_time_dataset.to_csv('time_fft_winter.csv')