In [1]:
import netCDF4
import numpy as np
import pandas as pd

In [2]:
# Open the sst dataset read-only and list the names of the variables it holds.
nc = netCDF4.Dataset('data/sst.mnmean.nc', mode='r')
nc.variables.keys()

dict_keys(['lat', 'lon', 'time_bnds', 'time', 'sst'])

In [3]:
# Keep a handle on the 'sst' variable and print the order of its dimensions;
# that order is the index order used whenever sst is sliced further down.
sst = nc.variables['sst']
print(sst.dimensions)

('time', 'lat', 'lon')


In [4]:
# Coordinate variables of the dataset, followed by a full [:] read of each one.
lon, lat, time = (nc.variables[axis_name] for axis_name in ('lon', 'lat', 'time'))
lon_array, lat_array, time_array = lon[:], lat[:], time[:]

In [6]:
# Load one table of station coordinates per region from data/lon-lat/.
df_gulf = pd.read_csv('data/lon-lat/lon-lat_gulf.csv')
df_mid_atl = pd.read_csv('data/lon-lat/lon-lat_mid-atl.csv')
df_ne = pd.read_csv('data/lon-lat/lon-lat_ne.csv')
df_south_atl = pd.read_csv('data/lon-lat/lon-lat_south-atl.csv')
df_ak = pd.read_csv('data/lon-lat/alaska-lat-lon.csv')
# Rename the Alaska columns so the later row['lon'] / row['lat'] lookups work.
# NOTE(review): the file is named "lat-lon" while the new names are ordered
# lon, lat -- confirm the column order in the CSV matches this assignment.
df_ak.columns = ['lon', 'lat', 'coordinates']
df_norcal = pd.read_csv('data/lon-lat/lon-lat_norcal.csv')
df_socal = pd.read_csv('data/lon-lat/lon-lat_socal.csv')
df_nw = pd.read_csv('data/lon-lat/lon-lat_nw.csv')

In [7]:
# The station tables come in two longitude formats:
#   format 1: gulf, mid-atl, ne, south-atl -> looked up at 360 - lon
#   format 2: ak, norcal, socal, nw        -> looked up at 360 + lon
# NOTE(review): presumably format 1 stores degrees west as positive numbers and
# format 2 stores them as negative numbers -- confirm against the CSV files.

# Value on the dataset's time axis at which every extracted series starts.
# NOTE(review): presumably expressed in the units of nc.variables['time'] --
# confirm against that variable's `units` attribute.
_SERIES_START_TIME = 65743


def _monthly_temps_near(lon_target, lat_target):
    """Return the sst series of the grid cell closest to the target coordinates.

    The nearest entries of lon_array and lat_array select the grid cell, and the
    entry of time_array nearest to _SERIES_START_TIME selects the first time
    step; sst is returned from that time step to the end of the time axis.
    """
    lon_idx = np.abs(lon_array - lon_target).argmin()
    lat_idx = np.abs(lat_array - lat_target).argmin()
    start_idx = np.abs(time_array - _SERIES_START_TIME).argmin()
    return sst[start_idx:, lat_idx, lon_idx]


def get_monthly_temps_format_1(row):
    """Return the sst series for a format-1 row (target longitude is 360 - lon).

    row must provide 'lon' and 'lat'; both are converted with int() before the
    nearest-grid-cell lookup.
    """
    return _monthly_temps_near(360 - int(row['lon']), int(row['lat']))


def get_monthly_temps_format_2(row):
    """Return the sst series for a format-2 row (target longitude is 360 + lon).

    row must provide 'lon' and 'lat'; both are converted with int() before the
    nearest-grid-cell lookup.
    """
    return _monthly_temps_near(360 + int(row['lon']), int(row['lat']))
def get_mt_len(row):
    """Return how many entries the row's 'monthly_temps' value contains."""
    monthly_temps = row['monthly_temps']
    return len(monthly_temps)

In [8]:
# Attach to every station row the sst series of its nearest grid cell.
# gulf, mid-atl, ne and south-atl use longitude format 1; ak, norcal, socal
# and nw use format 2.
df_gulf['monthly_temps'] = df_gulf.apply(get_monthly_temps_format_1, axis=1)
df_mid_atl['monthly_temps'] = df_mid_atl.apply(get_monthly_temps_format_1, axis=1)
df_ne['monthly_temps'] = df_ne.apply(get_monthly_temps_format_1, axis=1)
df_south_atl['monthly_temps'] = df_south_atl.apply(get_monthly_temps_format_1, axis=1)
df_ak['monthly_temps'] = df_ak.apply(get_monthly_temps_format_2, axis=1)
# DataFrame.drop returns a new frame instead of modifying df_ak, so the result
# has to be assigned back for the faulty station (index label 16) to really be
# removed. errors='ignore' keeps this cell re-runnable once the row is gone.
df_ak = df_ak.drop(16, errors='ignore') #faulty station value

df_norcal['monthly_temps'] = df_norcal.apply(get_monthly_temps_format_2, axis=1)
df_socal['monthly_temps'] = df_socal.apply(get_monthly_temps_format_2, axis=1)
df_nw['monthly_temps'] = df_nw.apply(get_monthly_temps_format_2, axis=1)
df_norcal

Unnamed: 0,lon,lat,coord,monthly_temps
0,-128,42,"(-128, 42)","[11.538263, 11.142895, 11.0544195, 11.12162, 1..."
1,-126,42,"(-126, 42)","[11.287214, 11.181692, 11.171088, 11.239025, 1..."
2,-128,40,"(-128, 40)","[12.57473, 12.166348, 11.909349, 11.98165, 12...."
3,-126,40,"(-126, 40)","[12.240147, 12.088392, 11.904621, 11.8491535, ..."
4,-128,38,"(-128, 38)","[13.736889, 13.296152, 12.894535, 12.969714, 1..."
5,-126,38,"(-126, 38)","[13.338353, 13.081243, 12.793622, 12.67702, 12..."
6,-124,38,"(-124, 38)","[13.119895, 13.089291, 12.835169, 12.389524, 1..."
7,-126,36,"(-126, 36)","[14.604786, 14.227657, 13.867433, 13.738002, 1..."
8,-124,36,"(-124, 36)","[14.204532, 14.031601, 13.764437, 13.329212, 1..."
9,-122,36,"(-122, 36)","[14.0392885, 14.039614, 13.796079, 13.085211, ..."


In [11]:
# Record, per station row, how many entries its monthly_temps series has.
for region_df in (df_gulf, df_mid_atl, df_ne, df_south_atl,
                  df_ak, df_norcal, df_socal, df_nw):
    region_df['len_mt'] = region_df.apply(get_mt_len, axis=1)

    lon  lat coordinates                                      monthly_temps  \
0    96   28    (96, 28)  [20.730728, 19.63417, 20.033535, 21.657959, 25...   
1    94   28    (94, 28)  [20.407877, 19.374058, 19.950226, 21.632412, 2...   
2    92   28    (92, 28)  [20.48052, 19.540897, 20.268312, 21.93986, 25....   
3    90   28    (90, 28)  [20.907722, 20.058928, 20.833769, 22.449533, 2...   
4    88   28    (88, 28)  [21.421728, 20.593494, 21.332998, 22.859287, 2...   
5    86   28    (86, 28)  [21.268625, 20.310617, 21.067694, 22.633436, 2...   
6    84   28    (84, 28)  [20.789728, 19.642757, 20.44814, 22.161278, 25...   
7    96   26    (96, 26)  [22.127575, 21.213999, 21.394415, 22.738552, 2...   
8    94   26    (94, 26)  [22.37084, 21.521923, 21.738377, 22.97299, 25....   
9    92   26    (92, 26)  [22.519243, 21.75671, 22.086731, 23.258965, 25...   
10   90   26    (90, 26)  [22.911386, 22.254501, 22.68875, 23.832203, 26...   
11   88   26    (88, 26)  [23.56648, 22.99217, 23.47

In [12]:
def get_yearly_temps(row):
    """Pick one value per complete group of 12 entries in row['monthly_temps'].

    Returns a list with the entries at positions 0, 12, 24, ...; a trailing
    group with fewer than 12 entries contributes nothing.

    NOTE(review): this takes the first entry of each group rather than a mean
    over its 12 entries -- confirm that this is the intended "yearly" value.
    """
    monthly = row['monthly_temps']
    full_groups = int(len(monthly) / 12)
    return [monthly[group * 12] for group in range(full_groups)]
def get_avg_yearly_temps(df):
    """Average the 'yearly_temps' lists of all rows position by position.

    Parameters
    ----------
    df : DataFrame with a 'yearly_temps' column holding one sequence per row.

    Returns
    -------
    list
        Element i is the mean of yearly_temps[i] over the rows that have a
        value at position i. The result is as long as the longest row, and
        empty when df has no rows.
    """
    totals = []
    # Rows may differ in length, so each position keeps its own count instead
    # of being divided by the total number of rows.
    counts = []
    for r in df.itertuples():
        for i, value in enumerate(r.yearly_temps):
            if i >= len(totals):
                totals.append(value)
                counts.append(1)
            else:
                totals[i] += value
                counts[i] += 1
    return [total / count for total, count in zip(totals, counts)]

In [14]:
# NOTE(review): `test` is not referenced by any of the cells visible here.
test = df_socal.apply(get_yearly_temps, axis=1)
# Derive the yearly_temps column of every region table from its monthly_temps.
for region_df in (df_gulf, df_mid_atl, df_ne, df_south_atl,
                  df_ak, df_norcal, df_socal, df_nw):
    region_df['yearly_temps'] = region_df.apply(get_yearly_temps, axis=1)

In [19]:
# One position-wise average of the yearly_temps lists per region, computed in
# the order the region tables are listed.
(
    avg_yearly_gulf,
    avg_yearly_mid_atl,
    avg_yearly_ne,
    avg_yearly_south_atl,
    avg_yearly_ak,
    avg_yearly_norcal,
    avg_yearly_socal,
    avg_yearly_nw,
) = map(
    get_avg_yearly_temps,
    (df_gulf, df_mid_atl, df_ne, df_south_atl, df_ak, df_norcal, df_socal, df_nw),
)

In [21]:
# Wrap each regional list of averages in a single-column DataFrame.
(
    avg_yearly_gulf_df,
    avg_yearly_mid_atl_df,
    avg_yearly_ne_df,
    avg_yearly_south_atl_df,
    avg_yearly_ak_df,
    avg_yearly_norcal_df,
    avg_yearly_socal_df,
    avg_yearly_nw_df,
) = (
    pd.DataFrame(region_avgs, columns=['avg_yearly_temp'])
    for region_avgs in (
        avg_yearly_gulf,
        avg_yearly_mid_atl,
        avg_yearly_ne,
        avg_yearly_south_atl,
        avg_yearly_ak,
        avg_yearly_norcal,
        avg_yearly_socal,
        avg_yearly_nw,
    )
)

In [22]:
# Write every regional table to its own CSV under data/temp/, without the index.
for region_frame, csv_path in (
    (avg_yearly_gulf_df, 'data/temp/avg_yearly_gulf.csv'),
    (avg_yearly_mid_atl_df, 'data/temp/avg_yearly_mid_atl.csv'),
    (avg_yearly_ne_df, 'data/temp/avg_yearly_ne.csv'),
    (avg_yearly_south_atl_df, 'data/temp/avg_yearly_south_atl.csv'),
    (avg_yearly_ak_df, 'data/temp/avg_yearly_ak.csv'),
    (avg_yearly_norcal_df, 'data/temp/avg_yearly_norcal.csv'),
    (avg_yearly_socal_df, 'data/temp/avg_yearly_socal.csv'),
    (avg_yearly_nw_df, 'data/temp/avg_yearly_nw.csv'),
):
    region_frame.to_csv(csv_path, index=False)