In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
import datetime

In [None]:
# Read and clean up meteorological data recorded near BAZAN.
# From: https://ims.data.gov.il/ims/1   station Haifa-Batey Zikuk (600)
bz = pd.read_json('ims_bazan.json')
bz['dt'] = pd.to_datetime(bz['time_obs'])
bz['date'] = bz['dt'].dt.date
bz['hour'] = bz['dt'].dt.hour
#bz=bz[bz['time_obs'].str.match('.*T02:00:00$')]         # only choose first observation from each day
# Blank out every cell equal to -9999 (presumably the feed's missing-value
# marker -- confirm against the IMS docs) so it cannot skew the daily means.
bz = bz.mask(bz == -9999)

# Average wind vectors:
# split each direction into east/north unit-vector components, average the
# components per day, and compose the mean components back into an angle.
# np.deg2rad / np.rad2deg are used instead of math.pi because `math` is not
# imported by this notebook.
wind_rad = np.deg2rad(bz['wind_dir'])
bz['east'] = np.sin(wind_rad)
bz['north'] = np.cos(wind_rad)
# Per-observation direction folded into [0, 360). After the groupby below this
# column holds the plain arithmetic mean of the angles, as opposed to the
# vector mean stored in 'mean_dir'.
bz['dir'] = np.rad2deg(np.arctan2(bz['east'], bz['north'])) % 360
# numeric_only=True: the frame still carries non-numeric columns (e.g. the
# 'time_obs' strings), which make a bare .mean() raise on pandas >= 2.0.
bz = bz.groupby(['date']).mean(numeric_only=True)
bz['mean_dir'] = np.rad2deg(np.arctan2(bz['east'], bz['north'])) % 360

#bz.index = bz['date']



In [None]:
# Read and clean up air data: start by loading the raw station export.
# The header information still has to be untangled by the code that follows.
st = pd.read_csv(filepath_or_buffer="StationData.csv")
# Translation table: station / column names as they appear in the CSV header
# (mostly Hebrew) -> the English names used for the rest of the notebook.
# Several spellings of the same station deliberately map to one English name.
xlat = { 'שפרינצק' : 'Shprintzak',
             'רוממה' : 'Romema',
             'קריית ים' : 'Kiryat Yam',
             'קריית טבעון' : 'Kiryat Tivon',
             'קריית חיים-דגניה' : 'Kiryat Haim-Degania',
             'קריית חיים - דגניה' : 'Kiryat Haim-Degania',
             'קריית חיים-רגבים' : 'Kiryat Haim-Regavim',
             'קריית חיים' : 'Kiryat Haim-Regavim',
             'קריית ביאליק' : 'Kiryat Bialik',
             'ק.ביאליק עופרים' : 'Kiryat Bialik Ofarim',
             'ק.מוצקין בגין' : 'Kiryat Motzkin Begin',
             'קריית אתא' : 'Kiryat Ata',
             'נשר' : 'Nesher',
             'נווה שאנן' : 'Neve Shaanan',
             'נווה יוסף' : 'Neve Yosef',
             'כרמליה' : 'Carmelia',
             'כפר חסידים' : 'Kfar Hasidim',
             'יזרעאליה' : 'Yizraelia',
             'איינשטיין' : 'Einstein',
             'איגוד' : 'Igud (check-post)',
             'חיפה - איגוד' : 'Igud (check-post)',
             'אחוזה' : 'Ahuza',
             'קריית מוצקין' : 'Kiryat Motzkin',
             'קריית בנימין' : 'Kiryat Binyamin',
             'דליית אל כרמל' : 'D.CARMEL',
             'שמן' : 'Shemen',
             'פז שמנים' : 'Paz Shmanim',
             'פז' : 'Paz',
             'דלק' : 'Delek',
             'סונול' : 'Sonol',
             'תשן נמל הדלק' : 'Tashan fuel port',
             'דור כימיקלים' : 'Dor chemicals',
             'Haifa g-40' : 'mahzam 40',
             'Haifa g-30' : 'mahzam 30',
             'שוק' : 'SHOOK',
             'BAZAN TO-1,2' : 'BAZAN TO-1 2',
             'הדר' : 'Hadar',
             'אחוזה תחבורתית' : 'Ahuza transportation',
             'חוגים' : 'Hugim',
             'עצמאות חיפה' : 'Atzmaut',
             'ד.עכו - ק.מוצקין' : 'Kiryat Motzkin Acco road',
             'ניידת 6' : 'Mobile 6',
             'ניידת 5' : 'Mobile 5',
             'ניידת 4' : 'Mobile 4',
             'פארק כרמל' : 'Park Carmel',
             'מסופי ניפוק' : 'Masofei Nipuk',
             'תחנה ניידת איגוד' : 'Mobile Igud',
             # The backslash is escaped explicitly: a bare "\ " is an invalid
             # escape sequence (SyntaxWarning on Python 3.12+). The key's
             # runtime value is unchanged.
             'תאריך  \\ שעה' : 'DateTime'
}

# Matches everything up to and including the last ".<digits>" run in a string;
# group 1 is the text in front of that suffix. Compiled once, from a raw
# string so the backslashes are real regex escapes rather than invalid Python
# string escapes.
_NUMERIC_SUFFIX = re.compile(r'^(.*)\s*\.\d+')


def cleanup(s, translations=None):
    """Normalize a raw CSV column header into its English station name.

    The last ".<digits>" suffix in the header (e.g. the ".1" that pandas
    appends to repeated column names) is removed, surrounding whitespace is
    stripped, and the result is looked up in the translation table.

    Parameters
    ----------
    s : str
        Raw column header.
    translations : mapping, optional
        Header -> English name table. Defaults to the module-level ``xlat``.

    Returns
    -------
    str
        The translated name, or the cleaned header itself when the table has
        no entry for it.
    """
    if translations is None:
        translations = xlat
    base = _NUMERIC_SUFFIX.sub(r'\1', s).strip()
    return translations.get(base, base)

# The column names are the composition of the CSV header (station name) and
# the first data row (measurement type).
hd = st[0:2].T
hd['idx'] = hd.index   # shift index over to a column
hd['sensor'] = hd['idx'].apply(cleanup) + '_' + hd[0].apply(str.strip)

# move back to our dataframe
st.columns = hd['sensor']
# Need to drop the first two rows, and an empty first column
st = st.drop(index=[0, 1]).drop(columns=st.columns[0])

# The bottom of the data has some rows for statistics; keep only the rows
# whose timestamp field contains a d/m/y date. na=False treats empty cells as
# "not a date" instead of producing NaN, which cannot be used as a row mask.
date_pattern = r"\d+/\d+/\d+"
is_data_row = st['DateTime_'].str.contains(date_pattern, na=False)
# .copy() so the index assignment below acts on an independent frame rather
# than on a slice of the unfiltered one.
st = st[is_data_row].copy()

# parse dates in Israeli format (day/month/year); only the first 10 characters
# (the date part) of the field are used.
obs_dates = pd.to_datetime(st['DateTime_'].str.strip().str[:10], format="%d/%m/%Y")
# The timestamp is "24:00" which really means the next day
st.index = pd.DatetimeIndex(obs_dates.to_numpy()) + pd.Timedelta(days=1)

# convert to numeric, column by column; anything unparsable becomes NaN
st = st.apply(pd.to_numeric, errors='coerce')

#st.to_excel('StationData.xlsx')

In [None]:
# Join the daily air-quality readings with the daily weather means on date.
# `bz` is indexed by plain datetime.date objects (from the groupby on 'date'),
# while `st` has a DatetimeIndex; pandas refuses to merge keys of those two
# dtypes, so the weather index is converted on a copy first. The conversion is
# a no-op if the index is already datetime-typed.
weather_daily = bz.copy()
weather_daily.index = pd.to_datetime(weather_daily.index)
joined = pd.merge(st, weather_daily, left_index=True, right_index=True)
# Summary statistics of the joined frame (the original line here was an
# unterminated string literal and could not be parsed).
joined.describe()

In [None]:
# Column pairs of `joined` to compare against each other: each entry is
# [x column, y column].
pairs = [
    ['Igud (check-post)_RH', 'hmd_rlt'],
    ['Igud (check-post)_WDS', 'wind_spd'],
    ['Igud (check-post)_WDD', 'wind_dir'],
    ['Igud (check-post)_WDD', 'mean_dir'],
    ['mean_dir', 'dir'],
    ['wind_dir', 'dir'],
    ['Igud (check-post)_WDD', 'dir'],
    ['Igud (check-post)_TEMP', 'tmp_air_dry'],
    ['Igud (check-post)_TEMP', 'tmp_air_wet'],
]

#joined[['Haifa-Igud : RH', 'hmd_rlt']].corr(method='spearman')
#joined[['Haifa-Igud : TEMP', 'tmp_air_wet']].corr(method='spearman')

# For every pair: draw a scatter plot, then print the Spearman rank
# correlation matrix of the two columns.
for x_col, y_col in pairs:
    joined.plot.scatter(x=x_col, y=y_col)
    plt.show()
    rank_corr = joined[[x_col, y_col]].corr(method='spearman')
    print(rank_corr)

