#### The purpose of this file is to collect numerical data about average wind and wind gust strength from the Windfinder website (www.windfinder.com) and to store the collected data in a CSV file

In [1]:
from datetime import date, timedelta
import datetime
import pandas as pd
import csv
import numpy as np
import os 
from selenium import webdriver
from selenium.webdriver.chrome.webdriver import WebDriver

#### This function opens Chrome and runs a simple JavaScript snippet to harvest the data from the website

In [2]:
def get_data(url, driver_path='C:/Users/ttnnn/Anaconda3/selenium/webdriver/Chrome/chromedriver.exe'):
    """Open ``url`` in Chrome and harvest the wind series from its Highcharts chart.

    A small JavaScript snippet reads the ``options.data`` arrays of the first
    three series of the first Highcharts chart on the page.

    Args:
        url: Address of the report page to load.
        driver_path: Location of the chromedriver executable. Defaults to the
            path that was previously hard-coded in this function.

    Returns:
        A 2-D numpy array with one row per data point and three columns:
        the first element of each point of series 0 integer-divided by 1000
        (presumably a millisecond timestamp converted to seconds - confirm
        against the site), the last element of each point of series 0, and
        the last element of each point of series 1.
        ``None`` if the script fails or the data does not have that layout.
    """
    browser = webdriver.Chrome(driver_path)
    try:
        browser.get(url)
        try:
            series = browser.execute_script('return window.Highcharts.charts[0].series.slice(0,3).map(a=>a.options.data)')
        except Exception:
            # Best effort: any script/driver failure means "no data", but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            print('Unable to get the data')
            return None
        try:
            # np.array is inside the try because ragged series raise
            # ValueError on recent numpy versions.
            data = np.array(series)
            data = np.concatenate(
                [
                    data[0, :, 0].reshape(-1, 1) // 1000,
                    data[0, :, -1].reshape(-1, 1),
                    data[1, :, -1].reshape(-1, 1),
                ],
                axis=1,
            )
        except (IndexError, TypeError, ValueError):
            print('data is not in expected format, possibly no data available for this day')
            return None
        return data
    finally:
        # quit() ends the whole driver session (close() only closes the
        # window), and the finally clause runs on every exit path, including
        # an exception from browser.get().
        browser.quit()

#### This function converts a timestamp into a date string of the form year-month-day-hour-minute; it returns 0 for timestamps outside the hours 10 to 19

In [3]:
def date_from_stamp(x):
    """Format the timestamp ``x`` as 'year-month-day-hour-minute'.

    The timestamp is interpreted in local time. Only moments whose hour lies
    between 10 and 19 (inclusive) are formatted; for any other hour the
    function returns the integer 0, which callers use as a "drop this row"
    marker. The numbers are not zero-padded.
    """
    moment = datetime.datetime.fromtimestamp(x)
    # Guard clause: everything outside the 10..19 hour window is rejected.
    if not 10 <= moment.hour <= 19:
        return 0
    return '{}-{}-{}-{}-{}'.format(
        moment.year, moment.month, moment.day, moment.hour, moment.minute
    )

#### This function creates the date string used to retrieve the history, given how many days back you want to look

In [4]:
def day_back_to_date(days_back):
    """Return the ISO date string for ``days_back`` days before today.

    Only offsets from 0 up to and including 7 are accepted, because the
    website only contains information down to one week back; any other
    offset yields ``None``.
    """
    if not 0 <= days_back < 8:
        return None
    target_day = date.today() - timedelta(days_back)
    return target_day.isoformat()

#### This function writes the data for a given location and a given number of days back from today into a CSV file. It creates a directory named after the location and writes the CSV file into that directory

In [5]:
def data_to_file(days_back,location):
    """Fetch one day of wind data for ``location`` and save it as a CSV file.

    The file is written to ``<location>/<date>.csv`` relative to the current
    working directory; the directory is created if it does not exist yet.
    The CSV has the columns 'date', 'Avg. wind' and 'wind gusts'. Rows for
    which ``date_from_stamp`` returns 0 are dropped.

    Args:
        days_back: How many days before today to fetch. Must be accepted by
            ``day_back_to_date``; otherwise a message is printed and nothing
            is written.
        location: Location name as used in the Windfinder report URL; it is
            also used as the output directory name.

    Returns:
        None. Nothing is written if the date is rejected or ``get_data``
        returns no data.
    """
    dt = day_back_to_date(days_back)
    if dt is None:
        print ('No data available for this date')
        return

    # The report for today is requested without a date suffix in the URL.
    dturl = '' if dt == str(date.today()) else dt
    url = "https://www.windfinder.com/report/{}/{}".format(location,dturl)

    print('getting data from {} on day {}'.format(location,dt))

    data = get_data(url)
    if data is None:
        return

    df = pd.DataFrame(data,columns=['date','Avg. wind','wind gusts'])
    df['date'] = df['date'].apply(date_from_stamp)
    # date_from_stamp marks unwanted rows with 0; keep everything else.
    df = df[~(df['date']==0)]

    out_dir = '{}'.format(location)
    os.makedirs(out_dir, exist_ok=True)
    # os.path.join replaces the former "{}\{}.csv" pattern, which relied on an
    # invalid escape sequence and only produced a path inside the directory
    # on Windows.
    df.to_csv(os.path.join(out_dir, '{}.csv'.format(dt)),index=False)

#### This is the final script: it collects the data and writes it into CSV files for a given list of locations and a range of days back in history (from 0 to a maximum of 7 days).

In [7]:
# Locations to collect; each name is passed to data_to_file, which puts it
# into the report URL and uses it as the output directory name.
# 'tarifa-puerto' is currently commented out of the list.
locationsWF=['valdevaqueros']#,'tarifa-puerto'

# For every location, fetch and save one CSV per day for 4, 5, 6 and 7 days
# back from today (range(4,8) excludes 8).
for location in locationsWF:
    for days_back in range(4,8):
        data_to_file(days_back,location)

getting data from valdevaqueros on day 2018-07-11
getting data from valdevaqueros on day 2018-07-10
Unable to get the data
getting data from valdevaqueros on day 2018-07-09
getting data from valdevaqueros on day 2018-07-08
Unable to get the data
