In [1]:
import pandas as pd
import numpy as np
import os
import subprocess
import sys
from pathlib import Path
from datetime import datetime
from datetime import timedelta
from multiprocessing import Process, Pool
from dateutil.relativedelta import *
import dateutil.parser
from pathlib import Path

In [2]:
# Load the station catalogue and pull out the columns used in the cells below.
GPS_info = pd.read_csv("IGSNetwork.csv")
# Independent copy of the station-name column.
df_stations = GPS_info.loc[:, "#StationName"].copy()
# Coordinates as pandas arrays, one entry per catalogue row.
df_lat = GPS_info.loc[:, "Latitude"].array
df_long = GPS_info.loc[:, "Longitude"].array


In [3]:
# geodesic measures the distance between two (latitude, longitude) points.
from geopy.distance import geodesic as GD

# Reference locations as (latitude, longitude) tuples.
# NOTE(review): the active china_coor value differs from the commented-out
# (31.19, 121.50) pair, while the label printed below was left unchanged --
# confirm which site is intended.
china_coor = (37.970250, 23.722544)#(31.19, 121.50)
mex_coor = (29.08, -110.96)
# Number of nearest stations kept around each reference location.
closest_stations_n = 8
# Report the separation of the two reference points in kilometers.
reference_distance_km = GD(china_coor, mex_coor).km
print("The distance between China Shangai and Max Hermosillo is: ", reference_distance_km)

The distance between China Shangai and Max Hermosillo is:  11217.42914961266


In [4]:
# Pair every latitude with its longitude once, then measure each pair against
# both reference points (distances in kilometers).
station_coords = list(zip(df_lat, df_long))
df_china = pd.DataFrame({"CHINA_DIST": [GD(coords, china_coor).km for coords in station_coords]})
df_mex = pd.DataFrame({"MEX_DIST": [GD(coords, mex_coor).km for coords in station_coords]})

In [5]:
# Align both distance tables on the station index and place them next to the names.
station_index = df_stations.index
stations = pd.concat(
    [
        df_stations,
        df_china.reindex(station_index),
        df_mex.reindex(station_index),
    ],
    axis=1,
)

In [6]:
# Index labels of the closest_stations_n rows with the smallest distance to
# each reference point (sorted ascending, then the leading rows are kept).
index_china = stations.sort_values(by="CHINA_DIST").iloc[:closest_stations_n].index
index_mex = stations.sort_values(by="MEX_DIST").iloc[:closest_stations_n].index

In [7]:
# Display the selected labels: those of index_china followed by those of index_mex.
index_china.append(index_mex)

Int64Index([112, 183, 311, 399, 182, 444, 242, 244, 43, 333, 103, 270, 252,
            335, 483, 39],
           dtype='int64')

In [8]:
# index_china / index_mex were taken from `.index`, so they hold index *labels*.
# Select with .loc: .iloc returns the same rows only while labels happen to
# equal row positions (the default integer index).
selected_labels = index_china.append(index_mex)
current_stations = stations.loc[selected_labels, :]
print(current_stations)

    #StationName    CHINA_DIST      MEX_DIST
112    DYNG00GRC     21.998436  11219.651156
183    IZMI00TUR    298.070372  11360.992630
311    ORID00MKD    431.445158  10787.239528
399    SOFI00BGR    509.961189  10804.386024
182    ISTA00TUR    572.848723  11213.695325
444    TUBI00TUR    584.112065  11263.340508
242    MAT100ITA    674.080016  10593.761795
244    MATG00ITA    674.080016  10593.761795
43     BLYT00USA  10994.094203    616.276910
333    PIE100USA  10589.499324    638.520030
103    DHLG00USA  11067.067337    663.080189
270    MONP00USA  11144.884169    671.228821
252    MDO100USA  10673.793090    693.834191
335    PIN100USA  11077.998663    725.155434
483    WIDC00USA  11044.083342    745.424342
39     BILL00USA  11109.875277    765.425629


In [9]:

# Root directory under which the per-station CSV files are stored.
pre_path = "D://dataframe"
# File-name template, filled in with the station, year, month and day.
file_name = "{station}_{year}_{month}_{day}.csv"
# Column names and parsing options for the two-column input files.
headers = ["dates", "tec"]
dtypes = dict(dates="str", tec="float")
parse_dates = ["dates"]

In [10]:
def read_csv_and_erase(path, station):
    """Merge the station's raw files from the working directory into one CSV.

    Every file in the current working directory whose name contains
    ``station`` is parsed as a space-separated table without a header row,
    using the column names ``dates`` and ``tec`` (text after ``#`` is treated
    as a comment). The rows of all files are concatenated, indexed by
    ``dates`` and written to ``path`` with xz compression; afterwards the raw
    files are deleted.

    Parameters
    ----------
    path : str or path-like
        Destination of the merged, xz-compressed CSV.
    station : str
        Substring used to select the raw files (glob ``*{station}*``).

    Returns
    -------
    None
        When no file matches, nothing is written and nothing is deleted.
    """
    headers = ['dates', 'tec']
    dtypes = {'dates': 'str', 'tec': 'float'}
    parse_dates = ['dates']
    # Materialise and sort the matches up front: the row order becomes
    # deterministic and the directory is not modified while being iterated.
    raw_files = sorted(Path(".").glob("*{station}*".format(station=station)))
    if not raw_files:
        return
    # Pass the Path itself to pandas; using a Path as a context manager
    # (``with file as f``) is deprecated and no longer works on recent Pythons.
    frames = [
        pd.read_csv(raw_file, header=None, comment='#', sep=' ', names=headers,
                    dtype=dtypes, parse_dates=parse_dates).set_index("dates")
        for raw_file in raw_files
    ]
    # Concatenate once and write once instead of rewriting the output per file.
    pd.concat(frames).to_csv(path, compression='xz')
    # Delete the inputs only after the merged file has been written.
    for raw_file in raw_files:
        raw_file.unlink()

In [11]:
#Return None if the file is already in the file system
def get_new_path(day, station):
    """Return the target CSV path for ``station`` on ``day``, or None if it exists.

    The path has the form ``<pre_path>/<station>/<YYYY>/<MM>/<name>`` where
    ``<name>`` is the module-level ``file_name`` template filled in with the
    station and the zero-padded year, month and day. Missing directories are
    created so the caller can write the file straight away.

    Parameters
    ----------
    day : datetime.date or datetime.datetime
        Day the file belongs to (only ``strftime`` is used).
    station : object
        Station identifier; converted with ``str`` for the directory name.

    Returns
    -------
    str or None
        The file path, or ``None`` when that file is already on disk.
    """
    current_year = day.strftime('%Y')
    current_month = day.strftime('%m')
    month_dir = "/".join([pre_path, str(station), current_year, current_month])
    target = month_dir + "/" + file_name.format(station=station, year=current_year, month=current_month, day=day.strftime('%d'))

    if Path(target).is_file():
        return None
    # parents=True / exist_ok=True replaces the nested is_dir()/os.mkdir()
    # ladder: it also creates pre_path itself when missing and does not fail
    # if another worker thread created a directory in the meantime.
    Path(month_dir).mkdir(parents=True, exist_ok=True)
    return target

In [12]:
def get_tec_station(start_day, end_day, station):
    """Run ``ionolabtec.exe`` for every missing day of ``station`` and store the output.

    Days run from ``start_day`` (inclusive) to ``end_day`` (exclusive). A day
    is skipped when ``get_new_path`` returns ``None`` (its file already
    exists). For every other day the executable is invoked with the station
    and the ``YYYY-MM-DD`` date, and ``read_csv_and_erase`` then collects the
    produced files into the day's target path.

    Parameters
    ----------
    start_day, end_day : datetime.date
        Bounds of the day range; ``end_day`` itself is not processed.
    station : object
        Station identifier, passed to the executable as ``str(station)``.

    Returns
    -------
    int
        Always 0.
    """
    day_count = (end_day - start_day).days
    for offset in range(day_count):
        day = start_day + timedelta(days=offset)
        new_path = get_new_path(day, station)
        if new_path is None:
            continue
        day_label = day.strftime('%Y-%m-%d')
        print(str(station) + " -- " + day_label)
        result = subprocess.run(["ionolabtec.exe", str(station), day_label], capture_output=True)
        # Inspect the captured bytes directly instead of searching their repr
        # for an escaped CRLF, so the check does not depend on the line ending.
        if b"does not exist." in result.stdout:
            print(str(station) + " is not valid")
        read_csv_and_erase(new_path, station)
    return 0

In [13]:
import concurrent.futures
import urllib.request
from geopy.distance import geodesic as GD
# Stations to process: the names of the rows selected around both reference points.
# NOTE(review): this rebinds `stations`, which an earlier cell uses for the
# distance DataFrame; re-running those earlier cells after this one will fail.
stations = current_stations["#StationName"]


def f(station):
    """Fetch the TEC files of one station for 2018-01-01 up to (not including) 2018-01-05."""
    start_day = datetime(2018, 1, 1).date()
    end_day = datetime(2018, 1, 5).date()
    get_tec_station(start_day, end_day, station)


# One task per station, at most 10 running at the same time.
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
    future_to_station = {executor.submit(f, station): station for station in stations}
    for future in concurrent.futures.as_completed(future_to_station):
        station = future_to_station[future]
        try:
            # f returns nothing; result() is called only to surface exceptions.
            future.result()
        except Exception as exc:
            print('%r generated an exception: %s' % (station, exc))
        else:
            print("Finished")

DYNG00GRC -- 2018-01-01IZMI00TUR -- 2018-01-01

ORID00MKD -- 2018-01-01
SOFI00BGR -- 2018-01-01
ISTA00TUR -- 2018-01-01
MATG00ITA -- 2018-01-01MAT100ITA -- 2018-01-01BLYT00USA -- 2018-01-01TUBI00TUR -- 2018-01-01



PIE100USA -- 2018-01-01
ISTA00TUR -- 2018-01-02
MATG00ITA -- 2018-01-02
MAT100ITA -- 2018-01-02
DYNG00GRC -- 2018-01-02
ISTA00TUR -- 2018-01-03
DYNG00GRC -- 2018-01-03
MATG00ITA -- 2018-01-03
MAT100ITA -- 2018-01-03
IZMI00TUR is not valid
BLYT00USA is not validTUBI00TUR is not valid

SOFI00BGR is not validIZMI00TUR -- 2018-01-02

TUBI00TUR -- 2018-01-02BLYT00USA -- 2018-01-02

SOFI00BGR -- 2018-01-02
MAT100ITA -- 2018-01-04
ORID00MKD is not valid
ORID00MKD -- 2018-01-02
ISTA00TUR -- 2018-01-04
DYNG00GRC -- 2018-01-04
PIE100USA -- 2018-01-02
MATG00ITA -- 2018-01-04
IZMI00TUR is not valid
IZMI00TUR -- 2018-01-03
SOFI00BGR is not valid
SOFI00BGR -- 2018-01-03
TUBI00TUR is not valid
TUBI00TUR -- 2018-01-03
BLYT00USA is not valid
BLYT00USA -- 2018-01-03
ORID00MKD is not valid
OR

In [51]:
#Including necessary packages
import json
import requests
import os
from urllib.request import urlretrieve

In [52]:
#Helper that checks the queries registered for an address
def checking_by_mail(mail):
    """
    Ask the API for everything it knows about the queries made by <mail>.
    input - <mail> string type email address to check
    output - the decoded JSON body of the response (per the original notes,
             a list of dictionaries, one per query)
    """
    payload = {"method": "check", "args": {"email": mail}}
    response = requests.post("https://simurg.iszf.irk.ru/api", json=payload)
    return response.json()

In [4]:
#Print every query returned for the email address
# NOTE(review): a real-looking address is hardcoded here; consider loading it
# from configuration before sharing the notebook.
email = 'ccd532@uma.es'
for query_info in checking_by_mail(email):
    print(query_info)

In [5]:
#Function to show the progress of a download
def progress_print(count, block_size, total_size):
    """
    Print the download progress on a single, self-overwriting line.
    The signature matches the ``reporthook`` callback of ``urlretrieve``.
    input - <count> number of blocks transferred so far,
            <block_size> size of one block in bytes,
            <total_size> total size in bytes; a value <= 0 means "unknown"
    output - None (prints "<percent>/100%", or "<n> bytes" when the total is unknown)
    """
    transferred = count * block_size
    if total_size <= 0:
        # No usable total (the hook may receive -1 or 0): a percentage cannot
        # be computed, so report the transferred amount instead of crashing.
        print("{0:d} bytes".format(int(transferred)), end='\r')
        return
    # The last block usually overshoots the total; never show more than 100.
    prc = min(transferred / (total_size / 100), 100)
    print("{0:3d}/100%".format(int(prc)), end='\r')

In [6]:
#Function that downloads data into the current working directory
def download_data(query,type_q,download_t):
    """
    Download the result of one query, described by its query dictionary.
    input - <query> query dictionary,
            <type_q> string type what type of query download (either 'series' or 'map'),
            <download_t> string type what to download ('zip' zip archive of maps/series or 'h5' all data in hdf type)
    output - None; the outcome is reported with print
    """
    print('id:', query['id'],
          'type:', query['type'],
          'status:', query['status'],
          'site:', query['site'])
    # Only finished queries of the requested type can be downloaded.
    wanted = query['type'] == type_q and query['status'] == 'done'
    if not wanted:
        return print("Nothing to Download!")

    directory = os.getcwd()
    file_name = 'example.'+download_t
    # 'h5' selects the data file; any other value selects the zipped PNG archive.
    path_key = 'data' if download_t == 'h5' else 'zippng'
    url = "https://simurg.iszf.irk.ru/tecs/"+query['paths'][path_key]
    urlretrieve(url, os.path.join(directory,file_name), reporthook=progress_print)

    # Confirm that the file really appeared in the working directory.
    if file_name in os.listdir():
        return print("Downloaded as "+file_name+"!")
    return print("Mistake occurs!")

In [7]:
import requests
import time
from datetime import datetime, timedelta

In [8]:
def get_last_query_by_mail(mail):
    """
    Return the ID of the most recently created query of <mail>,
    or None if there are no queries
    """
    payload = {"method": "check", "args": {"email": mail}}
    resp = requests.post("https://simurg.iszf.irk.ru/api", json=payload)
    newest_time = datetime(1970,1,1)
    newest_id = None
    for q in resp.json():
        # Only the first 19 characters ('YYYY-mm-dd HH:MM:SS') are parsed.
        created = datetime.strptime(q['created'][:19], '%Y-%m-%d %H:%M:%S')
        # '>=' so that, on equal timestamps, the entry seen later wins.
        if created >= newest_time:
            newest_time = created
            newest_id = q['id']
    return newest_id

In [9]:
def wait_query_is_done(mail, _id, timeout = 600, delete=True, download=False, poll_interval=60):
    """
    Poll the API until query <_id> of <mail> is 'done' or 'failed', or until
    <timeout> seconds have passed; optionally delete the finished query.

    input - <mail> email address the query belongs to,
            <_id> ID of the query to wait for,
            <timeout> maximum number of seconds to wait,
            <delete> when true, delete the query once it has finished,
            <download> currently unused, kept for compatibility,
            <poll_interval> seconds between two status checks
    output - None; progress is reported with print
    """
    st = time.time()
    done = False
    while True:
        rq = requests.post("https://simurg.iszf.irk.ru/api",
                           json={"method": "check", "args": {"email": mail}}
                          )
        for q in rq.json():
            if _id == q['id']:
                status = q['status']
                print(f'Query {_id} status is {status}')
                done = status in ['done', 'failed']
                # The query was found; the remaining entries are irrelevant.
                break
        if done:
            break
        remaining = timeout - (time.time() - st)
        if remaining <= 0:
            # Say so explicitly instead of returning silently.
            print(f'Timed out after {timeout} s waiting for query {_id}')
            break
        # Never sleep past the deadline.
        time.sleep(min(poll_interval, remaining))
    if done and delete:
        resp = requests.post("https://simurg.iszf.irk.ru/api",
                              json={"method": "delete", "args": {"id": _id}}
                            )
        if resp.ok:
            print(f'Deleted DONE {_id}')
        else:
            print(f'Failed to delete {_id}')

In [10]:
def run_long_series(start, end, step, mail, site):
    """
    Request a long time series as consecutive 'create_series' queries.

    The interval [start, end) is cut into windows of length <step>; each
    window is submitted, then the newest query of <mail> is awaited and
    deleted before the next window is sent.

    input - <start>, <end> datetime bounds of the whole interval,
            <step> positive timedelta, length of one window,
            <mail> email address the queries are created for,
            <site> site identifier passed to the API
    output - None; progress and errors are reported with print
    raises - ValueError when <step> is not positive (the loop could never end)
    """
    if step <= timedelta(0):
        raise ValueError("step must be a positive timedelta")
    tformat = '%Y-%m-%d %H:%M'
    tfrom = start
    while tfrom < end:
        # Do not request data beyond the requested end of the interval.
        till = min(tfrom + step, end)
        try:
            query_args = {"email": mail,
                      "begin": tfrom.strftime(tformat),
                      "end": till.strftime(tformat),
                      "site": site,
                      "flags": {"notification": False}
                     }
            print(f'STARTING query from {tfrom} to {till}')
            resp = requests.post("https://simurg.iszf.irk.ru/api",
                             json={"method": "create_series", "args": query_args})

            if resp.ok:
                print(f'CREATED query from {tfrom} to {till}')
                # NOTE(review): assumes the newest query of this address is the
                # one just created -- confirm nothing else submits in parallel.
                _id = get_last_query_by_mail(mail)
                wait_query_is_done(mail, _id, delete=True)
            else:
                print(f'FAILED TO query from {tfrom} to {till}')
                print(resp.text)
        except Exception as e:
            # Best effort: report the error and move on, exactly like a window
            # rejected by the server.
            print(e)
        # Advance outside the try block: previously an exception skipped this
        # step and the same window was retried forever without any delay.
        tfrom = till

In [None]:

# Requested interval and window length (three days minus one second).
start = datetime(2015, 1, 1)
end = datetime(2015, 12, 31)
step = timedelta(days=3) - timedelta(seconds=1)
# Placeholder address -- replace it with the address the queries belong to.
MAIL = "your@mail.com"
run_long_series(start, end, step, MAIL, 'irkj')

STARTING query from 2015-01-01 00:00:00 to 2015-01-03 23:59:59
CREATED query from 2015-01-01 00:00:00 to 2015-01-03 23:59:59
STARTING query from 2015-01-03 23:59:59 to 2015-01-06 23:59:58
CREATED query from 2015-01-03 23:59:59 to 2015-01-06 23:59:58
