## Dependencies and imports

In [1]:
import pandas as pd
import requests
import json
from datetime import datetime
import os

## Constants

In [2]:
# Suffixes appended to a four-digit year to build the start/end stamps sent to the IMS URL
# (see fetch_data_for_station). Presumably MMDDhhmm -- TODO confirm against the IMS API.
BEGINING_OF_YEAR = "01010000"
ENDING_OF_YEAR = "12312350"

# Inclusive range of years to download.
START_YEAR, END_YEAR = 2000, 2023

# Folder the generated CSV files are written to (relative to the notebook's working directory).
DATA_DIRECTORY = "../data/"

# (key as it appears in the fetched records, column label used in the saved CSV)
column_pairs = [
    ("date", "Date Time"),
    ("BP", "BP (hPa)"),
    ("DiffR", "DiffR (w/m^2)"),
    ("Grad", "Grad (w/m^2)"),
    ("NIP", "NIP (w/m^2)"),
    ("RH", "RH (%)"),
    ("TD", "TD (degC)"),
    ("TDmax", "TDmax (degC)"),
    ("TDmin", "TDmin (degC)"),
    ("WD", "WD (deg)"),
    ("WDmax", "WDmax (deg)"),
    ("WS", "WS (m/s)"),
    ("WS1mm", "Ws1mm (m/s)"),
    ("Ws10mm", "Ws10mm (m/s)"),
    ("WSmax", "WSmax (m/s)"),
    ("STDwd", "STDwd (deg)"),
]

# Column labels only, in the same order as column_pairs.
columns = [label for _, label in column_pairs]


## Utility functions

### processing:

In [3]:
def remove_unwanted_keys(data):
    """Drop fields that are not wanted in the output from every record, in place.

    The keys 'date_for_sort', 'sid', 'TW' and 'sname' are removed from each dict in
    ``data['data']['records']``; records that lack a key are left untouched for that key.
    """
    unwanted = ('date_for_sort', 'sid', 'TW', 'sname')
    for row in data['data']['records']:
        for key in unwanted:
            # pop with a default is a no-op when the key is absent
            row.pop(key, None)

def replace_column_names(data):
    """Rename record keys in place according to the module-level ``column_pairs``.

    Each pair is ``(source_key, target_key)``: when a record contains ``source_key`` its
    value is moved under ``target_key``. The value is popped and re-inserted, so renamed
    keys end up after the untouched ones, in ``column_pairs`` order.
    """
    for row in data['data']['records']:
        for source_key, target_key in column_pairs:
            if source_key not in row:
                continue
            row[target_key] = row.pop(source_key)

def process_data(data):
    """Clean a payload in place: first drop the unwanted keys, then rename the columns."""
    for step in (remove_unwanted_keys, replace_column_names):
        step(data)

def save_to_csv(data, filename, directory=None):
    """Write ``data['data']['records']`` to a CSV file.

    Args:
        data: Dict holding a list of record dicts under ``data['data']['records']``.
        filename: Name of the CSV file to create inside ``directory``.
        directory: Folder to write into. Defaults to the module-level ``DATA_DIRECTORY``.
            The folder is created when it does not exist yet.

    Raises:
        IndexError: If there are no records, so no header can be derived.
    """
    import csv

    records = data['data']['records']
    if not records:
        # Same exception type an empty list produced before, with a clearer message,
        # and raised before any file is created.
        raise IndexError("save_to_csv: no records to write")

    if directory is None:
        directory = DATA_DIRECTORY
    if directory:
        # A fresh checkout may not have the output folder yet.
        os.makedirs(directory, exist_ok=True)

    # Header = every key seen in any record, in first-seen order. Taking only the first
    # record's keys makes DictWriter raise ValueError when a later record has an extra key.
    column_names = list(dict.fromkeys(key for record in records for key in record))

    # Explicit encoding so the output does not depend on the platform default.
    with open(os.path.join(directory, filename), mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=column_names)
        writer.writeheader()
        # Keys missing from a record are written as empty cells (DictWriter's default restval).
        writer.writerows(records)

### fetching:

In [4]:
def fetch_weather_data(station_id, start_date, end_date, timeout=60):
    """Download one time range of measurements for a station from ims.gov.il.

    Args:
        station_id: Station identifier placed in the request URL.
        start_date: Start stamp placed in the URL (callers pass year + ``BEGINING_OF_YEAR``).
        end_date: End stamp placed in the URL (callers pass year + ``ENDING_OF_YEAR``).
        timeout: Seconds to wait for the server before giving up. Without a timeout
            ``requests`` can block indefinitely on a stalled connection.

    Returns:
        The response body parsed as JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout`` seconds.
    """
    # Requested channels, separated by '%26' (the URL-encoded '&').
    channels = "BP%26DiffR%26Grad%26NIP%26RH%26TD%26TDmax%26TDmin%26TW%26WD%26WDmax%26WS%26WS1mm%26Ws10mm%26Ws10maxEnd%26WSmax%26STDwd%26Rain"
    url = (
        "https://ims.gov.il/he/envista_station_all_data_time_range/"
        f"{station_id}/{channels}/{start_date}/{end_date}/1/S"
    )
    response = requests.get(url, timeout=timeout)
    # Fail with the HTTP status instead of an opaque JSON decode error on an error page.
    response.raise_for_status()
    return json.loads(response.content)


def fetch_data_for_station(station_id, start_year, end_year):
    """Fetch every year from ``start_year`` to ``end_year`` (inclusive) for one station.

    Each year is downloaded with ``fetch_weather_data``, cleaned with ``process_data`` and
    turned into a DataFrame; the yearly frames are concatenated with a fresh index.
    """
    def _year_frame(year):
        # One request per calendar year: stamps are the year followed by the fixed suffixes.
        first_stamp = str(year) + BEGINING_OF_YEAR
        last_stamp = str(year) + ENDING_OF_YEAR
        payload = fetch_weather_data(station_id, first_stamp, last_stamp)
        process_data(payload)
        return pd.DataFrame(payload['data']['records'])

    yearly_frames = [_year_frame(year) for year in range(start_year, end_year + 1)]
    return pd.concat(yearly_frames, ignore_index=True)


def get_and_save_station_data(station_name, station_id, start_year, end_year):
    """Download a station's data for the given years and store it as one CSV file.

    The file is named ``{station_name}_data_{start_year}_{end_year}.csv`` and is written
    by ``save_to_csv``.
    """
    station_frame = fetch_data_for_station(station_id, start_year, end_year)
    # save_to_csv and process_data work on the nested-dict layout, so wrap the rows back up.
    payload = {'data': {'records': station_frame.to_dict(orient='records')}}
    process_data(payload)
    save_to_csv(payload, f"{station_name}_data_{start_year}_{end_year}.csv")



## Download from IMS

### download all north data:

In [5]:
# Load stations_ids.json: looked up by station name below to get the id used for fetching.
with open('stations_ids.json', 'r', encoding='utf-8') as f:
    stations_ids = json.load(f)

# Load stations_details.json: each value is read for its 'area' and 'name' entries.
with open('stations_details.json', 'r', encoding='utf-8') as f:
    stations_details = json.load(f)

# Download every station in area 'N' that has a known id and has not been saved yet.
# Only the detail values are needed; the id comes from stations_ids, not from the dict key.
for station_info in stations_details.values():
    if station_info.get('area') != 'N':
        continue
    station_name = station_info.get('name')
    if station_name not in stations_ids:
        continue
    filename = f"{station_name}_data_{START_YEAR}_{END_YEAR}.csv"
    # The CSV is written under DATA_DIRECTORY, so the "already downloaded" check must look
    # there too; checking the bare filename never matches and re-downloads every station.
    if not os.path.exists(os.path.join(DATA_DIRECTORY, filename)):
        get_and_save_station_data(station_name, stations_ids[station_name], START_YEAR, END_YEAR)
