In [7]:
import requests
import os
from tqdm import tqdm

## Create directories to store data

In [8]:
# Create the top-level output directories. exist_ok=True makes the cell safe
# to re-run and avoids the check-then-create race of `exists()` + `mkdir()`.
for directory in ('../data', '../models'):
    os.makedirs(directory, exist_ok=True)

In [9]:
# Create the per-split data directories and the per-model directories.
# makedirs also creates missing parents, so this cell does not depend on the
# parent directories having been created beforehand, and exist_ok=True makes
# it safe to re-run.
for directory in (
    '../data/train',
    '../data/test',
    '../models/original_baseline_model',
    '../models/new_baseline_model',
):
    os.makedirs(directory, exist_ok=True)

## Download train data

In [10]:
# Fetch the directory listing page for the training split.
url = 'https://portal.nersc.gov/project/ClimateNet/climatenet_new/train/'
r = requests.get(url, allow_redirects=True)
# Fail loudly on 4xx/5xx instead of parsing an error page as a file listing.
r.raise_for_status()

In [11]:
# Collect the linked file names from the listing page held in `r`.
# iter_lines() yields bytes, so decode explicitly rather than relying on the
# "b'...'" repr that str(bytes) produces.
file_names = []
for raw_line in tqdm(r.iter_lines()):
    line = raw_line.decode('utf-8', errors='replace')
    # Require the href marker as well, so a stray line that merely mentions
    # "data-" cannot raise IndexError in the split below.
    if "data-" in line and 'href="' in line:
        file_names.append(line.split('href="')[1].split('">data')[0])

In [None]:
# Download every listed training file into ../data/train.
# Loop-local names are used so the listing response `r` and its `url` are not
# overwritten, which keeps the parsing cell re-runnable after this one.
train_base_url = 'https://portal.nersc.gov/project/ClimateNet/climatenet_new/train/'
for file_name in tqdm(file_names):
    file_response = requests.get(train_base_url + file_name, allow_redirects=True)
    # Do not save an HTTP error body under a data file name.
    file_response.raise_for_status()
    # The context manager guarantees the file is flushed and closed.
    with open('../data/train/' + file_name, 'wb') as out_file:
        out_file.write(file_response.content)

## Download test data

In [13]:
# Fetch the directory listing page for the test split.
url = 'https://portal.nersc.gov/project/ClimateNet/climatenet_new/test/'
r = requests.get(url, allow_redirects=True)
# Fail loudly on 4xx/5xx instead of parsing an error page as a file listing.
r.raise_for_status()

In [14]:
# Collect the linked file names from the listing page held in `r`.
# iter_lines() yields bytes, so decode explicitly rather than relying on the
# "b'...'" repr that str(bytes) produces.
file_names = []
for raw_line in tqdm(r.iter_lines()):
    line = raw_line.decode('utf-8', errors='replace')
    # Require the href marker as well, so a stray line that merely mentions
    # "data-" cannot raise IndexError in the split below.
    if "data-" in line and 'href="' in line:
        file_names.append(line.split('href="')[1].split('">data')[0])

In [None]:
# Download every listed test file into ../data/test.
# Loop-local names are used so the listing response `r` and its `url` are not
# overwritten, which keeps the parsing cell re-runnable after this one.
test_base_url = 'https://portal.nersc.gov/project/ClimateNet/climatenet_new/test/'
for file_name in tqdm(file_names):
    file_response = requests.get(test_base_url + file_name, allow_redirects=True)
    # Do not save an HTTP error body under a data file name.
    file_response.raise_for_status()
    # The context manager guarantees the file is flushed and closed.
    with open('../data/test/' + file_name, 'wb') as out_file:
        out_file.write(file_response.content)

## Make a CSV from a random subset of `nb_dates` dates for simple model training

In [16]:
import numpy as np
import pandas as pd
from os import path, listdir
import xarray as xr
from tqdm import tqdm

def get_data(base_path, out_path, nb_dates, seed=None):
    """Write a CSV built from a random sample of the files in a directory.

    Selects up to ``nb_dates`` entries of ``base_path`` at random, loads each
    with ``xr.load_dataset``, concatenates them along the ``time`` dimension,
    converts the result to a DataFrame sorted by ``time`` and writes it to
    ``out_path``.

    Parameters
    ----------
    base_path : str
        Directory whose entries are loaded; every entry is passed to
        ``xr.load_dataset`` as-is (no filtering by extension).
    out_path : str
        Destination path of the CSV file.
    nb_dates : int
        Maximum number of files to sample.
    seed : int, optional
        Seed for the file selection. ``None`` (the default) draws a different
        sample on every call, as before.

    Returns
    -------
    None
        Whatever ``DataFrame.to_csv(out_path)`` returns.

    Raises
    ------
    ValueError
        If no file is selected (empty directory or ``nb_dates`` of 0).
    """
    # listdir order is arbitrary; sort first so a given seed always selects
    # the same files.
    files = sorted(listdir(base_path))
    files = np.random.RandomState(seed).permutation(files)[:nb_dates]
    if len(files) == 0:
        raise ValueError(
            "no files selected from %r (nb_dates=%r)" % (base_path, nb_dates)
        )
    data = [xr.load_dataset(path.join(base_path, file)) for file in tqdm(files)]
    df = xr.concat(data, dim='time').to_dataframe()
    # sort_values returns a new frame, so the result must be kept. A stable
    # sort preserves the existing row order within each time value.
    df = df.sort_values('time', kind='mergesort')
    return df.to_csv(out_path)

In [None]:
# Build ../data/trainSample.csv from 3 randomly selected entries of ../data/train.
get_data('../data/train','../data/trainSample.csv',3)

In [None]:
# Build ../data/testSample.csv from 1 randomly selected entry of ../data/test.
get_data('../data/test','../data/testSample.csv',1)

## Download pre-trained models

In [None]:
# Download the config file into ../models/original_baseline_model.
url = 'https://portal.nersc.gov/project/ClimateNet/climatenet_new/model/config.json'
r = requests.get(url, allow_redirects=True)
# Do not save an HTTP error body as the config file.
r.raise_for_status()
# The context manager guarantees the file is flushed and closed.
with open('../models/original_baseline_model/config.json', 'wb') as out_file:
    out_file.write(r.content)

In [None]:
# Download the weights file into ../models/original_baseline_model.
url = 'https://portal.nersc.gov/project/ClimateNet/climatenet_new/model/weights.pth'
r = requests.get(url, allow_redirects=True)
# Do not save an HTTP error body as the weights file.
r.raise_for_status()
# The context manager guarantees the file is flushed and closed.
with open('../models/original_baseline_model/weights.pth', 'wb') as out_file:
    out_file.write(r.content)

In [None]:
# Download config_new.json and store it as config.json in
# ../models/new_baseline_model.
url = 'https://portal.nersc.gov/project/ClimateNet/climatenet_new/model/config_new.json'
r = requests.get(url, allow_redirects=True)
# Do not save an HTTP error body as the config file.
r.raise_for_status()
# The context manager guarantees the file is flushed and closed.
with open('../models/new_baseline_model/config.json', 'wb') as out_file:
    out_file.write(r.content)

In [None]:
# Download weights_new.pth and store it as weights.pth in
# ../models/new_baseline_model.
url = 'https://portal.nersc.gov/project/ClimateNet/climatenet_new/model/weights_new.pth'
r = requests.get(url, allow_redirects=True)
# Do not save an HTTP error body as the weights file.
r.raise_for_status()
# The context manager guarantees the file is flushed and closed.
with open('../models/new_baseline_model/weights.pth', 'wb') as out_file:
    out_file.write(r.content)