# Import

In [None]:
import gzip
import os
import shutil
import tempfile
import netCDF4
from netCDF4 import Dataset
import numpy as np
import pandas as pd
import math
import requests
import time
import random
from PIL import Image, ImageDraw, ImageFilter
import matplotlib.pyplot as plt

%load_ext autoreload
%autoreload 2

# Bioclim - variables - calculation - functions

## BIO 1 / Annual Mean Temperature

In [1]:
def bio_1_annual_mean_temp(year, lon, lat):
    """BIO1: mean of the 'tmp' series for one year and location.

    Args:
        year, lon, lat: Forwarded unchanged to ``retrieve_CRU_data``.

    Returns:
        ``.mean()`` of the object returned by
        ``retrieve_CRU_data('tmp', ...)`` (presumably the monthly mean
        temperatures of that year -- TODO confirm against the helper).
    """
    monthly_tmp = retrieve_CRU_data('tmp', year, lon, lat)
    return monthly_tmp.mean()

## Bio 2 / Mean Diurnal Range (Mean of monthly (max temp - min temp))

In [None]:
def bio_2_mean_diurnal_range(year, lon, lat):
    """BIO2: mean of the element-wise difference 'tmx' - 'tmn'.

    Args:
        year, lon, lat: Forwarded unchanged to ``retrieve_CRU_data``.

    Returns:
        ``.mean()`` of the 'tmx' series minus the 'tmn' series.
    """
    lows = retrieve_CRU_data('tmn', year, lon, lat)
    highs = retrieve_CRU_data('tmx', year, lon, lat)
    return (highs - lows).mean()

## BIO 3 / Isothermality (BIO2/BIO7) (×100)

In [None]:
def bio_3_isothermality(year, lon, lat):
    """BIO3: isothermality, i.e. (BIO2 / BIO7) * 100.

    Args:
        year, lon, lat: Forwarded unchanged to the BIO2 and BIO7 functions.

    Returns:
        The mean diurnal range expressed as a percentage of the annual
        temperature range.
    """
    diurnal_range = bio_2_mean_diurnal_range(year, lon, lat)
    annual_range = bio_7_temp_range(year, lon, lat)
    # The x100 scaling is part of the BIO3 definition (ratio as a percentage).
    return diurnal_range / annual_range * 100

## BIO 4 / Temperature Seasonality (standard deviation ×100)

In [None]:
def bio_4_temperature_seasonality(year, lon, lat):
    """BIO4: temperature seasonality, standard deviation of 'tmp' * 100.

    Args:
        year, lon, lat: Forwarded unchanged to ``retrieve_CRU_data``.

    Returns:
        ``np.std`` (NumPy default, i.e. population standard deviation) of
        the 'tmp' series, multiplied by 100.
    """
    monthly_tmp = retrieve_CRU_data('tmp', year, lon, lat)
    # The x100 scaling is part of the BIO4 definition.
    return np.std(monthly_tmp) * 100

## BIO 5 / Max Temperature of Warmest Month

In [None]:
def bio_5_max_temp_of_max_month(year, lon, lat):
    """BIO5: largest value of the 'tmx' series for one year and location.

    Args:
        year, lon, lat: Forwarded unchanged to ``retrieve_CRU_data``.

    Returns:
        ``np.max`` over the 'tmx' series.
    """
    return np.max(retrieve_CRU_data('tmx', year, lon, lat))

## BIO 6 / Min Temperature of Coldest Month

In [None]:
def bio_6_min_temp_of_min_month(year, lon, lat):
    """BIO6: smallest value of the 'tmn' series for one year and location.

    Args:
        year, lon, lat: Forwarded unchanged to ``retrieve_CRU_data``.

    Returns:
        ``np.min`` over the 'tmn' series.
    """
    return np.min(retrieve_CRU_data('tmn', year, lon, lat))

## BIO 7 / Temperature Annual Range (BIO5-BIO6)

In [None]:
def bio_7_temp_range(year, lon, lat):
    """BIO7: annual temperature range, BIO5 - BIO6.

    Args:
        year, lon, lat: Forwarded unchanged to the BIO5 and BIO6 functions.

    Returns:
        The BIO5 value minus the BIO6 value.
    """
    warmest = bio_5_max_temp_of_max_month(year, lon, lat)
    coldest = bio_6_min_temp_of_min_month(year, lon, lat)
    return warmest - coldest

## BIO 8 / Mean Temperature of Wettest Quarter

In [None]:
def bio_8_mean_temp_wet_q(year, lon, lat):
    """BIO8: mean 'tmp' over the quarter with the largest 'wet' sum.

    Only the four non-overlapping 3-entry windows starting at positions
    0, 3, 6 and 9 are compared; ties go to the earliest window.
    NOTE(review): ranking uses the 'wet' variable rather than 'pre' --
    confirm this is the intended definition of "wettest".

    Args:
        year, lon, lat: Forwarded unchanged to ``retrieve_CRU_data``.

    Returns:
        ``.mean()`` of the three 'tmp' entries of the selected window.
    """
    wet_series = retrieve_CRU_data('wet', year, lon, lat)
    window_starts = [0, 3, 6, 9]
    window_sums = [wet_series[start:start + 3].sum() for start in window_starts]
    first = window_starts[window_sums.index(max(window_sums))]
    tmp_series = retrieve_CRU_data('tmp', year, lon, lat)
    return tmp_series[first:first + 3].mean()

## BIO 9 / Mean Temperature of Driest Quarter

In [None]:
def bio_9_mean_temp_dry_q(year, lon, lat):
    """BIO9: mean 'tmp' over the quarter with the smallest 'wet' sum.

    Only the four non-overlapping 3-entry windows starting at positions
    0, 3, 6 and 9 are compared; ties go to the earliest window.
    NOTE(review): ranking uses the 'wet' variable rather than 'pre' --
    confirm this is the intended definition of "driest".

    Args:
        year, lon, lat: Forwarded unchanged to ``retrieve_CRU_data``.

    Returns:
        ``.mean()`` of the three 'tmp' entries of the selected window.
    """
    wet_series = retrieve_CRU_data('wet', year, lon, lat)
    window_starts = [0, 3, 6, 9]
    window_sums = [wet_series[start:start + 3].sum() for start in window_starts]
    first = window_starts[window_sums.index(min(window_sums))]
    tmp_series = retrieve_CRU_data('tmp', year, lon, lat)
    return tmp_series[first:first + 3].mean()

## BIO 10 / Mean Temperature of Warmest Quarter

In [None]:
def bio_10_mean_temp_warm_q(year, lon, lat):
    """BIO10: mean 'tmp' over the quarter with the largest 'tmp' sum.

    Only the four non-overlapping 3-entry windows starting at positions
    0, 3, 6 and 9 are compared; ties go to the earliest window.

    Args:
        year, lon, lat: Forwarded unchanged to ``retrieve_CRU_data``.

    Returns:
        ``.mean()`` of the three 'tmp' entries of the selected window.
    """
    tmp_series = retrieve_CRU_data('tmp', year, lon, lat)
    window_starts = [0, 3, 6, 9]
    window_sums = [tmp_series[start:start + 3].sum() for start in window_starts]
    first = window_starts[window_sums.index(max(window_sums))]
    return tmp_series[first:first + 3].mean()

## BIO 11 / Mean Temperature of Coldest Quarter

In [None]:
def bio_11_mean_temp_cold_q(year, lon, lat):
    """BIO11: mean 'tmp' over the quarter with the smallest 'tmp' sum.

    Only the four non-overlapping 3-entry windows starting at positions
    0, 3, 6 and 9 are compared; ties go to the earliest window.

    Args:
        year, lon, lat: Forwarded unchanged to ``retrieve_CRU_data``.

    Returns:
        ``.mean()`` of the three 'tmp' entries of the selected window.
    """
    tmp_series = retrieve_CRU_data('tmp', year, lon, lat)
    window_starts = [0, 3, 6, 9]
    window_sums = [tmp_series[start:start + 3].sum() for start in window_starts]
    first = window_starts[window_sums.index(min(window_sums))]
    return tmp_series[first:first + 3].mean()

## BIO 12 / Annual Precipitation

In [None]:
def bio_12_annual_prec(year, lon, lat):
    """BIO12: sum of the 'pre' series for one year and location.

    Args:
        year, lon, lat: Forwarded unchanged to ``retrieve_CRU_data``.

    Returns:
        ``.sum()`` of the object returned by ``retrieve_CRU_data('pre', ...)``.
    """
    monthly_pre = retrieve_CRU_data('pre', year, lon, lat)
    return monthly_pre.sum()

## BIO 13 / Precipitation of Wettest Month

In [None]:
def bio_13_prec_wet_mo(year, lon, lat):
    """BIO13: 'pre' value at the position where 'wet' is largest.

    NOTE(review): the month is picked via ``np.argmax`` on the 'wet'
    series, not on 'pre' itself -- confirm this is intended.

    Args:
        year, lon, lat: Forwarded unchanged to ``retrieve_CRU_data``.

    Returns:
        The single 'pre' entry at the selected position.
    """
    wet_series = retrieve_CRU_data('wet', year, lon, lat)
    peak_position = np.argmax(wet_series)
    pre_series = retrieve_CRU_data('pre', year, lon, lat)
    return pre_series[peak_position]

## BIO 14 / Precipitation of Driest Month

In [None]:
def bio_14_prec_dry_mo(year, lon, lat):
    """BIO14: 'pre' value at the position where 'wet' is smallest.

    NOTE(review): the month is picked via ``np.argmin`` on the 'wet'
    series, not on 'pre' itself -- confirm this is intended.

    Args:
        year, lon, lat: Forwarded unchanged to ``retrieve_CRU_data``.

    Returns:
        The single 'pre' entry at the selected position.
    """
    wet_series = retrieve_CRU_data('wet', year, lon, lat)
    trough_position = np.argmin(wet_series)
    pre_series = retrieve_CRU_data('pre', year, lon, lat)
    return pre_series[trough_position]

## BIO 15 / Precipitation Seasonality (Coefficient of Variation)

In [None]:
def bio_15_prec_wet_mo(year, lon, lat):
    """BIO15: precipitation seasonality (coefficient of variation, in %).

    Despite the ``_wet_mo`` suffix in its name, this function computes the
    coefficient of variation of the 'pre' series.

    Args:
        year, lon, lat: Forwarded unchanged to ``retrieve_CRU_data``.

    Returns:
        Sample standard deviation (``ddof=1``) of 'pre' divided by its
        mean, multiplied by 100.
    """
    monthly_pre = retrieve_CRU_data('pre', year, lon, lat)
    sample_std = np.std(monthly_pre, ddof=1)
    return sample_std / np.mean(monthly_pre) * 100

## BIO 16 / Precipitation of Wettest Quarter

In [None]:
def bio_16_pre_wet_q(year, lon, lat):
    """BIO16: total 'pre' over the quarter with the largest 'wet' sum.

    Only the four non-overlapping 3-entry windows starting at positions
    0, 3, 6 and 9 are compared; ties go to the earliest window.
    NOTE(review): ranking uses the 'wet' variable rather than 'pre' --
    confirm this is the intended definition of "wettest".

    Args:
        year, lon, lat: Forwarded unchanged to ``retrieve_CRU_data``.

    Returns:
        ``.sum()`` of the three 'pre' entries of the selected window.
    """
    wet_series = retrieve_CRU_data('wet', year, lon, lat)
    window_starts = [0, 3, 6, 9]
    window_sums = [wet_series[start:start + 3].sum() for start in window_starts]
    first = window_starts[window_sums.index(max(window_sums))]
    pre_series = retrieve_CRU_data('pre', year, lon, lat)
    # Precipitation of a quarter is an accumulated amount (like BIO12 for
    # the year), so the three months are summed rather than averaged.
    return pre_series[first:first + 3].sum()

## BIO 17 / Precipitation of Driest Quarter

In [None]:
def bio_17_pre_dry_q(year, lon, lat):
    """BIO17: total 'pre' over the quarter with the smallest 'wet' sum.

    Only the four non-overlapping 3-entry windows starting at positions
    0, 3, 6 and 9 are compared; ties go to the earliest window.
    NOTE(review): ranking uses the 'wet' variable rather than 'pre' --
    confirm this is the intended definition of "driest".

    Args:
        year, lon, lat: Forwarded unchanged to ``retrieve_CRU_data``.

    Returns:
        ``.sum()`` of the three 'pre' entries of the selected window.
    """
    wet_series = retrieve_CRU_data('wet', year, lon, lat)
    window_starts = [0, 3, 6, 9]
    window_sums = [wet_series[start:start + 3].sum() for start in window_starts]
    first = window_starts[window_sums.index(min(window_sums))]
    pre_series = retrieve_CRU_data('pre', year, lon, lat)
    # Precipitation of a quarter is an accumulated amount (like BIO12 for
    # the year), so the three months are summed rather than averaged.
    return pre_series[first:first + 3].sum()

## BIO 18 / Precipitation of Warmest Quarter

In [None]:
def bio_18_pre_dry_q(year, lon, lat):
    """BIO18: total 'pre' over the quarter with the largest 'tmp' sum.

    Despite the ``_dry_q`` suffix in its name, the quarter is selected by
    temperature (warmest), not by dryness. Only the four non-overlapping
    3-entry windows starting at positions 0, 3, 6 and 9 are compared; ties
    go to the earliest window.

    Args:
        year, lon, lat: Forwarded unchanged to ``retrieve_CRU_data``.

    Returns:
        ``.sum()`` of the three 'pre' entries of the selected window.
    """
    tmp_series = retrieve_CRU_data('tmp', year, lon, lat)
    window_starts = [0, 3, 6, 9]
    window_sums = [tmp_series[start:start + 3].sum() for start in window_starts]
    first = window_starts[window_sums.index(max(window_sums))]
    pre_series = retrieve_CRU_data('pre', year, lon, lat)
    # Precipitation of a quarter is an accumulated amount (like BIO12 for
    # the year), so the three months are summed rather than averaged.
    return pre_series[first:first + 3].sum()

## BIO 19 / Precipitation of Coldest Quarter

In [None]:
def bio_19_pre_cld_q(year, lon, lat):
    """BIO19: total 'pre' over the quarter with the smallest 'tmp' sum.

    Only the four non-overlapping 3-entry windows starting at positions
    0, 3, 6 and 9 are compared; ties go to the earliest window.

    Args:
        year, lon, lat: Forwarded unchanged to ``retrieve_CRU_data``.

    Returns:
        ``.sum()`` of the three 'pre' entries of the selected window.
    """
    tmp_series = retrieve_CRU_data('tmp', year, lon, lat)
    window_starts = [0, 3, 6, 9]
    window_sums = [tmp_series[start:start + 3].sum() for start in window_starts]
    first = window_starts[window_sums.index(min(window_sums))]
    pre_series = retrieve_CRU_data('pre', year, lon, lat)
    # Precipitation of a quarter is an accumulated amount (like BIO12 for
    # the year), so the three months are summed rather than averaged.
    return pre_series[first:first + 3].sum()

# Image scraping

In [None]:
def get_urls_to_scrap(
    multimedia_path='/Users/davidschildberger/code/dadavie/planetary_garden/raw_data/multimedia.txt',
    info_path="/Users/davidschildberger/code/dadavie/planetary_garden/raw_data/data_inkl_bioclim_grs.csv",
):
    """Join the plant info table with its image URLs on ``gbifID``.

    Args:
        multimedia_path: Tab-separated file with at least the columns
            ``gbifID`` and ``identifier``.
        info_path: Comma-separated file with at least a ``gbifID`` column.

    Returns:
        A DataFrame indexed by ``gbifID`` holding every column of the info
        table plus ``identifier``. The merge is an inner join, so only ids
        present in both files are kept; an id with several multimedia rows
        yields several result rows.
    """
    # Multimedia table: only the 'identifier' column is needed.
    df_m = pd.read_csv(multimedia_path, sep='\t', index_col='gbifID')
    df_m = df_m[['identifier']]

    # Plant info table, indexed the same way so the merge can run on the index.
    df_i = pd.read_csv(info_path)
    df_i = df_i.set_index('gbifID')

    return df_i.merge(df_m, left_index=True, right_index=True)

In [None]:
def img_scraping(max_images=10000):
    """Download images that do not yet have a thumbnail.

    Reads the module-level ``media_df`` (indexed by gbifID, with an
    ``identifier`` column holding the image URL), skips every id whose
    ``<id>.<ext>`` file already exists in the thumbnails folder, and stores
    the rest as ``<id>.jpg`` in the images folder. Failures are reported and
    skipped so one bad URL does not stop the run.

    Args:
        max_images: Upper bound on the number of download attempts per call.
    """
    thumb_dir = '/Users/davidschildberger/code/dadavie/planetary_garden/raw_data/thumbnails/'
    image_dir = '/Users/davidschildberger/code/dadavie/planetary_garden/raw_data/images/'

    # Filename stems are strings, so ids are compared as strings as well;
    # comparing raw integer index values against them never matches.
    done_stems = {os.path.splitext(filename)[0] for filename in os.listdir(thumb_dir)}

    media_df_nodupl = media_df[~media_df.index.duplicated(keep='first')]
    pending = [gbif_id for gbif_id in media_df_nodupl.index
               if str(gbif_id) not in done_stems]

    ## shuffle the list - requests at the same api in a row might deny access
    random.shuffle(pending)

    # Slicing (instead of a fixed range) copes with fewer pending ids than
    # max_images without relying on a swallowed IndexError.
    for gbif_id in pending[:max_images]:
        try:
            url = media_df_nodupl.loc[gbif_id, 'identifier']
            with requests.get(url, stream=True, timeout=30) as r:
                # Do not store HTTP error pages as .jpg files.
                r.raise_for_status()
                r.raw.decode_content = True
                with open(f"{image_dir}{int(gbif_id)}.jpg", 'wb') as f:
                    shutil.copyfileobj(r.raw, f)
        except Exception as err:
            # Best-effort batch job: report and move on. Unlike a bare
            # ``except``, this lets KeyboardInterrupt stop the loop.
            print(f"Skipping {gbif_id}: {err}")

        time.sleep(random.uniform(0.5, 1))

In [None]:
def img_croping_and_scaling():
    """Create 300x300 thumbnails for every file in the images folder.

    Each image is center-cropped to its largest square via
    ``crop_max_square``, resized to 300x300 and saved under the same
    filename in the thumbnails folder. Files that cannot be processed are
    reported and skipped.
    """
    image_dir = '/Users/davidschildberger/code/dadavie/planetary_garden/raw_data/images/'
    thumb_dir = '/Users/davidschildberger/code/dadavie/planetary_garden/raw_data/thumbnails/'

    for filename in os.listdir(image_dir):
        # macOS folder metadata; skipping it here (instead of list.remove)
        # also works when the file does not exist.
        if filename == '.DS_Store':
            continue
        try:
            # The context manager closes the source file handle.
            with Image.open(f"{image_dir}{filename}") as image:
                thumbnail = crop_max_square(image).resize((300, 300))
                thumbnail.save(f"{thumb_dir}{filename}")
        except Exception as err:
            # Best-effort batch job: report and move on. Unlike a bare
            # ``except``, this lets KeyboardInterrupt stop the loop.
            print(f"Skipping {filename}: {err}")

In [None]:
def crop_max_square(pil_img):
    """Center-crop ``pil_img`` to the largest square that fits inside it.

    Args:
        pil_img: Object exposing ``size`` (width, height) and ``crop``.

    Returns:
        Result of ``crop_center`` with both crop dimensions set to the
        shorter side of the image.
    """
    side = min(pil_img.size)
    return crop_center(pil_img, side, side)

In [None]:
def crop_center(pil_img, crop_width, crop_height):
    """Crop a ``crop_width`` x ``crop_height`` box centered in ``pil_img``.

    Args:
        pil_img: Object exposing ``size`` (width, height) and ``crop``.
        crop_width: Width of the crop box.
        crop_height: Height of the crop box.

    Returns:
        Whatever ``pil_img.crop`` returns for the centered
        ``(left, upper, right, lower)`` box; edges use floor division.
    """
    img_width, img_height = pil_img.size
    left = (img_width - crop_width) // 2
    upper = (img_height - crop_height) // 2
    right = (img_width + crop_width) // 2
    lower = (img_height + crop_height) // 2
    return pil_img.crop((left, upper, right, lower))