In [None]:
# This processing step is required because the MODIS NDVI files are distributed in HDF (Hierarchical Data Format).

# Source for MODIS vegetation indices data (MOD13A2): https://search.earthdata.nasa.gov/search/granules?p=C2565788905-LPCLOUD!C2565788905-LPCLOUD&pg[1][v]=t&pg[1][m]=download&pg[1][cd]=f&g=G2597482949-LPCLOUD&q=MOD13A2&sp[0]=147.4943%2C70.8291&qt=2010-01-01T00%3A00%3A00.000Z%2C2014-12-31T23%3A59%3A59.999Z&tl=1722677886!3!!&lat=70.12127515262577&long=143.9384765625&zoom=5

In [None]:
import os
import re
import numpy as np
import pandas as pd
from scipy.interpolate import griddata
from pyhdf.SD import SD, SDC
from datetime import datetime, timedelta

In [None]:
# Functions for extracting MODIS data from HDF files and processing it


def extract_info(filepath):
    """Parse the metadata fields encoded in a MOD13A2 granule file name.

    Only the base name of ``filepath`` is inspected; the file is not opened.

    Args:
        filepath: Path (or bare file name) of the granule.

    Returns:
        A dict with the string-valued keys ``product``, ``year``,
        ``day_of_year``, ``h_tile``, ``v_tile``, ``version`` and
        ``timestamp``, or ``None`` when the name does not start with the
        expected ``MOD13A2.AYYYYDDD.hHHvVV.VVV.<digits>.hdf`` layout.
    """
    granule_re = re.compile(
        r"(?P<product>MOD13A2)\.A(?P<year>\d{4})(?P<day_of_year>\d{3})\.h(?P<h_tile>\d{2})v(?P<v_tile>\d{2})\.(?P<version>\d{3})\.(?P<timestamp>\d+)\.hdf"
    )
    # match() anchors at the start of the base name only.
    found = granule_re.match(os.path.basename(filepath))
    return found.groupdict() if found else None

    
#impute the missing data with nearest point (geographical data so the nearest point spatially is likely most accurate)

def impute_values(df, error_value, method='nearest'):
    """Replace fill values in a 2-D grid by interpolating from valid cells.

    Cells equal to ``error_value`` are treated as missing. Each cell is
    addressed by its (column position, row position) and the missing ones are
    filled with ``scipy.interpolate.griddata`` using the remaining cells.

    Args:
        df: DataFrame holding the grid (rows x columns).
        error_value: Sentinel marking a missing cell.
        method: Interpolation method forwarded to ``griddata``
            (``'nearest'`` by default).

    Returns:
        A new DataFrame with the same shape, index and columns as ``df``.
        If the grid contains no valid cell at all there is nothing to
        interpolate from, so an all-NaN frame is returned.
    """
    cleaned = df.where(df != error_value, np.nan)
    flat_values = cleaned.values.flatten()
    valid = ~np.isnan(flat_values)

    # griddata cannot build an interpolator from zero points; report the grid
    # as entirely missing instead of failing inside scipy.
    if not valid.any():
        return pd.DataFrame(
            np.full(cleaned.shape, np.nan),
            index=cleaned.index,
            columns=cleaned.columns,
        )

    col_pos, row_pos = np.meshgrid(
        np.arange(cleaned.shape[1]), np.arange(cleaned.shape[0])
    )
    x_flat = col_pos.flatten()
    y_flat = row_pos.flatten()
    filled = griddata(
        (x_flat[valid], y_flat[valid]),
        flat_values[valid],
        (x_flat, y_flat),
        method=method,
    )
    # Keep the caller's row labels as well as the column labels.
    return pd.DataFrame(
        filled.reshape(cleaned.shape),
        index=cleaned.index,
        columns=cleaned.columns,
    )


# match the location of the towers to location in MODIS data

def latlon_to_modis_sinusoidal(lat, lon):
    """Project geographic coordinates onto the MODIS sinusoidal grid.

    The sinusoidal projection on a sphere of radius ``R`` is
    ``x = R * lon * cos(lat)`` and ``y = R * lat`` (angles in radians).
    The previous implementation used the Mercator formula
    (``y = R * ln(tan(pi/4 + lat/2))`` with no ``cos(lat)`` factor on x),
    which does not match the MODIS tile grid.

    Args:
        lat: Latitude in decimal degrees (north positive).
        lon: Longitude in decimal degrees (east positive).

    Returns:
        Tuple ``(x, y)`` in metres.
    """
    R = 6371007.181  # radius of the MODIS sphere in m

    lat_rad = np.radians(lat)
    lon_rad = np.radians(lon)

    x = R * lon_rad * np.cos(lat_rad)
    y = R * lat_rad
    return x, y

# calculate the modis tile coordinates from the x,y coords

def find_modis_tile(x, y):
    """Return the MODIS tile indices (h, v) containing a sinusoidal point.

    The MODIS sinusoidal grid is 36 x 18 tiles; tile (18, 9) has its
    upper-left corner at the projection origin, h increases eastwards and v
    increases southwards. Each tile is about 1111950.52 m wide (1200 pixels
    of 926.625 m), not 1 200 000 m as previously assumed.

    Args:
        x: Sinusoidal easting in metres.
        y: Sinusoidal northing in metres.

    Returns:
        Tuple ``(h, v)`` of Python ints.
    """
    tile_size = 1111950.5196666666  # tile width/height in m
    # floor (not truncation) so points just outside the grid never round
    # towards tile 0.
    h = int(np.floor(x / tile_size + 18))
    v = int(np.floor(9 - y / tile_size))
    return h, v

# find the pixel on the tile that corresponds to the x,y coords

def find_modis_pixel(x, y, h, v, pixels_per_tile=1200):
    """Return the pixel (column i, row j) of a sinusoidal point inside a tile.

    The tile's upper-left corner is derived from ``h`` and ``v``; ``i`` counts
    pixels eastwards from it and ``j`` counts pixels southwards. A tile is
    about 1111950.52 m wide, so a "1 km" pixel is 926.625 m, not 1000 m.

    Args:
        x: Sinusoidal easting in metres.
        y: Sinusoidal northing in metres.
        h: Horizontal tile index.
        v: Vertical tile index.
        pixels_per_tile: Pixels along one tile edge. 1200 matches the 1 km
            products; pass 2400 or 4800 for the 500 m / 250 m grids.

    Returns:
        Tuple ``(i, j)`` of Python ints.
    """
    tile_size = 1111950.5196666666  # tile width/height in m
    pixel_size = tile_size / pixels_per_tile
    x0 = (h - 18) * tile_size  # easting of the tile's left edge
    y0 = (9 - v) * tile_size   # northing of the tile's top edge
    i = int(np.floor((x - x0) / pixel_size))
    j = int(np.floor((y0 - y) / pixel_size))
    return i, j

def locate_modis_data(lat, lon):
    """Resolve a latitude/longitude to a MODIS tile and a pixel within it.

    Chains the three helpers: the coordinates are projected with
    ``latlon_to_modis_sinusoidal``, the projected point is handed to
    ``find_modis_tile``, and point plus tile go to ``find_modis_pixel``.

    Returns:
        Tuple ``(h, v, i, j)``: tile indices followed by pixel indices.
    """
    projected = latlon_to_modis_sinusoidal(lat, lon)
    tile = find_modis_tile(*projected)
    pixel = find_modis_pixel(*projected, *tile)
    return (*tile, *pixel)


# extract 'day_of_year','year','1 km 16 days NDVI','1 km 16 days NIR reflectance'

def process_hdf_file(filepath, lat, lon):
    """Extract NDVI and NIR reflectance at one location from a MOD13A2 file.

    The acquisition year and day of year come from the file name. Both
    datasets are read in full, fill values (-3000 for NDVI, -1000 for NIR
    reflectance) are replaced by the nearest valid cell, and the pixel
    located for ``lat``/``lon`` is picked out of each grid.

    Args:
        filepath: Path to the ``.hdf`` granule.
        lat: Latitude of the point of interest, decimal degrees.
        lon: Longitude of the point of interest, decimal degrees.

    Returns:
        A one-row DataFrame with columns ``year``, ``day_of_year``,
        ``NDVI_point`` and ``NIR_point``, or ``None`` when the file name does
        not match the pattern ``extract_info`` expects.

    Warns:
        UserWarning: when the tile computed from ``lat``/``lon`` differs from
            the tile in the file name, i.e. the extracted pixel belongs to a
            different place than the one requested.
    """
    import warnings  # local import: only used for the tile sanity check

    info = extract_info(filepath)
    if not info:
        return None

    year = int(info['year'])
    day_of_year = int(info['day_of_year'])

    hdf = SD(filepath, SDC.READ)
    try:
        # get() loads each dataset into memory, so the file handle can be
        # released before the slow imputation step.
        NDVI_ds = pd.DataFrame(hdf.select('1 km 16 days NDVI').get())
        NIR_ds = pd.DataFrame(hdf.select('1 km 16 days NIR reflectance').get())
    finally:
        # Previously the file was never closed, leaking one handle per granule.
        hdf.end()

    NDVI_imp = impute_values(NDVI_ds, error_value=-3000, method='nearest')
    NIR_imp = impute_values(NIR_ds, error_value=-1000, method='nearest')

    h, v, i, j = locate_modis_data(lat, lon)
    file_tile = (int(info['h_tile']), int(info['v_tile']))
    if (h, v) != file_tile:
        warnings.warn(
            f"{os.path.basename(filepath)}: location ({lat}, {lon}) maps to tile "
            f"h{h:02d}v{v:02d} but the file is tile "
            f"h{file_tile[0]:02d}v{file_tile[1]:02d}; "
            "the extracted pixel may not correspond to the requested location."
        )

    # Rows are indexed by j (north -> south) and columns by i (west -> east).
    NDVI_point = NDVI_imp.iloc[j, i]
    NIR_point = NIR_imp.iloc[j, i]

    data = {
        'year': [year],
        'day_of_year': [day_of_year],
        'NDVI_point': [NDVI_point],
        'NIR_point': [NIR_point]
    }
    return pd.DataFrame(data)


# function to process all the files at once in a directory

def process_directory(directory, lat, lon):
    """Run ``process_hdf_file`` on every ``.hdf`` file under a directory.

    The directory tree is walked recursively. Files for which
    ``process_hdf_file`` returns ``None`` are skipped.

    Args:
        directory: Root folder to search.
        lat: Latitude passed through to ``process_hdf_file``.
        lon: Longitude passed through to ``process_hdf_file``.

    Returns:
        The per-file frames concatenated with a fresh integer index, or an
        empty DataFrame when no file produced a result.
    """
    hdf_paths = (
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(directory)
        for name in names
        if name.endswith('.hdf')
    )

    frames = []
    for path in hdf_paths:
        frame = process_hdf_file(path, lat, lon)
        if frame is not None:
            frames.append(frame)

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)


In [None]:
# Test usage: run the whole pipeline for the AR-Vir site and show the result.
directory = '/Users/abigailbase/PROJECT FILES/MODIS /MOD13A2- AR_Vir'
lat = -28.2395  # latitude of the site
lon = -56.1886  # longitude of the site

AR_Vir = process_directory(directory=directory, lat=lat, lon=lon)
print(AR_Vir)


In [None]:
# AR-Vir: process every granule in the site folder and print the combined table.
directory = '/Users/abigailbase/PROJECT FILES/MODIS /MOD13A2- AR_Vir'
lat = -28.2395
lon = -56.1886

result_df = process_directory(directory=directory, lat=lat, lon=lon)

print("\nResulting DataFrame:")
print(result_df)


In [None]:
# Tower sites: variable name -> (folder with the site's MOD13A2 granules,
# latitude, longitude). Coordinates are decimal degrees, south/west negative.
#
# Two longitudes were corrected here:
#   * CA-TP1 was 80.5595 (missing minus sign); the site is in Ontario, Canada.
#   * IT-Col was -52.9249, a copy of the GF-Guy value; 13.5881 is the
#     longitude published for the site - NOTE(review): confirm against the
#     site metadata you are using.
SITES = {
    'AU_Dry': ('/Users/abigailbase/Downloads/AU-Dry', -15.2588, 132.3706),
    'BE_Vie': ('/Users/abigailbase/Downloads/BE-Vie', 50.3049, 5.9981),
    'CA_TP1': ('/Users/abigailbase/Downloads/CA-TP1', 42.6609, -80.5595),
    'CH_Cha': ('/Users/abigailbase/Downloads/CH-Cha', 47.2102, 8.4104),
    'DE_Gri': ('/Users/abigailbase/Downloads/DE-Gri', 50.9500, 13.5126),
    'FR_Pue': ('/Users/abigailbase/Downloads/FR_Pue', 43.7413, 3.5957),
    'GF_Guy': ('/Users/abigailbase/Downloads/GF-Guy', 5.2788, -52.9249),
    'IT_Col': ('/Users/abigailbase/Downloads/IT-Col', 41.8494, 13.5881),
    'NL_Loo': ('/Users/abigailbase/Downloads/NL-Loo', 52.1666, 5.7436),
    'RU_Cok': ('/Users/abigailbase/Downloads/RU-Cok', 70.8291, 147.4943),
    'RU_Fyo': ('/Users/abigailbase/Downloads/RU-Fyo', 56.4615, 32.9221),
    'US_PFa': ('/Users/abigailbase/Downloads/US-PFa', 45.9459, -90.2723),
    'US_Var': ('/Users/abigailbase/Downloads/US-Var', 38.4133, -120.9508),
    'ZA_Kru': ('/Users/abigailbase/Downloads/ZA-Kru', -25.0197, 31.4969),
}

# One pass over the table replaces the copy-pasted per-site cells.
site_frames = {}
for site, (directory, lat, lon) in SITES.items():
    site_frames[site] = process_directory(directory, lat, lon)

# Keep the per-site variable names that the later cells refer to.
AU_Dry = site_frames['AU_Dry']
BE_Vie = site_frames['BE_Vie']
CA_TP1 = site_frames['CA_TP1']
CH_Cha = site_frames['CH_Cha']
DE_Gri = site_frames['DE_Gri']
FR_Pue = site_frames['FR_Pue']
GF_Guy = site_frames['GF_Guy']
IT_Col = site_frames['IT_Col']
NL_Loo = site_frames['NL_Loo']
RU_Cok = site_frames['RU_Cok']
RU_Fyo = site_frames['RU_Fyo']
US_PFa = site_frames['US_PFa']
US_Var = site_frames['US_Var']
ZA_Kru = site_frames['ZA_Kru']

In [None]:
## processing AR-Vir

# Work on a copy of the extraction result, add a proper datetime column built
# from the year and day-of-year, and order the rows chronologically.
AR_Vir = (
    result_df.copy()
    .assign(
        date=lambda frame: pd.to_datetime(
            frame['year'].astype(str) + frame['day_of_year'].astype(str),
            format='%Y%j',
        )
    )
    .sort_values(by='date')
)

In [None]:
def process_df(df):
    """Add ``date`` and ``NIRv`` columns and return the rows sorted by date.

    The input frame is left untouched (the previous version added the new
    columns to the caller's frame as a side effect); a new frame is returned.

    Args:
        df: DataFrame with the columns ``year``, ``day_of_year``,
            ``NDVI_point`` and ``NIR_point``.

    Returns:
        A new DataFrame with the extra columns ``date`` (datetime built from
        year and day of year) and ``NIRv`` (scaled NDVI times scaled NIR
        reflectance), sorted by ``date``.
    """
    # Pad the day of year so every code has the fixed YYYYDDD width.
    day_codes = df['year'].astype(str) + df['day_of_year'].astype(str).str.zfill(3)

    enriched = df.assign(
        date=pd.to_datetime(day_codes, format='%Y%j'),
        # Both inputs are divided by 10000 to undo the integer scaling.
        NIRv=(df['NDVI_point'] / 10000) * (df['NIR_point'] / 10000),
    )
    return enriched.sort_values(by='date')


In [None]:
# Add the date and NIRv columns to the AR-Vir table ...
AR_Vir = process_df(AR_Vir)

# ... and calculate NIRv for all the other site tables.
AU_Dry_NIRv = process_df(AU_Dry)
BE_Vie_NIRv = process_df(BE_Vie)
CA_TP1_NIRv = process_df(CA_TP1)
CH_Cha_NIRv = process_df(CH_Cha)
DE_Gri_NIRv = process_df(DE_Gri)
FR_Pue_NIRv = process_df(FR_Pue)
GF_Guy_NIRv = process_df(GF_Guy)
IT_Col_NIRv = process_df(IT_Col)
NL_Loo_NIRv = process_df(NL_Loo)
RU_Cok_NIRv = process_df(RU_Cok)
RU_Fyo_NIRv = process_df(RU_Fyo)
US_PFa_NIRv = process_df(US_PFa)
US_Var_NIRv = process_df(US_Var)
ZA_Kru_NIRv = process_df(ZA_Kru)


In [None]:
# export all of the dfs to CSVs

In [None]:
# Write every site's table to its CSV file, without the row index.
# NOTE(review): two file names do not follow the '<site>_NIR.csv' pattern of
# the others ('AU_Dry_NIRv.csv' and 'RU_CokNIR.csv'). They are kept exactly as
# they were so existing readers keep working - confirm whether to rename them.
for frame, csv_path in [
    (AR_Vir, '/Users/abigailbase/PROJECT FILES/NIRv DFs/AR_Vir_NIR.csv'),
    (AU_Dry_NIRv, '/Users/abigailbase/PROJECT FILES/NIRv DFs/AU_Dry_NIRv.csv'),
    (BE_Vie_NIRv, '/Users/abigailbase/PROJECT FILES/NIRv DFs/BE_Vie_NIR.csv'),
    (CA_TP1_NIRv, '/Users/abigailbase/PROJECT FILES/NIRv DFs/CA_TP1_NIR.csv'),
    (CH_Cha_NIRv, '/Users/abigailbase/PROJECT FILES/NIRv DFs/CH_Cha_NIR.csv'),
    (DE_Gri_NIRv, '/Users/abigailbase/PROJECT FILES/NIRv DFs/DE_Gri_NIR.csv'),
    (FR_Pue_NIRv, '/Users/abigailbase/PROJECT FILES/NIRv DFs/FR_Pue_NIR.csv'),
    (GF_Guy_NIRv, '/Users/abigailbase/PROJECT FILES/NIRv DFs/GF_Guy_NIR.csv'),
    (IT_Col_NIRv, '/Users/abigailbase/PROJECT FILES/NIRv DFs/IT_Col_NIR.csv'),
    (RU_Cok_NIRv, '/Users/abigailbase/PROJECT FILES/NIRv DFs/RU_CokNIR.csv'),
    (RU_Fyo_NIRv, '/Users/abigailbase/PROJECT FILES/NIRv DFs/RU_Fyo_NIR.csv'),
    (US_PFa_NIRv, '/Users/abigailbase/PROJECT FILES/NIRv DFs/US_PFa_NIR.csv'),
    (US_Var_NIRv, '/Users/abigailbase/PROJECT FILES/NIRv DFs/US_Var_NIR.csv'),
    (ZA_Kru_NIRv, '/Users/abigailbase/PROJECT FILES/NIRv DFs/ZA_Kru_NIR.csv'),
    (NL_Loo_NIRv, '/Users/abigailbase/PROJECT FILES/NIRv DFs/NL_Loo_NIR.csv'),
]:
    frame.to_csv(csv_path, index=False)