# Driver for MultiProcessing NCEP Data

## 1. Install the libraries needed to run the program

In [None]:
# Only run this the first time running this program. There may be some other dependencies you may need to install. 
# Check the requirements based on the error.

!pip install --upgrade bottleneck
!pip install cfgrib==0.9.13.0
!pip install ipynb
!pip install pandas
!pip install requests
!pip install xarray
!pip install multiprocess
!pip install python-csv

## 2. Import libraries and set the working directory

In [None]:
import os
import requests
import xarray as xr
from multiprocessing import Pool
from ipynb.fs.defs.MULTI_FUNC import process_file
import csv


# Folder this notebook should run from
desired_directory = '/Users/trak/Downloads/TEMPOOL'

# Make that folder the current working directory of the kernel process
os.chdir(desired_directory)

# Echo the directory now in effect so the switch can be checked by eye
active_directory = os.getcwd()
print("Current Working Directory: ", active_directory)

## 3. Define the range of dates (year/month/day) to pull

In [None]:
from calendar import Calendar, monthrange

# Kept so that any other cell still referencing the shared calendar object
# keeps working; the date list below no longer needs it.
c = Calendar()

# First date to pull (inclusive) ...
start_year = 2020
start_month = 1
start_day = 1

# ... and last month to pull (inclusive; the whole month is included)
end_year = 2024
end_month = 12


def build_date_list(start_year, start_month, start_day, end_year, end_month):
    """Return every date in the requested span as 'YYYY-MM-DD' strings.

    The span runs from start_year/start_month/start_day through the last day
    of end_year/end_month, in chronological order.

    Args:
        start_year: First year of the span.
        start_month: First month (1-12) within start_year.
        start_day: First day within the first month; values below 1 are
            treated as 1.
        end_year: Last year of the span.
        end_month: Last month (1-12) within end_year.

    Returns:
        A list of date strings; empty when the end precedes the start.
    """
    dates = []
    for year in range(start_year, end_year + 1):
        # start_month only limits the first year and end_month only limits
        # the last one; every year in between runs January through December.
        first_month = start_month if year == start_year else 1
        last_month = end_month if year == end_year else 12
        for month in range(first_month, last_month + 1):
            # start_day only trims the very first month of the span.
            is_first_month = year == start_year and month == start_month
            first_day = max(start_day, 1) if is_first_month else 1
            days_in_month = monthrange(year, month)[1]
            for day in range(first_day, days_in_month + 1):
                dates.append(f"{year}-{month:02d}-{day:02d}")
    return dates


# Generate the list of dates
dmy_list = build_date_list(start_year, start_month, start_day, end_year, end_month)

# Verify the dates
print(dmy_list)
print(len(dmy_list))

## 4. Set the lat/lon pairs

In [None]:
# Coordinates of the points to extract, stored as two parallel lists:
# lat_l[k] and lon_l[k] together describe point k. To add a point, append
# one value to each list so both stay the same length.
lat_l = [
    13.75,
    13.75,
    14,
    13.75,
]
lon_l = [
    100.5,
    100.75,
    100.5,
    100.25,
]

## 5. Generate all the file names to extract

In [None]:
# Build the work list: one entry per (date, cycle hour, pixel), each a tuple
# of (date string, hour, file URL, pixel ID, latitude, longitude).
file_to_load = []
# Add pixels as needed. Each ID labels the lat/lon pair at the same index in
# lat_l / lon_l, so all three lists must have the same length.
pixID = ['A', 'B', 'C', 'D']

# Fail early with a clear message instead of an IndexError (too few
# coordinates) or silently ignored coordinates (too few pixel IDs).
if not (len(pixID) == len(lat_l) == len(lon_l)):
    raise ValueError(
        f"pixID, lat_l and lon_l must have the same length, got "
        f"{len(pixID)}, {len(lat_l)} and {len(lon_l)}"
    )

for dmy in dmy_list:
    year, month, day = dmy.split('-')
    # Pull from 00,06,12,18
    for hour in ['00', '06', '12', '18']:
        # The URL depends only on the date and hour, so build it once and
        # share it between all pixels of this cycle.
        url = f"https://data.rda.ucar.edu/ds084.1/{year}/{year}{month}{day}/gfs.0p25.{year}{month}{day}{hour}.f000.grib2"
        for pix, lat, lon in zip(pixID, lat_l, lon_l):
            file_to_load.append((dmy, hour, url, pix, lat, lon))

# Print the total number of files and show one sample entry below
print(f"Total files to process: {len(file_to_load)}")
sample_index = 12
# A very short date range may produce fewer entries than the sample index.
if len(file_to_load) > sample_index:
    print(file_to_load[sample_index])

## 6. Main Program

In [None]:
%%capture

import multiprocessing as mp
import os
import requests
import xarray as xr
from multiprocessing import Pool
from ipynb.fs.defs.MULTI_FUNC_FOURs import wrapper_process_file
import csv
from IPython.display import clear_output



# Process files in chunks. Every entry of a chunk gets its own worker
# process, so chunk_size is also the number of workers running at once.
chunk_size = 1  # Adjust this based on memory constraints
output_csv = 'YOURFILENAME.csv'

# Wind-component columns, listed in the same order in which the matching
# result dicts are passed to wrapper_process_file and written to each row.
u_fields = ['u10', 'u20', 'u30', 'u40', 'u50', 'u80', 'u100']
v_fields = ['v10', 'v20', 'v30', 'v40', 'v50', 'v80', 'v100']

# CSV header. It is assembled from the same lists that drive the row layout,
# so a column name cannot drift away from the value written under it.
fields = (
    ['Date', 'Time', 'VRATE', 'PBLH', 'WIND_SPEED(GUST)', 'PRATE', 'Temp',
     'pixelID', 'lat', 'lon']
    + u_fields
    + v_fields
    + ['file']
)

for i in range(0, len(file_to_load), chunk_size):
    clear_output(wait=True)
    chunk = file_to_load[i:i+chunk_size]
    rows = []
    # Manager-backed dicts are shared with the worker processes. Results are
    # read back by position in the chunk (idx), so row order does not depend
    # on which worker finishes first.
    # NOTE(review): presumably each worker stores its values under its own
    # idx -- confirm against wrapper_process_file.
    with mp.Manager() as manager:
        VR, P, G, PR, T, meta = (manager.dict() for _ in range(6))
        u_results = [manager.dict() for _ in u_fields]
        v_results = [manager.dict() for _ in v_fields]

        # Start one worker per chunk entry
        processes = []
        for idx, (dmy, hour, file, pix, lat, lon) in enumerate(chunk):
            p = mp.Process(
                target=wrapper_process_file,
                args=(file, VR, P, G, PR, T, idx, meta,
                      *u_results, *v_results, pix, lat, lon),
            )
            p.start()
            processes.append(p)
            clear_output(wait=True)

        # Wait for every worker of this chunk before reading results
        for p in processes:
            p.join()
            clear_output(wait=True)

        # Copy the results out while the manager is still alive. A key that
        # was never set comes back as None, which csv writes as an empty cell.
        for idx, (dmy, hour, file, pix, lat, lon) in enumerate(chunk):
            rows.append([
                dmy, hour,
                VR.get(idx), P.get(idx), G.get(idx), PR.get(idx), T.get(idx),
                pix, lat, lon,
                *(u.get(idx) for u in u_results),
                *(v.get(idx) for v in v_results),
                meta.get(idx),
            ])
    # Leaving the `with` block shuts the manager down, which releases the
    # shared dicts of this chunk before the next one is created.

    # Write the header only into a new or empty file, so appending to an
    # existing results file does not insert a second header in the middle.
    write_header = not os.path.exists(output_csv) or os.path.getsize(output_csv) == 0
    # newline='' lets the csv module control line endings (avoids blank
    # lines between rows on Windows).
    with open(output_csv, 'a', newline='') as csvfile:
        csvwriter = csv.writer(csvfile)
        if write_header:
            csvwriter.writerow(fields)
        csvwriter.writerows(rows)


print("Processing complete.")
