In [31]:
import ftplib
import os
import re

import numpy as np
import pandas as pd
import requests

# The file types and their significance are described in the docs:
https://www.ncdc.noaa.gov/swdiws/csv/nx3tvs and https://www.ncdc.noaa.gov/swdiws

1) Hail: events where hail has been recorded
2) Meso and MDA: legacy mesocyclone and Mesocyclone Detection Algorithm. Legacy stops at 2010, and MDA doesn't start until 2007. Maybe use Meso until 2010 and continue with MDA after
3) TVS: Tornadic Vortex Signature Overlay - a product which shows an intense gate-to-gate azimuthal shear associated with tornadic-scale rotation.
4) Structure: Storm Cell Structure Information
5) nldn: Lightning strike data. It gives the count of lightning strikes within 0.1 in lat and lon of a given region.
All the "tiles" files are lightning strike data. Open question: what type of event provoked each strike?

# Connect and get all the files

In [29]:
# List the SWDI CSV directory over anonymous FTP.
# The context manager closes the connection as soon as the listing is fetched,
# so no socket is left open for the rest of the kernel session.
with ftplib.FTP('ftp.ncdc.noaa.gov', timeout=30) as ftp:  # pass the host without protocol
    ftp.login()  # pass credentials if anonymous access is not allowed

    # switch to the directory containing the data
    ftp.cwd('/pub/data/swdi/database-csv/v2/')

    # get the list of files in this ftp dir
    all_files = ftp.nlst()

# Base URL used for the HTTP downloads; file names from the listing are appended to it
httpurl = 'https://www.ncei.noaa.gov/pub/data/swdi/database-csv/v2/'

In [67]:
# Preview the first ten entries of the directory listing
all_files[:10]

['mda-2008.csv.gz',
 'hail-1995.csv.gz',
 'mda-2009.csv.gz',
 'hail-1996.csv.gz',
 'mda-2010.csv.gz',
 'hail-1997.csv.gz',
 'mda-2011.csv.gz',
 'hail-1998.csv.gz',
 'mda-2012.csv.gz',
 'hail-1999.csv.gz']

## Example with downloading all hail files

In [57]:
# Select the per-year hail archives from the listing: names that start with
# "hail-", followed by four digits and a dot.
pattern = r"hail-[0-9]{4}\."
hail_files = sorted(name for name in all_files if re.match(pattern, name))
print(hail_files)

['hail-1995.csv.gz', 'hail-1996.csv.gz', 'hail-1997.csv.gz', 'hail-1998.csv.gz', 'hail-1999.csv.gz', 'hail-2000.csv.gz', 'hail-2001.csv.gz', 'hail-2002.csv.gz', 'hail-2003.csv.gz', 'hail-2004.csv.gz', 'hail-2005.csv.gz', 'hail-2006.csv.gz', 'hail-2007.csv.gz', 'hail-2008.csv.gz', 'hail-2009.csv.gz', 'hail-2010.csv.gz', 'hail-2011.csv.gz', 'hail-2012.csv.gz', 'hail-2013.csv.gz', 'hail-2014.csv.gz', 'hail-2015.csv.gz', 'hail-2016.csv.gz', 'hail-2017.csv.gz', 'hail-2018.csv.gz', 'hail-2019.csv.gz', 'hail-2020.csv.gz', 'hail-2021.csv.gz', 'hail-2022.csv.gz', 'hail-2023.csv.gz']


## This downloads the last hail file in the list (2023), stores it on disk, and then reads it into a pandas DataFrame

In [60]:
# Request the last entry of the sorted hail list over HTTP.
query_parameters = {"downloadformat": "csv"}
# The timeout keeps the cell from hanging indefinitely on a stalled connection
# (same 30 s budget as the FTP listing).
response = requests.get(
    httpurl + hail_files[-1],
    params=query_parameters,
    timeout=30,
)

In [61]:
# Show the URL that was actually requested, including the query string
response.url

'https://www.ncei.noaa.gov/pub/data/swdi/database-csv/v2/hail-2023.csv.gz?downloadformat=csv'

In [62]:
# Quick check that the request succeeded
response.ok

True

In [63]:
# Inspect the HTTP status code returned by the server
response.status_code

200

In [64]:
# Persist the downloaded archive under ../weather_data/hail/.
# Stop here on an HTTP error instead of saving the error body as if it were the archive.
response.raise_for_status()

# Create the target folder if needed so the cell also runs on a fresh checkout.
os.makedirs(r'../weather_data/hail/', exist_ok=True)

with open(r'../weather_data/hail/' + hail_files[-1], "wb") as archive:
    archive.write(response.content)

In [65]:
# Load the archive that was just saved. The first two lines of the file are
# skipped and the '#ZTIME' column is parsed as datetimes.
hail_2023_path = r'../weather_data/hail/' + hail_files[-1]
hail_2023 = pd.read_csv(
    hail_2023_path,
    skiprows=2,
    parse_dates=['#ZTIME'],
)
hail_2023.head()

Unnamed: 0,#ZTIME,LON,LAT,WSR_ID,CELL_ID,RANGE,AZIMUTH,SEVPROB,PROB,MAXSIZE
0,2023-01-01 00:01:45,-76.98093,33.78684,KRAX,K7,135,146,-999,-999,-999.0
1,2023-01-01 00:01:45,-75.8462,36.05329,KRAX,D8,131,79,-999,-999,-999.0
2,2023-01-01 00:03:25,-76.96301,33.80832,KLTX,H7,74,98,0,10,0.5
3,2023-01-01 00:04:05,-64.43412,19.83733,TJUA,L0,139,42,-999,-999,-999.0
4,2023-01-01 00:06:21,-77.72771,32.79592,KMHX,X9,126,200,-999,-999,-999.0


# Data looks great, so we can do the same for all files of any event type

In [73]:
# Event families that can be bulk-downloaded; the prefix of each file name
# in the listing is one of these.
event_types = [
    'hail',
    'structure',
    'tvs',
    'nldn-tiles',
]
# Pick the last entry: the lightning tiles
event = event_types[-1]

In [74]:
# Build the per-year archive pattern for the selected event. re.escape keeps
# any regex metacharacters in the event name literal.
pattern = re.escape(event) + r"-[0-9]{4}\."

# NOTE: the name `hail_files` is kept because the download loop iterates over
# it, even though it holds the files of whichever event was selected.
hail_files = sorted(name for name in all_files if re.match(pattern, name))
print(hail_files)

['nldn-tiles-1986.csv.gz', 'nldn-tiles-1987.csv.gz', 'nldn-tiles-1988.csv.gz', 'nldn-tiles-1989.csv.gz', 'nldn-tiles-1990.csv.gz', 'nldn-tiles-1991.csv.gz', 'nldn-tiles-1992.csv.gz', 'nldn-tiles-1993.csv.gz', 'nldn-tiles-1994.csv.gz', 'nldn-tiles-1995.csv.gz', 'nldn-tiles-1996.csv.gz', 'nldn-tiles-1997.csv.gz', 'nldn-tiles-1998.csv.gz', 'nldn-tiles-1999.csv.gz', 'nldn-tiles-2000.csv.gz', 'nldn-tiles-2001.csv.gz', 'nldn-tiles-2002.csv.gz', 'nldn-tiles-2003.csv.gz', 'nldn-tiles-2004.csv.gz', 'nldn-tiles-2005.csv.gz', 'nldn-tiles-2006.csv.gz', 'nldn-tiles-2007.csv.gz', 'nldn-tiles-2008.csv.gz', 'nldn-tiles-2009.csv.gz', 'nldn-tiles-2010.csv.gz', 'nldn-tiles-2011.csv.gz', 'nldn-tiles-2012.csv.gz', 'nldn-tiles-2013.csv.gz', 'nldn-tiles-2014.csv.gz', 'nldn-tiles-2015.csv.gz', 'nldn-tiles-2016.csv.gz', 'nldn-tiles-2017.csv.gz', 'nldn-tiles-2018.csv.gz', 'nldn-tiles-2019.csv.gz', 'nldn-tiles-2020.csv.gz', 'nldn-tiles-2021.csv.gz', 'nldn-tiles-2022.csv.gz', 'nldn-tiles-2023.csv.gz']


In [None]:
# Download every archive of the selected event into ../weather_data/<event>/.
query_parameters = {"downloadformat": "csv"}

# Create the target folder if needed so the loop also runs on a fresh checkout.
os.makedirs(r'../weather_data/{}'.format(event), exist_ok=True)

for file in hail_files:
    # The timeout keeps one stalled connection from blocking the whole batch.
    response = requests.get(httpurl+file, params=query_parameters, timeout=30)
    if not response.ok:
        # Skip failed requests so an HTTP error body is never saved as an archive.
        print("Download failed for {} (HTTP {})".format(file, response.status_code))
        continue
    print("Downloaded")
    with open(r'../weather_data/{}/{}'.format(event, file), "wb") as f:
        f.write(response.content)
    print('Saved in folder')