In [31]:
!pip install reverse-geocoder

Collecting reverse-geocoder
  Downloading reverse_geocoder-1.5.1.tar.gz (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m14.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: reverse-geocoder
  Building wheel for reverse-geocoder (setup.py) ... [?25ldone
[?25h  Created wheel for reverse-geocoder: filename=reverse_geocoder-1.5.1-py3-none-any.whl size=2268064 sha256=cadee9dad87eec3382986a40422b4d5d275ab8187e0ecbcf9b03ef7cb80b63d4
  Stored in directory: /Users/nolanschock/Library/Caches/pip/wheels/65/42/5e/223fcd5dc869ff98d5ee6b19e236f82828e4e3c039328ebe20
Successfully built reverse-geocoder
Installing collected packages: reverse-geocoder
Successfully installed reverse-geocoder-1.5.1


In [32]:
import pandas as pd
import numpy as np
import ftplib
import requests
import os
import re
import reverse_geocoder as rg

In [6]:
# Maps each local folder name (key) to the tag used in the matching file
# names of the server listing (value), e.g. 'lightning' -> 'nldn-tiles'.
event_types = {
    'hail': 'hail',
    'storm_structure': 'structure',
    'tornados': 'tvs',
    'lightning': 'nldn-tiles',
    'mesocyclone': 'mda',
}

# One download folder per event type. os.makedirs(..., exist_ok=True) also
# creates the parent '../weather_data' folder when it is missing, which
# os.mkdir cannot do (it raises FileNotFoundError on a fresh checkout).
for event in event_types:
    path = '../weather_data/'+event
    os.makedirs(path, exist_ok=True)

In [7]:
# Base URL the files are downloaded from over HTTPS; the FTP session below is
# only used to obtain the list of file names.
httpurl = 'https://www.ncei.noaa.gov/pub/data/swdi/database-csv/v2/'

# connect to the server
# The `with` block closes the connection as soon as the listing is in hand,
# instead of leaving the session open for the life of the kernel.
with ftplib.FTP('ftp.ncdc.noaa.gov', timeout=30) as ftp: #pass the url without protocol
    ftp.login() #pass credentials if anonymous access is not allowed

    # switch to the directory containing the data
    ftp.cwd('/pub/data/swdi/database-csv/v2/')

    # get the list of files in this ftp dir
    all_files = ftp.nlst()

In [8]:
def download_file(year, event_type):
    """Download the file for one year and event type unless it is already on disk.

    Parameters
    ----------
    year : int or str
        Year to look for. It is matched as ``"<tag>-<year>"`` inside the names
        of the module-level ``all_files`` listing; the first match is used.
    event_type : str
        Key of the module-level ``event_types`` dict. It selects the file-name
        tag and is also the sub-folder of ``../weather_data`` that is written to.

    Returns
    -------
    str or None
        A short message when nothing was saved (no matching file, file already
        on disk, or the HTTP request failed); ``None`` after a successful save.

    Raises
    ------
    KeyError
        If ``event_type`` is not a key of ``event_types``.
    """
    event_name = event_types[event_type]
    pattern = event_name+"-"+str(year)
    file_name = [fname for fname in all_files if pattern in fname]
    if len(file_name) == 0:
        return "No file in that year for that event type"
    file_name = file_name[0]
    print("Considering file ", file_name)
    target = '../weather_data/{}/{}'.format(event_type, file_name)
    if os.path.exists(target):
        return "file already exists"
    query_parameters = {"downloadformat": "csv"}
    print("Getting the response from the URL .....")
    response = requests.get(httpurl+file_name, params=query_parameters)
    if not response.ok:
        # Never save the body of a failed request: an error page stored under
        # the data file's name would make every later call return
        # "file already exists" and the bad file would never be replaced.
        print("Download failed with HTTP status", response.status_code)
        return "Download failed with HTTP status {}".format(response.status_code)
    print("Downloaded succesfully")
    with open(target, "wb") as f:
        f.write(response.content)
    print('Saved in folder')

In [9]:
def read_weather(year, event_type):
    """Read the locally stored CSV for one year and event type.

    Parameters
    ----------
    year : int or str
        Year whose text form must occur in the file name.
    event_type : str
        Sub-folder of ``../weather_data`` to look in.

    Returns
    -------
    pandas.DataFrame or str
        The parsed file (first two lines skipped, date column parsed) when
        exactly one file name contains the year; otherwise a message string
        saying that no file, or more than one file, matched.
    """
    folder = '../weather_data/' + event_type
    year_tag = str(year)

    matches = []
    for candidate in os.listdir(folder):
        if year_tag in candidate:
            matches.append(candidate)

    if not matches:
        return "No file in that year for that event type"
    if len(matches) > 1:
        return "Multiple files with that year in their name"

    # Lightning files are read with '#ZDAY' as the date column; every other
    # event type is read with '#ZTIME'.
    date_column = '#ZDAY' if event_type == 'lightning' else '#ZTIME'
    return pd.read_csv(folder + '/' + matches[0],
                       skiprows=2, parse_dates=[date_column])

## Process lightning data

In [10]:
# Request the lightning file of every year from 1987 through 2023
# (range() stops before 2024).
for year in range(1987, 2024):
    download_file(year, event_type='lightning')

Considering file  nldn-tiles-1986.csv.gz
Getting the response from the URL .....
Downloaded succesfully
Saved in folder
Considering file  nldn-tiles-1987.csv.gz
Getting the response from the URL .....
Downloaded succesfully
Saved in folder
Considering file  nldn-tiles-1988.csv.gz
Getting the response from the URL .....
Downloaded succesfully
Saved in folder
Considering file  nldn-tiles-1989.csv.gz
Getting the response from the URL .....
Downloaded succesfully
Saved in folder
Considering file  nldn-tiles-1990.csv.gz
Getting the response from the URL .....
Downloaded succesfully
Saved in folder
Considering file  nldn-tiles-1991.csv.gz
Getting the response from the URL .....
Downloaded succesfully
Saved in folder
Considering file  nldn-tiles-1992.csv.gz
Getting the response from the URL .....
Downloaded succesfully
Saved in folder
Considering file  nldn-tiles-1993.csv.gz
Getting the response from the URL .....
Downloaded succesfully
Saved in folder
Considering file  nldn-tiles-1994.csv.gz

In [34]:
# Load the yearly lightning tables for 2003 through 2023.
lightning_frames = []
for year in range(2003, 2024):
    yearly = read_weather(year, 'lightning')
    # read_weather reports a missing or ambiguous file by returning a message
    # string. Stop with that message and the year, rather than letting
    # pd.concat fail with a TypeError that does not say which year is at fault.
    if isinstance(yearly, str):
        raise FileNotFoundError('lightning {}: {}'.format(year, yearly))
    lightning_frames.append(yearly)

# ignore_index=True renumbers the rows 0..n-1. Without it each yearly table
# keeps its own index starting at 0, so the combined frame would carry
# duplicate index labels.
lightning_data = pd.concat(lightning_frames, ignore_index=True)

In [35]:
# Reverse geocode every (latitude, longitude) pair of the lightning table and
# store the 'admin1' / 'admin2' fields of each result as STATE / COUNTY.
coordinates = list(zip(lightning_data['CENTERLAT'], lightning_data['CENTERLON']))
address = rg.search(coordinates)
lightning_data['STATE'] = [place['admin1'] for place in address]
lightning_data['COUNTY'] = [place['admin2'] for place in address]

In [43]:
# Display the lightning table; the output below includes the STATE and COUNTY columns.
lightning_data

Unnamed: 0,#ZDAY,CENTERLON,CENTERLAT,TOTAL_COUNT,STATE,COUNTY
0,2003-01-01,-80.8,27.0,3,Florida,Glades County
1,2003-01-01,-78.2,34.5,1,North Carolina,Pender County
2,2003-01-01,-83.2,23.8,6,Artemisa,
3,2003-01-01,-77.3,34.6,4,North Carolina,Onslow County
4,2003-01-01,-76.8,33.9,1,North Carolina,Carteret County
...,...,...,...,...,...,...
3116145,2023-12-31,-126.7,33.3,1,California,Monterey County
3116146,2023-12-31,-126.6,33.3,1,California,Monterey County
3116147,2023-12-31,-126.5,33.3,2,California,Monterey County
3116148,2023-12-31,-126.8,33.4,1,California,Monterey County
