In [40]:
import pandas as pd
import numpy as np
import ftplib
import requests
import matplotlib.pyplot as plt
import os
import re
from datetime import datetime

In [11]:
# HTTPS location with the same directory path as the FTP folder below; file
# names taken from the FTP listing are appended to it for the actual download.
httpurl = 'https://www.ncei.noaa.gov/pub/data/swdi/database-csv/v2/'

# Connect to the server (host name only, without protocol). Using the FTP
# object as a context manager closes the connection when the block ends,
# including when one of the commands raises.
with ftplib.FTP('ftp.ncdc.noaa.gov', timeout=30) as ftp:
    ftp.login()  # anonymous login; pass credentials if anonymous access is not allowed

    # switch to the directory containing the data
    ftp.cwd('/pub/data/swdi/database-csv/v2/')

    # get the list of files in this ftp dir
    all_files = ftp.nlst()

In [12]:
# Disabled on purpose: uncomment the next line to display the full directory listing.
#all_files

In [13]:
# Keep only names that start with "meso-", four digits and a dot
# (re.match anchors at the beginning of the name), in ascending name order.
pattern = r"meso-[0-9]{4}\."
meso_files = sorted(name for name in all_files if re.match(pattern, name))
print(meso_files)

['meso-1995.csv.gz', 'meso-1996.csv.gz', 'meso-1997.csv.gz', 'meso-1998.csv.gz', 'meso-1999.csv.gz', 'meso-2000.csv.gz', 'meso-2001.csv.gz', 'meso-2002.csv.gz', 'meso-2003.csv.gz', 'meso-2004.csv.gz', 'meso-2005.csv.gz', 'meso-2006.csv.gz', 'meso-2007.csv.gz', 'meso-2008.csv.gz', 'meso-2009.csv.gz', 'meso-2010.csv.gz']


In [14]:
# Request the last entry of the sorted file list from the HTTPS location.
query_parameters = {"downloadformat": "csv"}
response = requests.get(
    httpurl + meso_files[-1],
    params=query_parameters,
    timeout=60,  # requests waits indefinitely by default; fail instead of hanging the cell
)
# Raise on a 4xx/5xx answer so an error page is never saved as if it were data.
response.raise_for_status()

In [15]:
# Show the URL that was actually requested, including the encoded query parameters.
response.url

'https://www.ncei.noaa.gov/pub/data/swdi/database-csv/v2/meso-2010.csv.gz?downloadformat=csv'

In [16]:
# Quick check that the download request succeeded.
response.ok

True

In [17]:
# HTTP status code returned for the download request.
response.status_code

200

In [24]:
# Store the response bytes locally under the same file name as on the server.
meso_dir = r'../weather_data/meso/'
# open() does not create missing directories, so make sure the target exists.
os.makedirs(meso_dir, exist_ok=True)
with open(meso_dir + meso_files[-1], "wb") as file:
    file.write(response.content)

In [42]:
# Read the file written by the download step into a DataFrame.
archive_path = r'../weather_data/meso/' + meso_files[-1]
meso_2010 = pd.read_csv(
    archive_path,
    skiprows=2,              # presumably two non-tabular lines precede the column header — TODO confirm
    parse_dates=['#ZTIME'],  # turn the timestamp column into datetime values
)
meso_2010.head()

Unnamed: 0,#ZTIME,LON,LAT,WSR_ID,CELL_ID,CELL_TYPE,RANGE,AZIMUTH,BASE_HEIGHT,TOP_HEIGHT,HEIGHT,RADIUS,AZDIA,SHEAR
0,2010-01-01 00:00:25,-95.20293,32.06692,KSHV,M2,UNC SHR,73,252,7.7,7.7,7.7,1.8,4.1,5
1,2010-01-01 00:00:50,-109.95558,45.20309,KBLX,,UNC SHR,69,236,14.3,14.3,14.3,2.3,2.9,11
2,2010-01-01 00:00:50,-110.10992,45.12659,KBLX,,UNC SHR,77,236,16.6,16.6,16.6,13.4,3.2,10
3,2010-01-01 00:00:50,-109.65312,44.74401,KBLX,,UNC SHR,80,214,17.3,17.3,17.3,10.4,2.6,9
4,2010-01-01 00:00:50,-110.00047,45.99929,KBLX,,UNC SHR,59,279,18.3,18.3,18.3,1.2,3.7,6


In [26]:
pip install reverse-geocoder

Note: you may need to restart the kernel to use updated packages.


In [27]:
import reverse_geocoder as rg

In [28]:
# Reload the file that the download step saved (the notebook never creates an
# uncompressed copy) and parse '#ZTIME' as datetime, so the strftime-based
# cells further down also work when the notebook is run top to bottom.
meso_2010 = pd.read_csv(
    r'../weather_data/meso/' + meso_files[-1],
    skiprows=2,
    parse_dates=['#ZTIME'],
)

In [33]:
# Preview the first rows of the freshly loaded frame.
meso_2010.head()

Unnamed: 0,#ZTIME,LON,LAT,WSR_ID,CELL_ID,CELL_TYPE,RANGE,AZIMUTH,BASE_HEIGHT,TOP_HEIGHT,HEIGHT,RADIUS,AZDIA,SHEAR
0,2010-01-01 00:00:25,-95.20293,32.06692,KSHV,M2,UNC SHR,73,252,7.7,7.7,7.7,1.8,4.1,5
1,2010-01-01 00:00:50,-109.95558,45.20309,KBLX,,UNC SHR,69,236,14.3,14.3,14.3,2.3,2.9,11
2,2010-01-01 00:00:50,-110.10992,45.12659,KBLX,,UNC SHR,77,236,16.6,16.6,16.6,13.4,3.2,10
3,2010-01-01 00:00:50,-109.65312,44.74401,KBLX,,UNC SHR,80,214,17.3,17.3,17.3,10.4,2.6,9
4,2010-01-01 00:00:50,-110.00047,45.99929,KBLX,,UNC SHR,59,279,18.3,18.3,18.3,1.2,3.7,6


In [30]:
# Build one (LAT, LON) tuple per row and keep it on the frame as 'location'.
locations = [(lat, lon) for lat, lon in zip(meso_2010['LAT'], meso_2010['LON'])]
meso_2010['location'] = locations

# Reverse-geocode all coordinates in a single call, then copy the 'admin2'
# (county) and 'admin1' fields of each result into their own columns.
address = rg.search(locations)
meso_2010['county'] = [match['admin2'] for match in address]
meso_2010['state'] = [match['admin1'] for match in address]

Loading formatted geocoded file...


In [37]:
# Preview the first rows after the reverse-geocoding step.
meso_2010.head()

Unnamed: 0,#ZTIME,LON,LAT,WSR_ID,CELL_ID,CELL_TYPE,RANGE,AZIMUTH,BASE_HEIGHT,TOP_HEIGHT,HEIGHT,RADIUS,AZDIA,SHEAR
0,2010-01-01 00:00:25,-95.20293,32.06692,KSHV,M2,UNC SHR,73,252,7.7,7.7,7.7,1.8,4.1,5
1,2010-01-01 00:00:50,-109.95558,45.20309,KBLX,,UNC SHR,69,236,14.3,14.3,14.3,2.3,2.9,11
2,2010-01-01 00:00:50,-110.10992,45.12659,KBLX,,UNC SHR,77,236,16.6,16.6,16.6,13.4,3.2,10
3,2010-01-01 00:00:50,-109.65312,44.74401,KBLX,,UNC SHR,80,214,17.3,17.3,17.3,10.4,2.6,9
4,2010-01-01 00:00:50,-110.00047,45.99929,KBLX,,UNC SHR,59,279,18.3,18.3,18.3,1.2,3.7,6


In [38]:
def string_split_first(s):
    """Return the part of ``s`` that comes before its first space.

    When ``s`` contains no space the whole string is returned.
    """
    head, _separator, _tail = s.partition(' ')
    return head
def string_split_second(s):
    """Return the part of ``s`` that comes after its first space.

    Only the first space separates the two parts, so any later spaces stay in
    the returned text. When ``s`` contains no space there is no second part
    and an empty string is returned (this used to raise ``IndexError``, which
    aborted a whole ``Series.apply`` on a single value without a space).
    """
    _head, _separator, tail = s.partition(' ')
    return tail

In [45]:
def date_to_string(d):
    """Format a datetime-like value as ``'YYYY-MM-DD HH:MM:SS'`` text.

    Parameters
    ----------
    d : any object with a ``strftime`` method
        For example ``datetime.datetime`` or ``pandas.Timestamp``.

    Returns
    -------
    str
        The formatted date and time, separated by a single space.
    """
    timestamp_format = '%Y-%m-%d %H:%M:%S'
    return d.strftime(timestamp_format)

In [46]:
# Format the timestamps as 'YYYY-MM-DD HH:MM:SS' text in one vectorized call.
# pd.to_datetime leaves an already-parsed column unchanged and converts a
# string column, so this cell works whichever load cell produced meso_2010.
meso_2010['DATE_STRING'] = pd.to_datetime(meso_2010['#ZTIME']).dt.strftime('%Y-%m-%d %H:%M:%S')

In [47]:
# Preview the first rows to check the new DATE_STRING column.
meso_2010.head()

Unnamed: 0,#ZTIME,LON,LAT,WSR_ID,CELL_ID,CELL_TYPE,RANGE,AZIMUTH,BASE_HEIGHT,TOP_HEIGHT,HEIGHT,RADIUS,AZDIA,SHEAR,DATE_STRING
0,2010-01-01 00:00:25,-95.20293,32.06692,KSHV,M2,UNC SHR,73,252,7.7,7.7,7.7,1.8,4.1,5,2010-01-01 00:00:25
1,2010-01-01 00:00:50,-109.95558,45.20309,KBLX,,UNC SHR,69,236,14.3,14.3,14.3,2.3,2.9,11,2010-01-01 00:00:50
2,2010-01-01 00:00:50,-110.10992,45.12659,KBLX,,UNC SHR,77,236,16.6,16.6,16.6,13.4,3.2,10,2010-01-01 00:00:50
3,2010-01-01 00:00:50,-109.65312,44.74401,KBLX,,UNC SHR,80,214,17.3,17.3,17.3,10.4,2.6,9,2010-01-01 00:00:50
4,2010-01-01 00:00:50,-110.00047,45.99929,KBLX,,UNC SHR,59,279,18.3,18.3,18.3,1.2,3.7,6,2010-01-01 00:00:50


In [49]:
# Text before the first space of DATE_STRING, i.e. the calendar date.
# The vectorized .str accessor avoids a Python call per row and passes
# missing values through instead of raising on them.
meso_2010['DATE'] = meso_2010['DATE_STRING'].str.split(' ', n=1).str[0]

In [50]:
# Preview the first rows to check the new DATE column.
meso_2010.head()

Unnamed: 0,#ZTIME,LON,LAT,WSR_ID,CELL_ID,CELL_TYPE,RANGE,AZIMUTH,BASE_HEIGHT,TOP_HEIGHT,HEIGHT,RADIUS,AZDIA,SHEAR,DATE_STRING,DATE
0,2010-01-01 00:00:25,-95.20293,32.06692,KSHV,M2,UNC SHR,73,252,7.7,7.7,7.7,1.8,4.1,5,2010-01-01 00:00:25,2010-01-01
1,2010-01-01 00:00:50,-109.95558,45.20309,KBLX,,UNC SHR,69,236,14.3,14.3,14.3,2.3,2.9,11,2010-01-01 00:00:50,2010-01-01
2,2010-01-01 00:00:50,-110.10992,45.12659,KBLX,,UNC SHR,77,236,16.6,16.6,16.6,13.4,3.2,10,2010-01-01 00:00:50,2010-01-01
3,2010-01-01 00:00:50,-109.65312,44.74401,KBLX,,UNC SHR,80,214,17.3,17.3,17.3,10.4,2.6,9,2010-01-01 00:00:50,2010-01-01
4,2010-01-01 00:00:50,-110.00047,45.99929,KBLX,,UNC SHR,59,279,18.3,18.3,18.3,1.2,3.7,6,2010-01-01 00:00:50,2010-01-01


In [51]:
# Text after the first space of DATE_STRING, i.e. the time of day.
# With the .str accessor a value without a space yields a missing entry
# rather than aborting the whole column with an IndexError.
meso_2010['TIME'] = meso_2010['DATE_STRING'].str.split(' ', n=1).str[1]

In [52]:
# Preview the first rows to check the new TIME column.
meso_2010.head()

Unnamed: 0,#ZTIME,LON,LAT,WSR_ID,CELL_ID,CELL_TYPE,RANGE,AZIMUTH,BASE_HEIGHT,TOP_HEIGHT,HEIGHT,RADIUS,AZDIA,SHEAR,DATE_STRING,DATE,TIME
0,2010-01-01 00:00:25,-95.20293,32.06692,KSHV,M2,UNC SHR,73,252,7.7,7.7,7.7,1.8,4.1,5,2010-01-01 00:00:25,2010-01-01,00:00:25
1,2010-01-01 00:00:50,-109.95558,45.20309,KBLX,,UNC SHR,69,236,14.3,14.3,14.3,2.3,2.9,11,2010-01-01 00:00:50,2010-01-01,00:00:50
2,2010-01-01 00:00:50,-110.10992,45.12659,KBLX,,UNC SHR,77,236,16.6,16.6,16.6,13.4,3.2,10,2010-01-01 00:00:50,2010-01-01,00:00:50
3,2010-01-01 00:00:50,-109.65312,44.74401,KBLX,,UNC SHR,80,214,17.3,17.3,17.3,10.4,2.6,9,2010-01-01 00:00:50,2010-01-01,00:00:50
4,2010-01-01 00:00:50,-110.00047,45.99929,KBLX,,UNC SHR,59,279,18.3,18.3,18.3,1.2,3.7,6,2010-01-01 00:00:50,2010-01-01,00:00:50
