# Data Exploration

## Imports

In [1]:
import numpy as np
import pandas as pd
import requests
import shapefile
import netCDF4
import os

## 1. USGS Wind Turbine Database

Data retrieval method: API

Documentation: https://eerscmap.usgs.gov/uswtdb/api-doc/

Example request and response for the wind turbine with 'case_id'=3038257. The response data contains the turbine ID ('case_id'), rated capacity ('t_cap'), dimensions, coordinates ('xlong' and 'ylat'), and other relevant data. Calling the json() method on the response object gives a list of dictionaries, where each dictionary represents one turbine from the response.

In [9]:
# Base URL shared by every USWTDB API request in this notebook.
base_path = 'https://eersc.usgs.gov/api/uswtdb/v1/'

# Pass the filter through `params` so requests builds and encodes the query
# string (the hand-written URL had a stray '&' directly after the '?').
resp = requests.get(f"{base_path}turbines", params={'case_id': 'eq.3038257'})
# Fail with the HTTP error itself rather than a JSON/index error further down.
resp.raise_for_status()
resp.json()[0]

{'case_id': 3038257,
 'faa_ors': '19-022348',
 'faa_asn': '2011-WTE-22311-OE',
 'usgs_pr_id': 20977,
 't_state': 'IA',
 't_county': 'Story County',
 't_fips': '19169',
 'p_name': 'AG Land 4',
 'p_year': 2012,
 'p_tnum': 6,
 'p_cap': 9.6,
 't_manu': 'GE Wind',
 't_model': 'GE1.6-82.5',
 't_cap': 1600,
 't_hh': 80.0,
 't_rd': 82.5,
 't_rsa': 5345.62,
 't_ttlh': 121.3,
 't_conf_atr': 3,
 't_conf_loc': 3,
 't_img_date': '1/1/2013',
 't_img_srce': 'NAIP',
 'xlong': -93.3549,
 'ylat': 41.90419,
 'eia_id': None}

Example request for multiple turbines in USWTDB

In [22]:
# `offset` is the index of the first record to return and `limit` caps how
# many records come back, so this asks for the first five turbines.
params = dict(offset=0, limit=5)
resp = requests.get(base_path + "turbines", params=params)
turbine_data = resp.json()
turbine_data

[{'case_id': 3005443,
  'faa_ors': None,
  'faa_asn': None,
  'usgs_pr_id': 5840,
  't_state': 'CA',
  't_county': 'Kern County',
  't_fips': '6029',
  'p_name': '251 Wind',
  'p_year': 1987,
  'p_tnum': 194,
  'p_cap': 18.43,
  't_manu': 'Vestas',
  't_model': None,
  't_cap': 95,
  't_hh': None,
  't_rd': None,
  't_rsa': None,
  't_ttlh': None,
  't_conf_atr': 2,
  't_conf_loc': 3,
  't_img_date': '5/8/2018',
  't_img_srce': 'Digital Globe',
  'xlong': -118.35109,
  'ylat': 35.0919,
  'eia_id': 52161},
 {'case_id': 3072704,
  'faa_ors': None,
  'faa_asn': None,
  'usgs_pr_id': 5146,
  't_state': 'CA',
  't_county': 'Kern County',
  't_fips': '6029',
  'p_name': '251 Wind',
  'p_year': 1987,
  'p_tnum': 194,
  'p_cap': 18.43,
  't_manu': 'Vestas',
  't_model': None,
  't_cap': 95,
  't_hh': None,
  't_rd': None,
  't_rsa': None,
  't_ttlh': None,
  't_conf_atr': 2,
  't_conf_loc': 3,
  't_img_date': '5/8/2018',
  't_img_srce': 'Digital Globe',
  'xlong': -118.3642,
  'ylat': 35.07764

Convert response to DataFrame

In [23]:
# Every dictionary in the JSON list becomes one row; its keys become columns.
df = pd.DataFrame(data=turbine_data)
df.head(n=5)

Unnamed: 0,case_id,faa_ors,faa_asn,usgs_pr_id,t_state,t_county,t_fips,p_name,p_year,p_tnum,...,t_rd,t_rsa,t_ttlh,t_conf_atr,t_conf_loc,t_img_date,t_img_srce,xlong,ylat,eia_id
0,3005443,,,5840,CA,Kern County,6029,251 Wind,1987,194,...,,,,2,3,5/8/2018,Digital Globe,-118.35109,35.0919,52161
1,3072704,,,5146,CA,Kern County,6029,251 Wind,1987,194,...,,,,2,3,5/8/2018,Digital Globe,-118.3642,35.07764,52161
2,3072695,,,5143,CA,Kern County,6029,251 Wind,1987,194,...,,,,2,3,5/8/2018,Digital Globe,-118.36441,35.07744,52161
3,3072661,,,5149,CA,Kern County,6029,251 Wind,1987,194,...,,,,2,3,5/8/2018,Digital Globe,-118.36376,35.07791,52161
4,3005333,,,5109,CA,Kern County,6029,251 Wind,1987,194,...,,,,2,3,5/8/2018,Digital Globe,-118.36869,35.07529,52161


A single request that matches too many records can overload the API. We can work around this by requesting the records in smaller batches, advancing the `offset` query parameter after each call until an empty batch comes back.

In [15]:
# Page through the API `limit` records at a time, starting at record `offset`.
offset, limit = 0, 50
pages = []

# This loop calls the API and collects the response records until it has
# returned all turbines in the USWTDB (signalled by an empty page).
while True:
    # Rebuild the query on every pass so the advanced offset is actually sent;
    # reusing the first params dict would request the first page forever.
    params = {'offset': offset, 'limit': limit}
    resp = requests.get(f"{base_path}turbines", params=params)
    resp.raise_for_status()
    turbine_data = resp.json()
    if not turbine_data:
        break
    pages.append(pd.DataFrame(turbine_data))
    # Advance by the number of records actually received.
    offset += len(turbine_data)

# Concatenate once at the end instead of growing the frame inside the loop.
turbine_df = pd.concat(pages) if pages else pd.DataFrame()
turbine_df.head()

<Response [200]>

For best practices, here's the above code as a function.

In [29]:
"""
Parameters:
    offset - record index from which API response should begin; default 0
    limit - max number of records to return in each API call; default 50
    params - dict of query parameters for API call; default None,
             initialized with default offset and limit values

Returns:
    turbine_df - pandas DataFrame object containing records for all turbines
                 corresponding to query
"""
def USWTDB_data(offset=0, limit=50, params=None):
    """
    Download every USWTDB turbine record matching a query, one page at a time.

    Parameters:
        offset - record index from which API response should begin; default 0
        limit - max number of records to return in each API call; default 50
        params - dict of query parameters for API call; default None.
                 'offset' and 'limit' entries in params take precedence over
                 the offset and limit arguments. The dict is copied, so the
                 caller's object is not modified.

    Returns:
        turbine_df - pandas DataFrame containing the records of all turbines
                     corresponding to the query (an empty DataFrame when no
                     record matches). Row labels restart at 0 on every page;
                     use reset_index(drop=True) if unique labels are needed.

    Raises:
        requests.HTTPError - if the API answers with an error status code
    """
    base_path = 'https://eersc.usgs.gov/api/uswtdb/v1/'

    # Work on a private copy so paging never leaks into the caller's dict.
    query = dict(params) if params else {}
    query.setdefault('offset', offset)
    query.setdefault('limit', limit)
    query['offset'] = int(query['offset'])

    pages = []
    while True:
        resp = requests.get(f"{base_path}turbines", params=query)
        resp.raise_for_status()
        turbine_data = resp.json()
        if not turbine_data:
            # An empty page means every matching record has been returned.
            break
        pages.append(pd.DataFrame(turbine_data))
        # The offset actually sent to the API must move forward; advancing by
        # the number of records received also copes with short pages.
        query['offset'] += len(turbine_data)

    if not pages:
        return pd.DataFrame()
    # One concat at the end avoids re-copying the frame on every page.
    return pd.concat(pages)

## 2. USA Major Cities Dataset

Data retrieval method:
- Data file - CSV download (data/USA_Major_Cities.csv)
- Geographic data - Shapefile download (data/USA_Major_Cities_SHP/)

Documentation: https://hub.arcgis.com/datasets/esri::usa-major-cities

### Data file

In [30]:
# Load the city attribute table from the CSV download.
df = pd.read_csv(filepath_or_buffer='./data/USA_Major_Cities.csv')
df.head(n=5)

Unnamed: 0,FID,NAME,CLASS,ST,STFIPS,PLACEFIPS,CAPITAL,POP_CLASS,POPULATION,POP2010,...,MARHH_CHD,MARHH_NO_C,MHH_CHILD,FHH_CHILD,FAMILIES,AVE_FAM_SZ,HSE_UNITS,VACANT,OWNER_OCC,RENTER_OCC
0,1,Ammon,city,ID,16,1601990,,6,15181,13816,...,1618,1131,106,335,3352,3.61,4747,271,3205,1271
1,2,Blackfoot,city,ID,16,1607840,,6,11946,11899,...,1091,1081,174,381,2958,3.31,4547,318,2788,1441
2,3,Boise City,city,ID,16,1608830,State,8,225405,205671,...,16708,21233,2414,5919,50647,2.97,92700,6996,52345,33359
3,4,Burley,city,ID,16,1611260,,6,10727,10345,...,950,861,139,358,2499,3.37,3885,241,2183,1461
4,5,Caldwell,city,ID,16,1612250,,7,53942,46237,...,4407,3113,686,1755,10776,3.51,16323,1428,9699,5196


### Shapefile

The code below reads the Shapefile and shows that it contains 3886 shapes (one for each record in the CSV file).

See https://code.google.com/archive/p/pyshp/wikis/PyShpDocs.wiki for PyShp info.

In [2]:
sf = shapefile.Reader('././data/USA_Major_Cities_SHP/0c5a2fa1-3463-4fc7-99e5-e206023a7e682020313-1-nmlntcmln9c')
# len() on the Reader reports the number of shapes/records from the file
# header, so the geometries do not all have to be parsed just to count them.
len(sf)

3886

The 'points' attribute gives the longitude and latitude of each city (each shape object in sf.shapes() represents a city in this case).

In [13]:
# Read only the first shape instead of loading every shape to index one.
sf.shape(0).points

[[-111.95410286999999, 43.47579235600005]]

The records() method gives the attributes for each city in the shapefile. These attributes are the same as those contained in the CSV file above.

In [21]:
# Read only the first record instead of loading every record to index one.
sf.record(0).as_dict()

{'FID': 1,
 'NAME': 'Ammon',
 'CLASS': 'city',
 'ST': 'ID',
 'STFIPS': '16',
 'PLACEFIPS': '1601990',
 'CAPITAL': '',
 'POP_CLASS': 6,
 'POPULATION': 15181,
 'POP2010': 13816,
 'WHITE': 13002,
 'BLACK': 73,
 'AMERI_ES': 67,
 'ASIAN': 113,
 'HAWN_PI': 9,
 'HISPANIC': 884,
 'OTHER': 307,
 'MULT_RACE': 245,
 'MALES': 6750,
 'FEMALES': 7066,
 'AGE_UNDER5': 1468,
 'AGE_5_9': 1503,
 'AGE_10_14': 1313,
 'AGE_15_19': 1058,
 'AGE_20_24': 734,
 'AGE_25_34': 2031,
 'AGE_35_44': 1767,
 'AGE_45_54': 1446,
 'AGE_55_64': 1136,
 'AGE_65_74': 665,
 'AGE_75_84': 486,
 'AGE_85_UP': 209,
 'MED_AGE': 29.6,
 'MED_AGE_M': 28.0,
 'MED_AGE_F': 30.8,
 'HOUSEHOLDS': 4476,
 'AVE_HH_SZ': 3.05,
 'HSEHLD_1_M': 457,
 'HSEHLD_1_F': 648,
 'MARHH_CHD': 1618,
 'MARHH_NO_C': 1131,
 'MHH_CHILD': 106,
 'FHH_CHILD': 335,
 'FAMILIES': 3352,
 'AVE_FAM_SZ': 3.61,
 'HSE_UNITS': 4747,
 'VACANT': 271,
 'OWNER_OCC': 3205,
 'RENTER_OCC': 1271}

Not that it's necessarily needed, but we could recreate the dataframe from the CSV with additional fields for latitude and longitude.

In [24]:
# Pair each attribute record with its geometry. Every city is a single point
# stored as [longitude, latitude], so points[0] holds both coordinates.
items = [
    {
        **record.as_dict(),
        'latitude': shape.points[0][1],
        'longitude': shape.points[0][0],
    }
    for record, shape in zip(sf.records(), sf.shapes())
]

df = pd.DataFrame(items)
df.head()

Unnamed: 0,FID,NAME,CLASS,ST,STFIPS,PLACEFIPS,CAPITAL,POP_CLASS,POPULATION,POP2010,...,MHH_CHILD,FHH_CHILD,FAMILIES,AVE_FAM_SZ,HSE_UNITS,VACANT,OWNER_OCC,RENTER_OCC,latitude,longitude
0,1,Ammon,city,ID,16,1601990,,6,15181,13816,...,106,335,3352,3.61,4747,271,3205,1271,43.475792,-111.954103
1,2,Blackfoot,city,ID,16,1607840,,6,11946,11899,...,174,381,2958,3.31,4547,318,2788,1441,43.193937,-112.345567
2,3,Boise City,city,ID,16,1608830,State,8,225405,205671,...,2414,5919,50647,2.97,92700,6996,52345,33359,43.599015,-116.23011
3,4,Burley,city,ID,16,1611260,,6,10727,10345,...,139,358,2499,3.37,3885,241,2183,1461,42.536741,-113.793293
4,5,Caldwell,city,ID,16,1612250,,7,53942,46237,...,686,1755,10776,3.51,16323,1428,9699,5196,43.661626,-116.685619


## 3. U.S. Census Cartographic Boundary Dataset

In [None]:
# TODO: Confirm exactly which files we're going to use; processing will be similar to the shapefile handling above.

## 4. NOAA U.S. Annual Wind Speed Dataset
NetCDF files containing 2020 wind speed data.

Data retrieval:
- U-wind or east-west wind component (test file: data/uwnd.sig995.2020.nc)
- V-wind or north-south wind component (test file: data/vwnd.sig995.2020.nc)
- U- and V-wind speed data from the last 10 years for analysis (data/Wind_Speed_Data/)

Documentation for netCDF4 library: https://unidata.github.io/netcdf4-python/netCDF4/index.html

Load the files:

In [38]:
# Open the 2020 U-wind and V-wind component files; f_uwind is read by the
# demonstration further down.
f_uwind, f_vwind = [
    netCDF4.Dataset(nc_path)
    for nc_path in ('data/uwnd.sig995.2020.nc', 'data/vwnd.sig995.2020.nc')
]

The wind speed files are composed of three-dimensional arrays. The numerical elements represent the wind speed at a given longitude and latitude on a given day. The outermost elements are lists representing days, the second outermost elements are lists representing latitudes, and the elements of those lists are numbers (representing wind speed), whose index corresponds to a longitude. The structure is illustrated below.

data = [ day1, day2, ... , day_355 ]
day_i = [ lat1, lat2, ... , lat73 ]
lat_j = [ lon1, lon2, ... , lon144 ]

That is, data[i][j][k] is the wind speed at (lon[k], lat[j]) on day[i].

Note: There are only 355 days listed for 2020 because the data was initially downloaded on 12-22-20. The data should be re-downloaded before final analysis.

Below is a demonstration that calculates the 2020 mean U-wind speed for each latitude and longitude.

In [37]:
n_lat = f_uwind.dimensions['lat'].size
n_lon = f_uwind.dimensions['lon'].size
n_days = f_uwind.dimensions['time'].size

# The variable is indexed [day][lat][lon]. Convert it to float64 (masked
# entries become NaN) and sum over the day axis in one vectorised call rather
# than adding n_days * n_lat * n_lon values one by one in Python.
u_wind = np.ma.filled(
    np.ma.asarray(f_uwind.variables['uwnd'][:]).astype(np.float64), np.nan
)
mean_u_wind_speed = u_wind.sum(axis=0) / n_days
mean_u_wind_speed

array([[ 0.52704325,  0.47584605,  0.42387421, ...,  0.67302916,
         0.62612777,  0.57711365],
       [ 0.16281785,  0.11739536,  0.06929676, ...,  0.27640944,
         0.24169113,  0.20317   ],
       [ 1.1834517 ,  0.97429676,  0.76169112, ...,  1.7559869 ,
         1.58260662,  1.38859252],
       ...,
       [-3.01330887, -2.5580976 , -2.14725254, ..., -4.64626661,
        -4.06161873, -3.51556239],
       [-2.10359057, -1.63844971, -1.17978775, ..., -3.51182999,
        -3.04175958, -2.57182999],
       [-0.58823846, -0.46013986, -0.33105534, ..., -0.96535114,
        -0.84070323, -0.7155624 ]])

The following functions extract and manipulate wind speed data from a single netcdf file.

In [14]:
"""
Parameters:
    file - path to valid NetCDF4 wind speed file; designed for
           NOAA U-wind and V-wind files; default 'data/uwnd.sig995.2020.nc'

Returns:
    mean_speed - a n_lat x n_lon numpy array representing the
                 average wind speed for each (n_lat, n_lon)
                 combination over the duration of the data
                 contained in the file
"""
def mean_wind_speed(file='data/uwnd.sig995.2020.nc'):
    """
    Average the wind-speed variable of one NetCDF file over its time axis.

    Parameters:
        file - path to a NetCDF4 wind speed file;
               default 'data/uwnd.sig995.2020.nc'

    Returns:
        float64 numpy array holding, for every grid cell, the sum over the
        time axis divided by the size of the 'time' dimension. Cells with a
        masked value on any day come out as NaN.
    """
    # The context manager closes the file handle even if reading fails.
    with netCDF4.Dataset(file) as f_wind:
        n_days = f_wind.dimensions['time'].size
        # NOTE(review): assumes the wind-speed variable is the fourth variable
        # in the file (after what are presumably lat, lon and time) - confirm
        # for any new data source.
        key = list(f_wind.variables)[3]
        # [:] reads the whole variable into memory before the file closes.
        data = f_wind.variables[key][:]

    # Accumulate in float64 and sum along the time axis in one vectorised call
    # instead of looping over every (day, lat, lon) element in Python.
    values = np.ma.filled(np.ma.asarray(data).astype(np.float64), np.nan)
    return values.sum(axis=0) / n_days

"""
Parameters:
    file - path to valid NetCDF4 wind speed file; designed for
           NOAA U-wind and V-wind files; default 'data/uwnd.sig995.2020.nc'

Returns:
    numpy array of latitude values from file
"""
def lat_array(file='data/uwnd.sig995.2020.nc'):
    """
    Read the 'lat' variable of a NetCDF file.

    Parameters:
        file - path to a NetCDF4 wind speed file;
               default 'data/uwnd.sig995.2020.nc'

    Returns:
        numpy array of latitude values from file
    """
    # The context manager closes the file once the values are in memory.
    with netCDF4.Dataset(file) as f_wind:
        # np.ma.getdata yields the underlying ndarray whether the variable is
        # returned masked or as a plain array (where `.data` is a memoryview).
        return np.ma.getdata(f_wind.variables['lat'][:])

"""
Parameters:
    file - path to valid NetCDF4 wind speed file; designed for
           NOAA U-wind and V-wind files; default 'data/uwnd.sig995.2020.nc'

Returns:
    numpy array of longitude values from file
"""
def lon_array(file='data/uwnd.sig995.2020.nc'):
    """
    Read the 'lon' variable of a NetCDF file.

    Parameters:
        file - path to a NetCDF4 wind speed file;
               default 'data/uwnd.sig995.2020.nc'

    Returns:
        numpy array of longitude values from file
    """
    # The context manager closes the file once the values are in memory.
    with netCDF4.Dataset(file) as f_wind:
        # np.ma.getdata yields the underlying ndarray whether the variable is
        # returned masked or as a plain array (where `.data` is a memoryview).
        return np.ma.getdata(f_wind.variables['lon'][:])

"""
Parameters:
    file - path to valid NetCDF4 wind speed file; designed for
           NOAA U-wind and V-wind files; default 'data/uwnd.sig995.2020.nc'

Returns:
    table - a pandas dataframe pivot table where the row indices are
            the file's latitudes and the column indices are the file's
            longitudes; the values represent the mean wind speed for
            the corresponding latitude and longitude indices
"""
def mean_wind_speed_table(file='data/uwnd.sig995.2020.nc'):
    """
    Arrange the mean wind speeds of one NetCDF file as a lat x lon pivot table.

    Parameters:
        file - path to a NetCDF4 wind speed file;
               default 'data/uwnd.sig995.2020.nc'

    Returns:
        table - pandas pivot table indexed by latitude, with one
                ('mean_speed', longitude) column per longitude; each value is
                the mean wind speed returned by mean_wind_speed for that cell
    """
    mws = mean_wind_speed(file)

    # Read the coordinates and grid size, then release the file handle.
    with netCDF4.Dataset(file) as f_wind:
        lat = np.ma.getdata(f_wind.variables['lat'][:])
        lon = np.ma.getdata(f_wind.variables['lon'][:])
        n_lat = f_wind.dimensions['lat'].size
        n_lon = f_wind.dimensions['lon'].size

    # One long-format row per grid cell, in latitude-major order.
    lat_lon_speed = [
        {'lat': lat[i], 'lon': lon[j], 'mean_speed': mws[i][j]}
        for i in range(n_lat)
        for j in range(n_lon)
    ]

    df = pd.DataFrame(lat_lon_speed)
    table = pd.pivot_table(df, index='lat', columns='lon')
    return table

The following functions extract and manipulate wind speed data from a collection of netcdf files.

In [46]:
"""
Parameters:
    dir_path - path to directory of netcdf4 wind speed files;
               default 'data/Wind_Speed_Data/'

Returns:
    Tuple of two elements; the first element is
    a list of the U-wind component files in dir_path, and the
    second element is a list of the V-wind component files in
    dir_path
"""
def u_and_v_files(dir_path='data/Wind_Speed_Data/'):
    """
    Split the files of a directory into U-wind and V-wind path lists.

    Parameters:
        dir_path - path to directory of netcdf4 wind speed files;
                   default 'data/Wind_Speed_Data/'

    Returns:
        (u_files, v_files) - two lists of paths, each sorted by file name:
        the entries whose name contains 'uwnd' and those whose name
        contains 'vwnd'
    """
    # os.listdir returns names in arbitrary order; sorting makes the result
    # (and which file callers see as u_files[0]) reproducible across machines.
    files = sorted(os.listdir(dir_path))
    if not dir_path.endswith('/'):
        dir_path += '/'
    u_files = [f'{dir_path}{file}' for file in files if 'uwnd' in file]
    v_files = [f'{dir_path}{file}' for file in files if 'vwnd' in file]
    return u_files, v_files

"""
Parameters:
    dir_path - path to directory of netcdf4 wind speed files;
               default 'data/Wind_Speed_Data/'

Returns:
    n_lat - number of latitude dimensions in files in dir_path
    n_lon - number of longitude dimensions in files in dir_path
"""
def lat_lon_dims_directory(dir_path='data/Wind_Speed_Data/'):
    """
    Report the grid size of the wind speed files in a directory.

    Parameters:
        dir_path - path to directory of netcdf4 wind speed files;
                   default 'data/Wind_Speed_Data/'

    Returns:
        n_lat - size of the 'lat' dimension of the first U-wind file
        n_lon - size of the 'lon' dimension of the first U-wind file
    """
    u_files, _ = u_and_v_files(dir_path)
    # Only the first U-wind file is inspected; this presumes all files in the
    # directory share one grid.
    file = u_files[0]
    # The context manager closes the file handle once the sizes are read.
    with netCDF4.Dataset(file) as f_wind:
        n_lat, n_lon = f_wind.dimensions['lat'].size, f_wind.dimensions['lon'].size
    return n_lat, n_lon

"""
Parameters:
    dir_path - path to directory of netcdf4 wind speed files;
               default 'data/Wind_Speed_Data/'

Returns:
    lat - np array of latitude dimensions in files in dir_path
    lon - np array of longitude dimensions in files in dir_path
"""
def lat_lon_vals_directory(dir_path='data/Wind_Speed_Data/'):
    """
    Read the latitude and longitude values used by a directory of files.

    Parameters:
        dir_path - path to directory of netcdf4 wind speed files;
                   default 'data/Wind_Speed_Data/'

    Returns:
        lat - latitude values read from the first U-wind file in dir_path
        lon - longitude values read from the same file
    """
    u_paths, _v_paths = u_and_v_files(dir_path)
    # Any file would do here; the first U-wind file serves as the reference.
    reference_file = u_paths[0]
    lat = lat_array(reference_file)
    lon = lon_array(reference_file)
    return lat, lon

"""
Parameters:
    dir_path - path to directory of netcdf4 wind speed files;
               default 'data/Wind_Speed_Data/'

Returns:
    u_dir_mean - a n_lat x n_lon numpy array representing the
                 average U-wind speed for each (n_lat, n_lon)
                 combination over the duration of the data
                 contained in the directory 'dir_path'
    v_dir_mean - a n_lat x n_lon numpy array representing the
                 average V-wind speed for each (n_lat, n_lon)
                 combination over the duration of the data
                 contained in the directory 'dir_path'
"""
def mean_wind_speed_directory(dir_path='data/Wind_Speed_Data/'):
    """
    Average the per-file mean wind speeds of every file in a directory.

    Parameters:
        dir_path - path to directory of netcdf4 wind speed files;
                   default 'data/Wind_Speed_Data/'

    Returns:
        u_dir_mean - element-wise (unweighted) mean of the mean_wind_speed
                     grids of all U-wind files in dir_path
        v_dir_mean - the same for all V-wind files in dir_path
    """
    u_files, v_files = u_and_v_files(dir_path)

    # One mean grid per file, U component first, then V.
    u_means = []
    for u_file in u_files:
        u_means.append(mean_wind_speed(u_file))

    v_means = []
    for v_file in v_files:
        v_means.append(mean_wind_speed(v_file))

    # Collapse the list of grids along the file axis.
    u_dir_mean = np.mean(u_means, axis=0)
    v_dir_mean = np.mean(v_means, axis=0)
    return u_dir_mean, v_dir_mean

"""
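Parameters:
    mws_directory - optional (u_dir_mean, v_dir_mean) pair, e.g. the output
                    of mean_wind_speed_directory; default None, in which
                    case the means are computed from the files in dir_path
    dir_path - path to directory of netcdf4 wind speed files;
               default 'data/Wind_Speed_Data/'; always used to read the
               latitude/longitude grid, even when mws_directory is given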

Returns:
    mean_wind_speed_dict - Python dictionary consisting of two entries:
                           'U-wind' and 'V-wind';
                            Each value is a pandas dataframe pivot table
                            where row indices are latitudes and column
                            indices are longitudes; the values represent
                            the mean wind speed for the corresponding
                            latitude and longitude indices and key
                            direction
"""
def _mean_speed_pivot(lat, lon, mean_grid, n_lat, n_lon):
    """Turn an n_lat x n_lon grid of mean speeds into a lat x lon pivot table."""
    rows = [
        {'lat': lat[i], 'lon': lon[j], 'mean_speed': mean_grid[i][j]}
        for i in range(n_lat)
        for j in range(n_lon)
    ]
    return pd.pivot_table(pd.DataFrame(rows), index='lat', columns='lon')


def mean_wind_speed_directory_table(mws_directory=None, dir_path='data/Wind_Speed_Data/'):
    """
    Build lat x lon pivot tables of the directory-wide mean U and V wind speeds.

    Parameters:
        mws_directory - optional pair (u_dir_mean, v_dir_mean) of n_lat x n_lon
                        grids, e.g. the output of mean_wind_speed_directory;
                        a tuple, list or stacked numpy array all work.
                        Default None: the grids are computed from dir_path.
        dir_path - path to directory of netcdf4 wind speed files;
                   default 'data/Wind_Speed_Data/'; always used to read the
                   latitude/longitude grid

    Returns:
        mean_wind_speed_dict - dict with the keys 'U-wind' and 'V-wind'; each
                               value is a pandas pivot table indexed by
                               latitude with one ('mean_speed', longitude)
                               column per longitude
    """
    # Compare against None explicitly: truth-testing a numpy array raises
    # "truth value of an array is ambiguous".
    if mws_directory is not None:
        u_dir_mean, v_dir_mean = mws_directory
    else:
        u_dir_mean, v_dir_mean = mean_wind_speed_directory(dir_path)

    n_lat, n_lon = lat_lon_dims_directory(dir_path)
    lat, lon = lat_lon_vals_directory(dir_path)

    mean_wind_speed_dict = {
        'U-wind': _mean_speed_pivot(lat, lon, u_dir_mean, n_lat, n_lon),
        'V-wind': _mean_speed_pivot(lat, lon, v_dir_mean, n_lat, n_lon),
    }
    return mean_wind_speed_dict

## 5. USGS Protected Areas Database

Data retrieval method: Shapefile download (data/PADUS2_0_Shapefiles/)

Base filenames for shapefile reader:
- U.S. Boundaries - 'data/PADUS2_0_Shapefiles/CENSUS2016StateUSGSAlbers'
- Reclamation/Designation - 'data/PADUS2_0_Shapefiles/PADUS2_0Designation'
- Easement (federal land like forests) - 'data/PADUS2_0_Shapefiles/PADUS2_0Easement'
- Fee (federal land like nat. parks, trails, seashores, etc.) - 'data/PADUS2_0_Shapefiles/PADUS2_0Fee'
- Marine (marine protected areas) - 'data/PADUS2_0_Shapefiles/PADUS2_0Marine'
- Proclamation (DOD areas) - 'data/PADUS2_0_Shapefiles/PADUS2_0Proclamation'

In [12]:
# Base names (no file extension) handed to shapefile.Reader, keyed by a short
# label for each PAD-US layer.
base_filenames = dict(
    boundaries='data/PADUS2_0_Shapefiles/CENSUS2016StateUSGSAlbers',
    reclamation='data/PADUS2_0_Shapefiles/PADUS2_0Designation',
    easement='data/PADUS2_0_Shapefiles/PADUS2_0Easement',
    fee='data/PADUS2_0_Shapefiles/PADUS2_0Fee',
    marine='data/PADUS2_0_Shapefiles/PADUS2_0Marine',
    proclamation='data/PADUS2_0_Shapefiles/PADUS2_0Proclamation',
)

Exploring some of the data below.

In [25]:
f = base_filenames['boundaries']
sf = shapefile.Reader(f)
# Parse the geometries once; the cells below reuse this list.
shapes = sf.shapes()
# Printing the Reader gives its summary (shape and record counts). The former
# `len(sf.shapes())` line re-read every polygon and displayed nothing, so it
# is dropped.
print(sf)

shapefile Reader
    56 shapes (type 'POLYGON')
    56 records (12 fields)


In [26]:
# Per the PyShp documentation, `parts` lists the index in `points` at which
# each part of the shape begins - TODO confirm how this applies to this layer.
shapes[2].parts

[0]

In [27]:
# Reuse the geometries already loaded into `shapes` instead of re-reading
# every shape and record of the file to look at one polygon.
shapes[2].points

[(-1708149.041099999, 1273314.1705999998),
 (-1708163.1033999994, 1273331.6206999999),
 (-1708258.6941, 1273449.0017),
 (-1708432.6928000003, 1273639.4233999997),
 (-1708725.6258000005, 1273808.2806000002),
 (-1709071.1588000003, 1273964.2149),
 (-1709378.1477000006, 1274100.1522000004),
 (-1710027.6262999997, 1274345.5176999997),
 (-1710445.7851, 1274466.6036999999),
 (-1711063.8606000002, 1274595.9488000004),
 (-1711357.1028000005, 1274671.1105000004),
 (-1711738.4651999995, 1274851.8837000001),
 (-1712026.355799999, 1274952.6339999996),
 (-1712390.5582999997, 1275001.0279),
 (-1712767.4847999997, 1275198.3816999998),
 (-1712981.0765000004, 1275293.9051),
 (-1713302.3100000005, 1275348.0559),
 (-1713529.9891, 1275291.6491),
 (-1713545.3000000007, 1275288.8783999998),
 (-1713612.2369, 1275276.7611999996),
 (-1713694.5458000004, 1275227.6397000002),
 (-1713728.4591000006, 1275207.3909),
 (-1713957.4319000002, 1275023.1388999997),
 (-1714157.3651, 1274790.1325000003),
 (-1714357.0702999

In [28]:
# Reuse the geometries already loaded into `shapes` rather than parsing every
# polygon again to read one bounding box.
shapes[0].bbox


[704900.9243000001, 816309.6355999997, 1044913.3424000001, 1376504.3963999997]

In [29]:
# Read only the first record instead of loading every record to index one.
sf.record(0).as_dict()


{'STATENS': '01779775',
 'GEOID': '01',
 'STUSPS': 'AL',
 'NAME': 'Alabama',
 'ALAND': 131173688951.0,
 'AWATER': 4593686489.0,
 'INTPTLAT': '+32.7396323',
 'INTPTLON': '-86.8434593',
 'OBJECTID': 1,
 'SHAPE_Leng': 1789844.92299,
 'SHAPE_Area': 135767382993.0}