# NASA NTL 1: Data Collecting & Cleaning

In [None]:
import os
import ee
import folium
import datetime
import pandas as pd

# NYC Population Density Variable 2015 - 2020 (Not used)
##### No monthly data is available for this variable, so it may be left out of the final regressions if it can't be aggregated at the same level as the other variables

In [None]:
# Population figures come from U.S. Census QuickFacts (sources are linked at the bottom of the notebook).
# There was no file to import, so each year's population is entered by hand.
pop15 = 8550405
pop16 = 8537673
pop17 = 8622698
pop18 = 8398750
pop19 = 8336817
pop20 = 8804190

# Yearly populations in chronological order, 2015 through 2020
pops = [pop15, pop16, pop17, pop18, pop19, pop20]

# Land-area divisor used for the density calculation
# (see "Total Land Area Value" under Sources; presumably square miles - confirm)
NYC_LAND_AREA = 300.45

# Population density for each year
popdensity = [population / NYC_LAND_AREA for population in pops]

# Years matching the entries of pops / popdensity
years = [2015, 2016, 2017, 2018, 2019, 2020]

# One row per year with its density
population_density = pd.DataFrame({'Year': years, 'Population Density': popdensity})

print(population_density)

# Monthly Metro Card Counts 2015 - 2020

In [None]:
# Source: Data.ny.gov "MTA NYCT MetroCard History: 2010 - 2021" dataset
metroride = pd.read_csv("/Users/isabellagermani/Desktop/Projects/NASA Nighttime Lights & MTA Infrastructure/NYC MetroCard History 2010-2021.csv")

# Columns that are not needed for the monthly totals
columns_to_drop = [
    "From Date",
    "Remote Station ID",
    "Station",
    "Student",
    "NICE 2-Trip",
    "CUNY 120 Day",
    "CUNY 60 Day",
    "Fair Fares Pay-Per-Ride",
    "Fair Fares 7 Day Unlimited",
    "Fair Fares 30 Day Unlimited",
]
metroride = metroride.drop(columns=columns_to_drop)
metroride

In [None]:
# Adding counts of all Metro Card types together to see the overall effect.
# These are the fare-type columns that make up the total.
fare_columns = [
    'Full Fare',
    'Senior Citizen/Disabled',
    '7 Day ADA Farecard Access System Unlimited',
    '30 Day ADA Farecard Access/Reduced Fare Media Unlimited',
    'Joint Rail Road Ticket',
    '7 Day Unlimited',
    '30 Day Unlimited',
    '14 Day Reduced Fare Media Unlimited',
    '1 Day Unlimited',
    '14 Day Unlimited',
    '7 Day Express Bus Pass',
    'TransitCheck',
    'Long Island Bus Special Senior',
    'Reduced Fare 2-Trip',
    'Rail Road Unlimited No Trade',
    'TransitCheck Annual',
    'Mail and Ride EasyPay Express',
    'Mail and Ride EasyPay Unlimited',
    'PATH 2-Trip',
    'AirTrain Full Fare',
    'AirTrain 30 Day Unlimited',
    'AirTrain 10-Trip',
    'AirTrain Monthly',
]

# Row-wise total across all fare types, computed in one vectorized call instead of
# a Python-level iterrows() loop. skipna=False keeps the behaviour of chained "+":
# a missing value in any fare column makes that row's total missing too.
metroridetotals = metroride[fare_columns].sum(axis=1, skipna=False).tolist()

# New df holding the period end date (parsed to datetime) and the total count
new_metro = pd.DataFrame({
    'Date': pd.to_datetime(metroride['To Date']),
    'Total Counts': metroridetotals,
})

# Sorting by Date in ascending order
new_metro = new_metro.sort_values(by='Date')

# Keeping only rows whose year falls in 2015 - 2020 (both ends inclusive)
new_metro = new_metro[new_metro['Date'].dt.year.between(2015, 2020)]

# Resetting index after dropping rows
new_metro = new_metro.reset_index(drop=True)
new_metro

In [None]:
# Checkpoint: work on a copy so the original new_metro df is left untouched
NEW_METRO = new_metro.copy()

# Zero-padded month and four-digit year as strings (used by the next cell's grouping)
metro_dates = NEW_METRO['Date'].dt
NEW_METRO['Month'] = metro_dates.strftime('%m')
NEW_METRO['Year'] = metro_dates.strftime('%Y')

# YearMonth ("YYYY-MM") is the key kept for the final dataset; it matches the
# format built for the NASA data, which makes the later merge straightforward
NEW_METRO['YearMonth'] = NEW_METRO['Year'].str.cat(NEW_METRO['Month'], sep='-')
NEW_METRO

In [None]:
# Mean of 'Total Counts' over all rows sharing a YearMonth, with the result
# column renamed for the final dataset
MTA = (
    NEW_METRO
    .groupby('YearMonth', as_index=False)['Total Counts']
    .mean()
    .rename(columns={'Total Counts': 'Avg Monthly Metro Card Count'})
)

MTA

# MTA Major Incidents 2015 - 2020

In [None]:
# Source: data.ny.gov Metropolitan Transportation Authority Performance Indicators per Agency: 2008-2021 report
incidents1519 = pd.read_csv('/Users/isabellagermani/Desktop/Projects/NASA Nighttime Lights & MTA Infrastructure/MTA Subway Major Incidents 2015-2019.csv')
incidents20 = pd.read_csv('/Users/isabellagermani/Desktop/Projects/NASA Nighttime Lights & MTA Infrastructure/MTA Subway Major Incidents 2020.csv')

# Stacking both files into a single df with a fresh index
incidents = pd.concat([incidents1519, incidents20], ignore_index=True)

# Year range to keep (compared as strings against the first 4 characters of 'month')
start_year = '2015'
end_year = '2020'

# Keeping rows whose year prefix lies within the range, both ends inclusive
incident_year = incidents['month'].str[:4]
in_year_range = (incident_year >= start_year) & (incident_year <= end_year)
filtered_incidents = incidents[in_year_range]

filtered_incidents

In [None]:
# Columns not needed for the monthly aggregation
columns_to_drop = ["division", "line", "day_type"]

# Order rows by month, then remove the unneeded columns
filtered_incidents_sorted = (
    filtered_incidents
    .sort_values(by='month')
    .drop(columns=columns_to_drop)
)
filtered_incidents_sorted

In [None]:
# Checkpoint: continue on a copy of the sorted incidents df
ICS = filtered_incidents_sorted.copy()

# 'month' becomes 'YearMonth' so it lines up with the other datasets' key
ICS = ICS.rename(columns={'month': 'YearMonth'})

# Integer code per distinct category string, plus the distinct labels themselves
category_ids, category_labels = ICS['category'].factorize()

# Storing the integer code alongside each row
ICS['categoryID'] = category_ids
ICS

In [None]:
# Total 'count' per (YearMonth, categoryID) group. transform() broadcasts the
# group total back onto every row, so rows of the same group all carry the same value.
group_keys = ['YearMonth', 'categoryID']
ICS['count_frequency'] = ICS.groupby(group_keys)['count'].transform('sum')
ICS

In [None]:
# Converting YearMonth to datetime
ICS['YearMonth'] = pd.to_datetime(ICS['YearMonth'])

# Extracting year and month to separate columns (they become the pivot index)
ICS['Year'] = ICS['YearMonth'].dt.year
ICS['Month'] = ICS['YearMonth'].dt.month

# One row per (Year, Month), one column per incident category.
# The raw 'count' is summed here. 'count_frequency' already holds the group total
# repeated on every row of the group, so summing it again would multiply each
# monthly total by the number of rows in that group.
pivot_table = ICS.pivot_table(
    index=['Year', 'Month'],
    columns='category',
    values='count',
    aggfunc='sum',
)

# Resetting index to make Year and Month regular columns
pivot_table = pivot_table.reset_index()

# Month/category combinations with no rows become 0 incidents
pivot_table = pivot_table.fillna(0)

pivot_table

In [None]:
# Checkpoint: real copy of the pivot so the edits below cannot leak back into
# pivot_table (pd.DataFrame(pivot_table) is not a deep copy) and the cell can be re-run
INCIDENTS = pivot_table.copy()

# The pivot is indexed by separate Year and Month columns, so YearMonth has to be
# rebuilt here as "YYYY-MM" (month zero-padded) to match the other datasets' key
INCIDENTS['YearMonth'] = (
    INCIDENTS['Year'].astype(str) + '-' + INCIDENTS['Month'].astype(str).str.zfill(2)
)

# Year and Month are no longer needed once YearMonth exists
INCIDENTS = INCIDENTS.drop(columns=['Year', 'Month'])

# Total number of major incidents per month = sum over every category column
INCIDENTS['Total_n_Incidents'] = INCIDENTS.drop(columns='YearMonth').sum(axis=1)
INCIDENTS

In [None]:
# Prefixing each incident-category column so its meaning is clear after merging
incident_column_names = {
    'Other': 'NIncidents: Other',
    'Persons on Trackbed/Police/Medical': 'NIncidents: Police/Medical/Person on Trackbed',
    'Signals': 'NIncidents: Signals',
    'Stations and Structure': 'NIncidents: Stations and Structure',
    'Subway Car': 'NIncidents: Subway Car',
    'Track': 'NIncidents: Track',
}
INCIDENTS.rename(columns=incident_column_names, inplace=True)
INCIDENTS

#  NASA Nighttime Lights Data 2015 - 2020

In [None]:
# NTL data comes from NASA's VNP46A2 product (Gap_Filled_DNB_BRDF_Corrected_NTL band),
# accessed through the Google Earth Engine API

# Cloud project the Earth Engine session is initialized against
gee_project = 'nasantl'

# Authenticate first, then initialize the session for that project
ee.Authenticate()
ee.Initialize(project=gee_project)

In [None]:
# Setup & ROI Visualization

# Estimated NYC region of interest, written as a closed ring of
# [longitude, latitude] vertices
# NOTE(review): the earlier per-vertex borough labels did not obviously match
# these coordinates; the vertices themselves are unchanged - confirm the intended shape
roi_vertices = [
    [-74.03, 40.91],  # north-west corner
    [-73.90, 40.91],
    [-73.90, 40.69],
    [-74.03, 40.50],  # south-west corner
    [-73.78, 40.50],  # south-east corner
    [-73.78, 40.91],  # north-east corner
    [-74.03, 40.91],  # back to the first vertex to close the ring
]
roi = ee.Geometry.Polygon(roi_vertices)

# Wasnt able to properly group data by borough, but would definitely include in a revision 
# # Define polygons for NYC boroughs
# si_roi = ee.Geometry.Polygon([
#     [-74.259, 40.648],  # South West
#     [-74.043, 40.648],  # South East
#     [-74.043, 40.517],  # North East
#     [-74.259, 40.517],  # North West
#     [-74.259, 40.648]   # Close the polygon
# ])

# m_roi = ee.Geometry.Polygon([
#     [-74.026, 40.879],  # South West
#     [-73.909, 40.879],  # South East
#     [-73.909, 40.699],  # North East
#     [-74.026, 40.699],  # North West
#     [-74.026, 40.879]   # Close the polygon
# ])

# bk_roi = ee.Geometry.Polygon([
#     [-74.043, 40.698],  # South West
#     [-73.833, 40.698],  # South East
#     [-73.833, 40.570],  # North East
#     [-74.043, 40.570],  # North West
#     [-74.043, 40.698]   # Close the polygon
# ])

# q_roi = ee.Geometry.Polygon([
#     [-73.962, 40.700],  # South West
#     [-73.700, 40.700],  # South East
#     [-73.700, 40.541],  # North East
#     [-73.962, 40.541],  # North West
#     [-73.962, 40.700]   # Close the polygon
# ])

# bx_roi = ee.Geometry.Polygon([
#     [-73.933, 40.927],  # South West
#     [-73.700, 40.927],  # South East
#     [-73.700, 40.791],  # North East
#     [-73.933, 40.791],  # North West
#     [-73.933, 40.927]   # Close the polygon
# ])

# Defining variable containing filtered data.
# filterDate treats the end date as exclusive, so the range ends on 2021-01-01
# to keep 2020-12-31 inside the 2015 - 2020 window.
data = ee.ImageCollection('NOAA/VIIRS/001/VNP46A2').filterDate('2015-01-01', '2021-01-01')

# Band holding the pixel values
band_name = 'Gap_Filled_DNB_BRDF_Corrected_NTL'

# First image in the dataset, restricted to that band
brdf = data.first().select(band_name)

# Defining Visualization parameters
brdfVis = {
  'min': 0,
  'max': 100,
  'palette': ['black', 'purple', 'cyan', 'green', 'yellow', 'red', 'white'],
  'region': roi
}

# Creating folium Map as object my_map using roi coordinates
my_map = folium.Map(location=[40.71, -73.93], zoom_start=10)

# Asking Earth Engine for a tile URL of the image. Without a tile URL the layer
# would only show folium's default basemap and the NTL image would never be drawn.
# 'region' is not a rendering option, so it is left out of the request and the
# image is clipped to roi instead.
tile_vis = {key: value for key, value in brdfVis.items() if key != 'region'}
map_id = brdf.clip(roi).getMapId(tile_vis)

# Adding Google Earth Engine layer to map
folium.TileLayer(
    tiles=map_id['tile_fetcher'].url_format,
    attr='Google Earth Engine',
    overlay=True,
    name=band_name
).add_to(my_map)

# Displaying map
my_map

In [None]:
# Number of images in the dataset (.getInfo() pulls the actual value from the server)
image_count = data.size()
num_images = image_count.getInfo()
print("Number of images:", num_images)

# First image of the dataset
first_image = ee.Image(data.first())

# Printing the first image's information
first_image_info = first_image.getInfo()
print("Information about the first image:")
print(first_image_info)

In [None]:
# Splitting data into years.
# filterDate treats the end date as exclusive, so each range ends on Jan 1 of the
# following year; ending on Dec 31 would silently drop that day's image.
data2015 = ee.ImageCollection('NOAA/VIIRS/001/VNP46A2').filterDate('2015-01-01', '2016-01-01')
data2016 = ee.ImageCollection('NOAA/VIIRS/001/VNP46A2').filterDate('2016-01-01', '2017-01-01')
data2017 = ee.ImageCollection('NOAA/VIIRS/001/VNP46A2').filterDate('2017-01-01', '2018-01-01')
data2018 = ee.ImageCollection('NOAA/VIIRS/001/VNP46A2').filterDate('2018-01-01', '2019-01-01')
data2019 = ee.ImageCollection('NOAA/VIIRS/001/VNP46A2').filterDate('2019-01-01', '2020-01-01')
data2020 = ee.ImageCollection('NOAA/VIIRS/001/VNP46A2').filterDate('2020-01-01', '2021-01-01')

# Number of images in each year's collection. Each count is taken from its own
# collection (2015-2017 previously all reused the 2018 collection's size).
num_images_15 = data2015.size().getInfo()
num_images_16 = data2016.size().getInfo()
num_images_17 = data2017.size().getInfo()
num_images_18 = data2018.size().getInfo()
num_images_19 = data2019.size().getInfo()
num_images_20 = data2020.size().getInfo()

In [None]:
# Initializing one pair of lists per year:
#   dates_YYYY  - the date (YYYY-MM-DD) each image was taken
#   pixels_YYYY - the light radiance pixel value measured on that date
dates_2015, pixels_2015 = [], []
dates_2016, pixels_2016 = [], []
dates_2017, pixels_2017 = [], []
dates_2018, pixels_2018 = [], []
dates_2019, pixels_2019 = [], []
dates_2020, pixels_2020 = [], []

In [None]:
# FOR THE YEAR 2015

# Band to collect pixel intensity from
band_name = 'Gap_Filled_DNB_BRDF_Corrected_NTL'

# Start from empty lists so re-running this cell (e.g. after a failed request)
# cannot leave duplicated or partial entries behind
dates_2015.clear()
pixels_2015.clear()

# Building the server-side image list once instead of on every iteration
images_2015 = data2015.toList(num_images_15)

# Iterating over each image in the collection
for i in range(num_images_15):

    # Getting the current image
    image_15 = ee.Image(images_2015.get(i))

    # system:time_start is given in milliseconds since the Unix epoch (January 1, 1970)
    start_time_ms = image_15.get('system:time_start').getInfo()

    # Converting milliseconds to a UTC timestamp string
    start_time = datetime.datetime.fromtimestamp(start_time_ms / 1000, tz=datetime.timezone.utc).strftime('%Y-%m-%d %H:%M:%S')

    # Mean pixel intensity of the band over roi
    # NOTE(review): scale=30 is kept as-is; check it against the dataset's native pixel size
    intensity = image_15.select(band_name).reduceRegion(reducer=ee.Reducer.mean(), geometry=roi, scale=30).get(band_name).getInfo()

    # Appending only after both requests succeeded keeps the two lists the same length
    dates_2015.append(start_time[:10])  # first 10 characters = YYYY-MM-DD
    pixels_2015.append(intensity)

# Printing Lists
print(pixels_2015)
print(dates_2015)

In [None]:
# FOR THE YEAR 2016

# Band to collect pixel intensity from
band_name = 'Gap_Filled_DNB_BRDF_Corrected_NTL'

# Start from empty lists so re-running this cell (e.g. after a failed request)
# cannot leave duplicated or partial entries behind
dates_2016.clear()
pixels_2016.clear()

# Building the server-side image list once instead of on every iteration
images_2016 = data2016.toList(num_images_16)

# Iterating over each image in the collection
for i in range(num_images_16):

    # Getting the current image
    image_16 = ee.Image(images_2016.get(i))

    # system:time_start is given in milliseconds since the Unix epoch (January 1, 1970)
    start_time_ms = image_16.get('system:time_start').getInfo()

    # Converting milliseconds to a UTC timestamp string
    start_time = datetime.datetime.fromtimestamp(start_time_ms / 1000, tz=datetime.timezone.utc).strftime('%Y-%m-%d %H:%M:%S')

    # Mean pixel intensity of the band over roi
    # NOTE(review): scale=30 is kept as-is; check it against the dataset's native pixel size
    intensity = image_16.select(band_name).reduceRegion(reducer=ee.Reducer.mean(), geometry=roi, scale=30).get(band_name).getInfo()

    # Appending only after both requests succeeded keeps the two lists the same length
    dates_2016.append(start_time[:10])  # first 10 characters = YYYY-MM-DD
    pixels_2016.append(intensity)

# Printing Lists
print(pixels_2016)
print(dates_2016)

In [None]:
# FOR THE YEAR 2017

# Band to collect pixel intensity from
band_name = 'Gap_Filled_DNB_BRDF_Corrected_NTL'

# Start from empty lists so re-running this cell (e.g. after a failed request)
# cannot leave duplicated or partial entries behind
dates_2017.clear()
pixels_2017.clear()

# Building the server-side image list once instead of on every iteration
images_2017 = data2017.toList(num_images_17)

# Iterating over each image in the collection
for i in range(num_images_17):

    # Getting the current image
    image_17 = ee.Image(images_2017.get(i))

    # system:time_start is given in milliseconds since the Unix epoch (January 1, 1970)
    start_time_ms = image_17.get('system:time_start').getInfo()

    # Converting milliseconds to a UTC timestamp string
    start_time = datetime.datetime.fromtimestamp(start_time_ms / 1000, tz=datetime.timezone.utc).strftime('%Y-%m-%d %H:%M:%S')

    # Mean pixel intensity of the band over roi
    # NOTE(review): scale=30 is kept as-is; check it against the dataset's native pixel size
    intensity = image_17.select(band_name).reduceRegion(reducer=ee.Reducer.mean(), geometry=roi, scale=30).get(band_name).getInfo()

    # Appending only after both requests succeeded keeps the two lists the same length
    dates_2017.append(start_time[:10])  # first 10 characters = YYYY-MM-DD
    pixels_2017.append(intensity)

# Printing Lists
print(pixels_2017)
print(dates_2017)

In [None]:
# FOR THE YEAR 2018

# Band to collect pixel intensity from
band_name = 'Gap_Filled_DNB_BRDF_Corrected_NTL'

# Start from empty lists so re-running this cell (e.g. after a failed request)
# cannot leave duplicated or partial entries behind
dates_2018.clear()
pixels_2018.clear()

# Building the server-side image list once instead of on every iteration
images_2018 = data2018.toList(num_images_18)

# Iterating over each image in the collection
for i in range(num_images_18):

    # Getting the current image
    image_18 = ee.Image(images_2018.get(i))

    # system:time_start is given in milliseconds since the Unix epoch (January 1, 1970)
    start_time_ms = image_18.get('system:time_start').getInfo()

    # Converting milliseconds to a UTC timestamp string
    start_time = datetime.datetime.fromtimestamp(start_time_ms / 1000, tz=datetime.timezone.utc).strftime('%Y-%m-%d %H:%M:%S')

    # Mean pixel intensity of the band over roi
    # NOTE(review): scale=30 is kept as-is; check it against the dataset's native pixel size
    intensity = image_18.select(band_name).reduceRegion(reducer=ee.Reducer.mean(), geometry=roi, scale=30).get(band_name).getInfo()

    # Appending only after both requests succeeded keeps the two lists the same length
    dates_2018.append(start_time[:10])  # first 10 characters = YYYY-MM-DD
    pixels_2018.append(intensity)

# Printing Lists
print(pixels_2018)
print(dates_2018)

In [None]:
# FOR THE YEAR 2019

# Band to collect pixel intensity from
band_name = 'Gap_Filled_DNB_BRDF_Corrected_NTL'

# Start from empty lists so re-running this cell (e.g. after a failed request)
# cannot leave duplicated or partial entries behind
dates_2019.clear()
pixels_2019.clear()

# Building the server-side image list once instead of on every iteration
images_2019 = data2019.toList(num_images_19)

# Iterating over each image in the collection
for i in range(num_images_19):

    # Getting the current image
    image_19 = ee.Image(images_2019.get(i))

    # system:time_start is given in milliseconds since the Unix epoch (January 1, 1970)
    start_time_ms = image_19.get('system:time_start').getInfo()

    # Converting milliseconds to a UTC timestamp string
    start_time = datetime.datetime.fromtimestamp(start_time_ms / 1000, tz=datetime.timezone.utc).strftime('%Y-%m-%d %H:%M:%S')

    # Mean pixel intensity of the band over roi
    # NOTE(review): scale=30 is kept as-is; check it against the dataset's native pixel size
    intensity = image_19.select(band_name).reduceRegion(reducer=ee.Reducer.mean(), geometry=roi, scale=30).get(band_name).getInfo()

    # Appending only after both requests succeeded keeps the two lists the same length
    dates_2019.append(start_time[:10])  # first 10 characters = YYYY-MM-DD
    pixels_2019.append(intensity)

# Printing Lists
print(pixels_2019)
print(dates_2019)

In [None]:
# FOR THE YEAR 2020

# Band to collect pixel intensity from
band_name = 'Gap_Filled_DNB_BRDF_Corrected_NTL'

# Start from empty lists so re-running this cell (e.g. after a failed request)
# cannot leave duplicated or partial entries behind
dates_2020.clear()
pixels_2020.clear()

# Building the server-side image list once instead of on every iteration
images_2020 = data2020.toList(num_images_20)

# Iterating over each image in the collection
for i in range(num_images_20):

    # Getting the current image
    image_20 = ee.Image(images_2020.get(i))

    # system:time_start is given in milliseconds since the Unix epoch (January 1, 1970)
    start_time_ms = image_20.get('system:time_start').getInfo()

    # Converting milliseconds to a UTC timestamp string
    start_time = datetime.datetime.fromtimestamp(start_time_ms / 1000, tz=datetime.timezone.utc).strftime('%Y-%m-%d %H:%M:%S')

    # Mean pixel intensity of the band over roi
    # NOTE(review): scale=30 is kept as-is; check it against the dataset's native pixel size
    intensity = image_20.select(band_name).reduceRegion(reducer=ee.Reducer.mean(), geometry=roi, scale=30).get(band_name).getInfo()

    # Appending only after both requests succeeded keeps the two lists the same length
    dates_2020.append(start_time[:10])  # first 10 characters = YYYY-MM-DD
    pixels_2020.append(intensity)

# Printing Lists
print(pixels_2020)
print(dates_2020)

In [None]:
# Pasting lists together to create pandas df

# Concatenating all of the yearly lists in chronological order
All_Pixels = pixels_2015 + pixels_2016 + pixels_2017 + pixels_2018 + pixels_2019 + pixels_2020
All_Dates = dates_2015 + dates_2016 + dates_2017 + dates_2018 + dates_2019 + dates_2020

# Each radiance value must be paired with the date of the same image. Trimming
# dates off the end to force the lengths to match would attach values to the
# wrong dates, so a mismatch is reported instead of being hidden.
if len(All_Dates) != len(All_Pixels):
    raise ValueError(
        "Dates and pixel values are out of step ({} dates vs {} values); "
        "re-run the yearly extraction cells before building the df".format(
            len(All_Dates), len(All_Pixels)
        )
    )

# Creating df that has 2 columns "Date" & "Light_intensity" and uses the list info for column values
NASA = pd.DataFrame({'Date': All_Dates, 'Light_intensity': All_Pixels})
NASA

In [None]:
# Checkpoint: work on a copy so the original NASA df stays untouched
NTL1 = NASA.copy()

# YearMonth = first 7 characters of the Date string
NTL1['YearMonth'] = NTL1['Date'].str.slice(0, 7)

# Light intensity as a numeric column; values that cannot be parsed become NaN
NTL1['Light_intensity'] = pd.to_numeric(NTL1['Light_intensity'], errors='coerce')

# Average light intensity for every YearMonth
NTL = NTL1.groupby('YearMonth', as_index=False)['Light_intensity'].mean()
NTL

# FINAL DATASET 

In [None]:
# Checkpoint: independent backup copies of the three monthly datasets before merging
NASA_NTL, Metro_Cards, Major_Incidents = (
    frame.copy() for frame in (NTL, MTA, INCIDENTS)
)

In [None]:
# Merging the three monthly datasets on YearMonth; outer joins keep every month
# that appears in any of them
MERGED = NASA_NTL.merge(Metro_Cards, on='YearMonth', how='outer').merge(Major_Incidents, on='YearMonth', how='outer')

# Replacing NaN values w/ 0s
MERGED = MERGED.fillna(0)

# Removing trailing zeros from float columns: only columns whose values are all
# whole numbers are cast to int. Columns holding real fractions (the averaged
# light intensity and ridership) stay as floats; casting them to int would
# truncate the averages.
for column in MERGED.columns:
    if column == 'YearMonth':
        continue
    values = MERGED[column]
    if (values % 1 == 0).all():
        MERGED[column] = values.astype(int)

# Final, reader-friendly column names
final_column_names = {
    'YearMonth': 'Month',
    'NIncidents: Other': 'Number of incidents: Other',
    'NIncidents: Police/Medical/Person on Trackbed': 'Number of incidents: Police/Medical/Person on Track',
    'NIncidents: Signals': 'Number of incidents: Signals',
    'NIncidents: Stations and Structure': 'Number of incidents: Stations and Structure',
    'NIncidents: Subway Car': 'Number of incidents: Subway Car',
    'NIncidents: Track': 'Number of incidents: Track',
    'Total_n_Incidents': 'Total Number of incidents',
    'Light_intensity': 'Avg Light Intensity Value',
}
MERGED = MERGED.rename(columns=final_column_names)
MERGED

In [None]:
# EXPORTING

# Folder the CSV file should be saved in, and the file's name
desired_dir = '/Users/isabellagermani/Desktop/Projects/NASA Nighttime Lights & MTA Infrastructure'
output_name = 'merged_dataset.csv'

# Reporting where the notebook is currently running from
current_dir = os.getcwd()
print("Current working directory:", current_dir)

# Switching to the desired folder only when not already there
already_there = current_dir == desired_dir
if not already_there:
    os.chdir(desired_dir)
    print("Changed working directory to:", desired_dir)

# Writing the dataframe to CSV (relative to the working directory) without the index column
MERGED.to_csv(output_name, index=False)
MERGED

One regression I plan to run is metro card count on light intensity. I hypothesize that as light intensity increases, the number of MTA riders will also rise.

# Sources Used

### General Help
grouping by both month and year: https://stackoverflow.com/questions/26646191/pandas-groupby-month-and-year 

.loc and .iloc: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.iloc.html 

Resetting indexes: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.reset_index.html

Iterating over pandas df rows: https://pythonexamples.org/pandas-dataframe-iterate-rows-iterrows/ 

folium documentation: https://python-visualization.github.io/folium/latest/user_guide.html

bbox coordinates: https://gis.stackexchange.com/questions/255158/get-minimum-and-maximum-latitude-and-longitude-of-new-york

geodataframe resource: https://geopandas.org/en/stable/docs/reference/api/geopandas.GeoDataFrame.html

homebrew: https://www.digitalocean.com/community/tutorials/how-to-install-and-use-homebrew-on-macos

geometry rectangle: https://developers.google.com/earth-engine/apidocs/ee-geometry-rectangle 

### Metro Card Counts Variable
Data source: https://data.ny.gov/Transportation/MTA-NYCT-MetroCard-History-2010-2021/v7qc-gwpn/about_data 

### MTA Major Subway Incidents Variable
15-19 Data Source: https://data.ny.gov/Transportation/MTA-Subway-Major-Incidents-2015-2019/ereg-mcvp/about_data

2020 Data Source: https://data.ny.gov/Transportation/MTA-Subway-Major-Incidents-Beginning-2020/j6d2-s8m2/about_data

### Social Vulnerability Data
Components of Social Vulnerability: https://experience.arcgis.com/experience/b0341fa9b237456c9a9f1758c15cde8d/ 

Variables in Census dataset: https://api.census.gov/data/2022/cre/variables.html

Example API Census calls: https://api.census.gov/data/2022/cre/examples.html 

### NASA NTL Data / Google Earth Engine (method of NTL data download)
NASA BlackMarble user guide: https://viirsland.gsfc.nasa.gov/PDF/BlackMarbleUserGuide_v1.2_20220916.pdf

NASA useful tools: https://blackmarble.gsfc.nasa.gov/Tools.html

NTL dataset Introduction: https://appliedsciences.nasa.gov/sites/default/files/2020-12/BlackMarble_2020.pdf

Python script download resource: https://ladsweb.modaps.eosdis.nasa.gov/tools-and-services/data-download-scripts/#wget

EarthData Forum: https://forum.earthdata.nasa.gov/viewtopic.php?t=4532 

How to use Google Earth Engine API for NTL data: https://gis.stackexchange.com/questions/460447/download-nasas-black-marble-vnpa2-nighttime-light-data-using-google-earth-engin 

Earth Engine NTL documentation: https://developers.google.com/earth-engine/datasets/catalog/NOAA_VIIRS_001_VNP46A2#code-editor-javascript 

Image Visualization & Bands: https://developers.google.com/earth-engine/tutorials/tutorial_api_02 

Google Earth Engine Cloud Product Portal: https://console.cloud.google.com/apis/api/earthengine.googleapis.com/credentials?project=nasantl 

How to compute from image data: https://developers.google.com/earth-engine/tutorials/tutorial_api_03

Figuring out what product output looks like & how to use it for this research question: https://www.youtube.com/watch?v=KSxlhBLOAc4

### Population Density Data
US Census QuickFacts: https://www.census.gov/quickfacts/fact/table/newyorkcitynewyork/PST045223#PST045223

U.S. Census Population Estimates: https://www.nyc.gov/assets/planning/download/pdf/planning-level/nyc-population/population-estimates/current-population-estimates-2023.pdf

NYS department of health population estimates: https://www.health.ny.gov/statistics/vital_statistics/2017/table02.html

Total Land Area Value: https://www.census.gov/quickfacts/fact/table/newyorkcitynewyork/LND110220

### People worked with: Teja Vuppu and multiple learning commons students