#### Importing needed libraries and dependencies

In [1]:
import os 
import ee
import time
import rasterio
import networkx as nx
import numpy as np
import osmnx as ox
import pandas as pd
import geopandas as gpd 
import shapely.geometry
from rasterstats import zonal_stats
from pylandtemp import single_window
from shapely.geometry import Point, Polygon

ModuleNotFoundError: No module named 'ee'

#### Setting initial varibles

Adjust the variables below in order to fine tune the calculations done throughout the code

In [2]:
#Set the directory for download and directory of counter file
os.chdir('/Users/winke/Documents/University/Thesis/Extracts')

#Set the crs projection to work in
crs = 3857

#Read the counter data into the program
df = pd.read_csv("weekly_counts.csv")

#Select cities and create a list that can be iterated through
citynames_l = df['city'].drop_duplicates().tolist()

#Checking working directory and creating path
directory = os.getcwd() #use working directory or desired location
connect = "/"

In [3]:
print(citynames_l)

['Amsterdam', 'Rotterdam', 'New York', 'Seattle', 'Utrecht', 'Norfolk, Virginia', 'Arlington County', 'Austin', 'Minneapolis', 'Almere', 'Arnhem', 'London', 'Northampton', 'Leeds', 'Birmingham', 'Nijmegen', 'Apeldoorn', 'York', 'Duluth', 'Maastricht', 'Boulder', 'Portland', 'Cambridge', 'Denver', 'Philadelphia', 'Houston', 'Dallas', 'Glasgow City', 'Edinburgh', 'Aberdeen', 'Wageningen', 'Gouda', 'San Diego', 'Venlo', 'Amersfoort', 'Tampa', 'Gainesville', 'Tallahassee', 'Fort Lauderdale', 'Miami', 'The Hague', 'Leiden', 'Zeist', 'Dordrecht', 'Ede', 'Raleigh', 'Cary', 'Durham', 'Greensboro', 'Charlotte']


In [78]:
#For exporting particular cities, list them here
citynames_l = []

#### Creating city folders and extracting city bounds

In order to be able to extract geographic variables for each of the cities included in the analysis, the bounds of the city will have to be determined. The bounds are extracted from the OSMnx API and converted into a rectangular shape, ecompassing the total extent of the city, as defined by OSMnx. Additionally, a small buffer is added to ensure that counter locations at the edge of the city will be able to be included in the analysis.

In [None]:
### Extracting city bound ###

for city in citynames_l:
    folder = city
    path = directory + connect + folder

    #Check if folder exists and if not, create it
    if not os.path.exists(path):
        os.makedirs(path)

    # download/model a street network for some city
    G = ox.graph_from_place(city, network_type="all")

    #Save to geopackage
    ox.save_graph_geopackage(G, folder + "/network.gpkg")

    #Save to graphml
    ox.io.save_graphml(G, folder + "/network.graphml")
    
    # Retrieve only edges from the graph
    nodes_proj, edges = ox.graph_to_gdfs(G, nodes=True, edges=True)
    
    #Get the bounding box of all the edges, this will be the are of interest for each city
    bbox_env = edges.unary_union.envelope

    #Create buffer around city to make sure all count locations are captured
    bbox_env_buffer = bbox_env.buffer(0.08, cap_style=3, join_style=2)

    #Save bounding box as gpkg
    envgdf = gpd.GeoDataFrame(geometry=gpd.GeoSeries(bbox_env_buffer))
    envgdf.to_file(city + connect + "citybound_gpkg.gpkg", driver="GPKG")
    
    print(city + " is finished")

    # Leave time in between extractions to save server connections
    time.sleep(20)

#### Download OSMNx point data

Part of the variables that are being generated are statistics of POI points sorrounding each counter location. To calculate these statistics, the locations of these POI points are downloaded and saved in a city specific folder. The POI location points are extracted from the OSMnx API and saved for each indiviual city in the corresponding folder. 

For each city, the city bound files are used in order to determine the extent to which these POI points are meant to be downloaded. Each categorical collection of POI points is downloaded and saved as a GeoPackage file. 

Dependencies:
1. City folders created
2. City bounds downloaded


In [None]:
### Downloading point data ###
citynames_l = ['Glasgow City']
for c in citynames_l:

    #Get the correct directory to work in
    city_path = directory + '/' + c

    #Read the city bound that has been downloaded
    citybound = gpd.read_file(city_path + '/citybound_gpkg.gpkg')

    #Create shapely polygon from citybounds
    for index, p in citybound.iterrows():
        poly = p.geometry
    
    print(poly)

    #Extracting bus stops
    bus_stops = ox.geometries_from_polygon(poly, tags={'highway': 'bus_stop'})
    bus_stops = bus_stops.to_crs(crs)
    bus_stops = bus_stops[['geometry']]
    bus_stops.to_file(city_path + "/bus_stops.gpkg", driver="GPKG")

    time.sleep(10)

    #Extracting restaurants
    restaurants = ox.geometries_from_polygon(poly, tags={'amenity': ['bar', 'pub', 'restaurant', 'cafe']})
    restaurants = restaurants.to_crs(crs)
    restaurants = restaurants[['geometry']]
    restaurants.to_file(city_path + "/restaurants.gpkg", driver="GPKG")

    time.sleep(10)

    #Extracting bike POIs
    bikePOI = ox.geometries_from_polygon(poly, tags={'amenity': ['bicycle_parking', 'bicycle_repair_station', 'bicycle_rental']})
    bikePOI.to_crs(crs)
    print(bikePOI)
    bikePOI = bikePOI[['geometry']]
    bikePOI.to_file(city_path + "/bikePOI.gpkg", driver="GPKG")

    time.sleep(10)

    #Extracting shops
    shops = ox.geometries_from_polygon(poly, tags={'shop': ['department_store', 'supermarket', 'convenience']})
    shops.to_crs(crs)
    shops = shops[['geometry']]
    shops.to_file(city_path + "/shops.gpkg", driver="GPKG")

    time.sleep(10)

    #Extracting greenspace
    greenspace = ox.geometries_from_polygon(poly, tags={'leisure': ['garden', 'nature_reserve', 'park', 'pitch']})
    greenspace.to_crs(crs)
    greenspace_cut = greenspace[['geometry']]
    greenspace_cut.to_file(city_path + "/greenspace.gpkg", driver="GPKG")

    time.sleep(10)

    #Extracting cyclelanes
    cycleways = ox.geometries_from_polygon(poly, tags={'cycleway': True, 'highway':'cycleway'})
    cycleways.to_crs(crs)
    cycleways_c = cycleways[['geometry']]

    #Create union out of all geometry extracted
    temp_list = []

    for index, x in cycleways_c.iterrows():
        temp_list.append(x.geometry)

    series = gpd.GeoSeries(temp_list)

    #Convert to gdf and export as gpkg
    cycleways_gdf = gpd.GeoDataFrame(geometry=gpd.GeoSeries(series))
    cycleways_gdf.to_file(city_path + "/cycleways.gpkg", driver="GPKG")

    time.sleep(10)
    
    print(c + " done")


#### Extracting Google Earth Engine raster data

The Google Earth Engine API is used in order to download global raster files for calculating additional geographical variables. The raster images are clipped down to the extent of each city using the city bounds. Once extracted, the images will have to be downloaded from Google Drive and inserted into each city folder in order to be able to be used for later calculation. 

IMPORTANT: Google Earth Engine is not able to download raster files to the device locally, it will therefore connect to the users Google Drive account and save the raster files there, which will have to then be manually downloaded and added into the folder corresponding to each of the cities.

Dependencies:
1. City bounds extracted

In [42]:
#Authenticate Earth Engine through Google Account (this accounts Google Drive will be used to save the data)
ee.Authenticate()
ee.Initialize()


Successfully saved authorization token.


In [43]:
### Raster file extraction from GEE ###

for d in citynames_l:
    
    #Get the correct directory to work in
    city_path = directory + '/' + d

    #Read the city bound that has been downloaded
    citybound = gpd.read_file(city_path + '/citybound_gpkg.gpkg')

    #Create shapely polygon from citybounds
    for index, p in citybound.iterrows():
        poly = p.geometry

    #Get citybound coordinates for extraction
    xx, yy = poly.exterior.coords.xy
    x = xx.tolist()
    y = yy.tolist()
    poly_coords = []

    num = 0

    for coord in x:
        temp = []
        temp.append(x[num])
        temp.append(y[num])
        poly_coords.append(temp)
        num += 1

    #Define region as ee.Geometry
    region = ee.Geometry.Polygon(poly_coords)

    ##NDVI##
    #Downloading Sentinal 2 data
    sen_2 = ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED').filterDate('2021-01-01', '2021-12-31').filterBounds(region).filterMetadata('CLOUDY_PIXEL_PERCENTAGE','less_than', 5)

    sen_2 = sen_2.mean()
    
    #Calculating NDVI from image collection
    def calculate_ndvi(image):
        ndvi = image.normalizedDifference(['B8', 'B4']).rename('NDVI')
        return image.addBands(ndvi)

    ndvi = calculate_ndvi(sen_2).select('NDVI')

    #Export to GoogleDrive
    task = ee.batch.Export.image.toDrive(**{
        'image': ndvi,
        'description': 'NVDI',
        'folder': d,
        'scale': 10,
        'region': region.getInfo()['coordinates']
        })

    task.start()
    print('NDVI done for ' + d)

    time.sleep(10)

    ##DEM##
    #Downloading Sentinal 2 data
    srtm = ee.Image('USGS/SRTMGL1_003')
    srtm_s = srtm.select('elevation')

    #Export to GoogleDrive
    task = ee.batch.Export.image.toDrive(**{
        'image': srtm_s,
        'description': 'DEM',
        'folder': d,
        'scale': 30,
        'region': region.getInfo()['coordinates']
        })

    task.start()

    time.sleep(10)

    ##LST##
    #For the LST calculation, 3 raster files are needed. They are downloaded seperately and then used together in calculation.
    #Might have to adjust cloud cover percentage for certain cities
    #Downloading data
    landsat_8 = ee.ImageCollection('LANDSAT/LC08/C02/T1').filterDate('2020-01-01', '2022-12-31').filterBounds(region).filterMetadata('CLOUD_COVER','less_than', 30)

    #Select LST band from recording
    red = landsat_8.select('B4')
    nir = landsat_8.select('B5')
    thermal = landsat_8.select('B10')

    red_mean = red.mean()
    nir_mean = nir.mean()
    thermal_mean = thermal.mean()

    #Export red to GoogleDrive
    task = ee.batch.Export.image.toDrive(**{
        'image': red_mean,
        'description': 'LST_red',
        'folder': d,
        'scale': 30,
        'region': region.getInfo()['coordinates']
        })

    task.start()

    #Export nir to GoogleDrive
    task = ee.batch.Export.image.toDrive(**{
        'image': nir_mean,
        'description': 'LST_nir',
        'folder': d,
        'scale': 30,
        'region': region.getInfo()['coordinates']
        })

    task.start()

    #Export red to GoogleDrive
    task = ee.batch.Export.image.toDrive(**{
        'image': thermal_mean,
        'description': 'LST_thermal',
        'folder': d,
        'scale': 30,
        'region': region.getInfo()['coordinates']
        })

    task.start()

    # time.sleep(10)

    ##Land Cover##
    #Downloading Sentinal 2 data
    lc_2 = ee.ImageCollection('ESA/WorldCover/v100').filterDate('2020-01-01', '2020-12-31').filterBounds(region).first()

    #Select LST band from recording
    lc = lc_2.select('Map')

    #Export to GoogleDrive
    task = ee.batch.Export.image.toDrive(**{
        'image': lc,
        'description': 'landcover',
        'folder': d,
        'scale': 30,
        'region': region.getInfo()['coordinates']
        })

    task.start()

    time.sleep(10)

    ##Pop grid##
    #Downloading WorldPop Global data
    worldpop = ee.ImageCollection('WorldPop/GP/100m/pop').filterDate('2020-01-01', '2020-12-31').filterBounds(region)
    
    worldpop = worldpop.mean()
    worldpop_2 = worldpop.select('population')

    #Export to GoogleDrive
    task = ee.batch.Export.image.toDrive(**{
        'image': worldpop_2,
        'description': 'population',
        'folder': d,
        'scale': 100,
        'region': region.getInfo()['coordinates']
        })

    task.start()
    print('GEE done for ' + d)

NDVI done for Glasgow City
GEE done for Glasgow City


#### Extracting network statistics from OSMnx

Part of the variables that are used in counter prediction are network statistics that are based on the OSMnx network sorrounding each of the counter locations. The below code extracts the networks around each of the counter locations and calculates the basic network statistics provided by OSMnx.


In [5]:
### Extracting network statistics ###

df_keys = []
df_values = []
df_index = []
locations = []
key_saved = False

#Select which counter points are meant to be extracted
df_select = df.iloc[1:3]

for index, row in df_select.iterrows():
    lat = row['latitude']
    lon = row['longitude']
    point = (lat, lon)

    #Download network graph for area
    G = ox.graph_from_point(point, dist=500, network_type="all", clean_periphery=True)
    # G_proj = ox.project_graph(G)

    #Calculate statistics around the particular point
    stats = ox.basic_stats(G)

    print(stats)
#     keys = list(stats.keys())
#     values = list(stats.values())
#     locations.append(row['name'])

#     if key_saved == False:
#         df_keys = keys
#         key_saved = True

#     df_values.append(values)
#     df_index.append(index)

#     time.sleep(20)
    
#     (print('location', index, 'done'))

# df1 = pd.DataFrame(df_values, columns=df_keys)
# df1['locations'] = locations

# df1

inter3 = []
inter4 = []

for x in intersect_count:
    if type(x) is int:
        in_3 = 0
        in_4 = 0
    else:    
        if 3 in x.keys():    
            in_3 = x[3]
        else:
            in_3 = 0
        if 4 in x.keys():
            in_4 = x[4]
        else:
            in_4 = 0
    inter3.append(in_3)
    inter4.append(in_4)

data = pd.DataFrame()
data['3_way_int_count'] = inter3
data['4_way_int_count'] = inter4



{'n': 631, 'm': 1589, 'k_avg': 5.0364500792393025, 'edge_length_total': 59534.31999999988, 'edge_length_avg': 37.46653241032088, 'streets_per_node_avg': 3.198098256735341, 'streets_per_node_counts': {0: 0, 1: 52, 2: 1, 3: 350, 4: 226, 5: 2}, 'streets_per_node_proportions': {0: 0.0, 1: 0.08240887480190175, 2: 0.001584786053882726, 3: 0.554675118858954, 4: 0.358161648177496, 5: 0.003169572107765452}, 'intersection_count': 579, 'street_length_total': 38075.68599999998, 'street_segment_count': 964, 'street_length_avg': 39.49759958506222, 'circuity_avg': 1.0486865710522633, 'self_loop_proportion': 0.002074688796680498}
{'n': 226, 'm': 533, 'k_avg': 4.716814159292035, 'edge_length_total': 18677.141000000003, 'edge_length_avg': 35.041540337711076, 'streets_per_node_avg': 2.7389380530973453, 'streets_per_node_counts': {0: 0, 1: 54, 2: 0, 3: 126, 4: 43, 5: 3}, 'streets_per_node_proportions': {0: 0.0, 1: 0.23893805309734514, 2: 0.0, 3: 0.5575221238938053, 4: 0.1902654867256637, 5: 0.013274336283

In [22]:
#Save results to a csv in directory
df1.to_csv("network_statistics_4.csv")

#### Create buffers

In order to calculate the statistics of the various spatial variables extracted above, buffers are created around each counter location. These buffers are then saved as a geopackage file in each city folder in order to be extracted for calculating the values for the different spatial variables. 

In [4]:
### Create buffers ###

for e in citynames_l:

    #Define path
    city_path = directory + connect + e

    df2 = df[df['city'] == e]
    df3 = df2[['name', 'latitude', 'longitude']].copy()

    #Convert to geopandas with point 
    geometry = [Point(xy) for xy in zip(df3.longitude, df3.latitude)]
    df4 = df3.drop(['longitude', 'latitude'], axis=1)
    gdf = gpd.GeoDataFrame(df4, crs="EPSG:4326", geometry=geometry)
    gdf = gdf.to_crs(crs)

    ##Create buffers##
    buffer = gdf.buffer(370) #buffer zones end up being 500m diameter
    buffer_gdf = gpd.GeoDataFrame(geometry=buffer).reset_index().to_crs(4326)

    #Save gdf of buffers to folder
    folder_name = "buffer_gpkg_1"
    folder_path = directory + connect + e + connect + folder_name

    if not os.path.exists(folder_path):
        os.makedirs(folder_path)

    buffer_gdf.to_file(folder_path + "/point_buffers.gpkg", driver="GPKG")


#### Calculating point variables

These functions use the POI points extracted from OSMnx API to calculate the amount that each POI occurs around each counter location. This calculation loads both the buffers and OSMnx point location to calculate the variables. 

Dependencies:
1. Buffers created
2. OSMnx point data downloaded

In [None]:
### Calculate point variables ###

#Check for overlap between each polygon and points
bike_point_list = []
bus_stops_list = []
restaurant_list = []
shops_list = []

for city in citynames_l:

    #Defining path for current city
    city_path = directory + connect + city

    #Connect folder and load in points
    bike_shops = gpd.read_file(city_path + "/bikePOI.gpkg").to_crs(crs)
    bus_stops = gpd.read_file(city_path + "/bus_stops.gpkg").to_crs(crs)
    restaurants = gpd.read_file(city_path + "/restaurants.gpkg").to_crs(crs)
    shops = gpd.read_file(city_path + "/shops.gpkg").to_crs(crs)

    #Load in buffers
    buffer_gdkg = gpd.read_file(city_path + "/buffer_gpkg/point_buffers.gpkg").to_crs(crs)

    for index, row in buffer_gdkg.iterrows():
        temp = row.geometry
        bike_point_counts = 0
        bus_stop_counts = 0
        restaurant_counts = 0
        shop_counts = 0

        #overlap bike shops
        for index, row in bike_shops.iterrows():
            temp1 = row.geometry
            
            if temp.contains(temp1):
                bike_point_counts += 1

        #overlap bus stops
        for index, row in bus_stops.iterrows():
            temp1 = row.geometry
            
            if temp.contains(temp1):
                bus_stop_counts += 1

        #overlap restaurants
        for index, row in restaurants.iterrows():
            temp1 = row.geometry
            
            if temp.contains(temp1):
                restaurant_counts += 1

        #overlap shops
        for index, row in shops.iterrows():
            temp1 = row.geometry
            
            if temp.contains(temp1):
                shop_counts += 1

        bike_point_list.append(bike_point_counts)
        bus_stops_list.append(bus_stop_counts)
        restaurant_list.append(restaurant_counts)
        shops_list.append(shop_counts)


df5 = pd.DataFrame()

df5['bike_points'] = bike_point_list
df5['bus_stops'] = bus_stops_list
df5['restaurants'] = restaurant_list
df5['shop_list'] = shops_list

df5

In [50]:
#Export dataframe to csv
df5.to_csv('point_data_export_2.csv')

#### Calculating distance to POI variables

Using the greenspace and education POI locations, this algorithm calculates the network distance between each counter location and their closest greenspace and education POI. 

In [None]:
### Calculating distance variabels to POIs ###

greenspace_dist_list = []
bike_dist_list = []

for city in citynames_l:

    city_path = directory + connect + city

    #Creating dataframe
    df2 = df[df['city'] == city]
    df3 = df2[['name', 'latitude', 'longitude']].copy()

    #Convert to geopandas with point 
    geometry = [Point(xy) for xy in zip(df3.longitude, df3.latitude)]
    df4 = df3.drop(['longitude', 'latitude'], axis=1)
    gdf = gpd.GeoDataFrame(df4, crs="EPSG:4326", geometry=geometry)
    gdf = gdf.to_crs(crs)

    #Load in street network graph
    G = ox.load_graphml(city_path + "/network.graphml")

    #Load in greenspace polygons
    greenspace = gpd.read_file(city_path + "/greenspace.gpkg").to_crs(crs)

    for index, point in gdf.iterrows():
        #Find closest polygon and get centroid
        polygon_index = greenspace.distance(point.geometry).sort_values().index[0]
        nearest_centroid = greenspace.loc[polygon_index].geometry.centroid

        #Extract coordinates of point
        x, y = point.geometry.coords.xy
        x = x[0]
        y = y[0]

        #Find closest node
        node_counter = ox.distance.nearest_nodes(G, x, y)
        
        #Extract coordinates of point
        xx, yy = nearest_centroid.xy
        xx = xx[0]
        yy = yy[0]

        #Find closest node
        node_greenspace = ox.distance.nearest_nodes(G, xx, yy)

        #Calculate shortest path
        try:
            nx.shortest_path_length(G, node_counter, node_greenspace)
        
        except:
            s_path = 0
        
        else:
            s_path = nx.shortest_path_length(G, node_counter, node_greenspace)
        
        greenspace_dist_list.append(s_path)

    #Load in street network graph
    G = ox.load_graphml(city_path + "/network.graphml")

    #Load in greenspace polygons
    bikePOI = gpd.read_file(city_path + "/bikePOI.gpkg").to_crs(crs)

    #Import points as gdf and loop through them
    for index, point in gdf.iterrows():
        polygon_index = bikePOI.distance(point.geometry).sort_values().index[0]
        nearest_bike = bikePOI.loc[polygon_index].geometry.centroid

        #Extract coordinates of point
        x, y = point.geometry.coords.xy
        x = x[0]
        y = y[0]

        #Find closest node
        node_counter = ox.distance.nearest_nodes(G, x, y)
        
        #Extract coordinates of point
        xx, yy = nearest_bike.xy
        xx = xx[0]
        yy = yy[0]
        
        #Find closest node
        node_greenspace = ox.distance.nearest_nodes(G, xx, yy)

        #Calculate shortest path
        try:
            nx.shortest_path_length(G, node_counter, node_greenspace)
        
        except:
            s_path = 0
        
        else:
            s_path = nx.shortest_path_length(G, node_counter, node_greenspace)

        bike_dist_list.append(s_path)

    print(city + " is done")

df6 = pd.DataFrame()
df6['dist_to_greenspace'] = greenspace_dist_list
df6['dist_to_bikePOI'] = bike_dist_list

df6

In [10]:
#Export dataframe to csv
df6.to_csv('dist_to_green_bike.csv')

#### Calculate the total length of cyclepath in buffer

Using the downloaded cycling path network, this function calculates the total length of cycling infrastructure found in te buffer around each counter location. 

Dependencies:
1. Buffers created
2. Cycle infrastructure downloaded

In [None]:
### Calculate total length of cyclepaths ###

total_cycle_list = []

for city in citynames_l:    
    
    city_path = directory + connect + city

    #Load in cycling network data and buffer data
    cycleways = gpd.read_file(city_path + "/cycleways.gpkg").to_crs(crs)
    buffer_gdkg = gpd.read_file(city_path + "/buffer_gpkg/point_buffers.gpkg").to_crs(crs)

    for index, buffer in buffer_gdkg.iterrows():
        clip = cycleways.clip(buffer.geometry)
        total_cycle = clip.length.sum()
        
        total_cycle_list.append(total_cycle)

df7  = pd.DataFrame()
df7['cycle_length'] = total_cycle_list

df7

In [52]:
#Export dataframe to csv
df7.to_csv('cycle_length_2.csv')

#### Extract and calculate building area

This code extracts the buildings in the buffer around each counter location and calculates that total amount of area that the buildings cover within each buffer. 

In [None]:
### Extract and calculate building area ###
build_dens_list = []

#Extracting buildings and calculating building density around points
for city in citynames_l:

    city_path = directory + connect + city
    
    #Load in buffers
    buffer_gdkg = gpd.read_file(city_path + "/buffer_gpkg/point_buffers.gpkg").to_crs(4326)

    for index, row in buffer_gdkg.iterrows():
        poly = row.geometry

        #Extracting buildings to calculate building density
        buildings = ox.geometries_from_polygon(poly, tags={'building': True})
        buildings = buildings.to_crs(3857)

        temp_l = []

        for index, x in buildings.iterrows():
            temp_l.append(x.geometry)

        b_series = gpd.GeoSeries(temp_l).area
        b_total_area = b_series.sum()
        build_dens_list.append(b_total_area)

    print(city + ' done')


df8 = pd.DataFrame()
df8['build_area'] = build_dens_list
df8

In [12]:
#Export dataframe to csv
df8.to_csv('building.csv')

#### Calculate streets per node

In [16]:
intersect_count = []

for city in citynames_l:    
    
    city_path = directory + connect + city

    #Load in street network data and buffer data
    buffer_gdkg = gpd.read_file(city_path + "/buffer_gpkg/point_buffers.gpkg").to_crs(4326)

    for polygon in buffer_gdkg['geometry']:
        try:
            G = ox.graph_from_polygon(polygon, network_type='drive')
            node_count = ox.stats.streets_per_node_counts(G)
        except:
            node_count = 0
        intersect_count.append(node_count)

    print(city)

Amsterdam
Rotterdam
New York
Seattle
Utrecht
Norfolk, Virginia
Arlington County
Austin
Minneapolis
Almere
Arnhem
London
Northampton
Leeds
Birmingham
Nijmegen
Apeldoorn
York
Duluth
Maastricht
Boulder
Portland
Cambridge
Denver
Philadelphia
Houston
Dallas
Glasgow City
Edinburgh
Aberdeen
Wageningen
Gouda
San Diego
Venlo
Amersfoort
Tampa
Gainesville
Tallahassee
Fort Lauderdale
Miami
The Hague
Leiden
Zeist
Dordrecht
Ede
Raleigh
Cary
Durham
Greensboro
Charlotte


In [25]:
data.to_csv('3_4_int.csv')

#### Calculate LST and save as new TIF file

This code takes the extracted Landsat 8 raster files in order to calculate a new raster file containing the Land Surface Temperature for each city. 

In [13]:
### Calculate LST layer###

for city in citynames_l:
    
    city_path = directory + '/' + city

    #Load in each band that was exported from GEE
    red = rasterio.open(city_path + "/LST_red.tif")
    nir = rasterio.open(city_path + "/LST_nir.tif")
    thermal = rasterio.open(city_path + "/LST_thermal.tif")

    #Read the bands to be used for calculation
    redImage = red.read(1).astype('f4')
    nirImage = nir.read(1).astype('f4')
    thermalImage = thermal.read(1).astype('f4')

    lst_image_single_window = single_window(thermalImage, redImage, nirImage, unit='celcius')

    #Define affine transformation
    affine = red.transform

    #Create new raster file with calculated variables
    with rasterio.open(
        city_path + "/LST_calculated.tif",
        mode="w",
        driver="GTiff",
        height=lst_image_single_window.shape[0],
        width=lst_image_single_window.shape[1],
        count=1,
        dtype=lst_image_single_window.dtype,
        crs=4326,
        transform=affine,
                        ) as new_dataset:
            new_dataset.write(lst_image_single_window, 1)


#### Calculate raster statistics

In [None]:
### Calculate raster statistics ###

#Define lists for each variable
ndvi_mean_lst = []
ndvi_std_lst = []
dem_mean_lst = []
dem_std_lst = []
lst_mean_lst = []
lst_std_lst = []
lc_entropy_lst = []
pop_mean_lst = []
pop_std_lst = []

def entropy(x):
    unique = np.unique(x, return_counts=True)
    unique_count = len(unique[0])
    unique_size = unique[1]
    total_size = x.size
    
    total = 0.0

    for y in unique_size:
        r = (y/total_size)*np.log(y/total_size)
        total = r + total

    c_entropy = (total/np.log(unique_count))*-1

    return c_entropy

for city in citynames_l:

    city_path = directory + connect + city

    #Load in the buffers around the counter locations
    buffer_gdkg = gpd.read_file(city_path + "/buffer_gpkg/point_buffers.gpkg").to_crs(4326)

    ##NDVI##
    #Load in population file
    ndvi = rasterio.open(city_path + "/NVDI.tif")
    arr = ndvi.read(1)
    affine = ndvi.transform

    statistics = zonal_stats(buffer_gdkg, arr, affine=affine, stats=['mean', 'std'])

    for x in statistics:
        ndvi_mean = x['mean']
        ndvi_std = x['std']

        ndvi_mean_lst.append(ndvi_mean)
        ndvi_std_lst.append(ndvi_std)

    ##DEM##
    #Load in population file
    dem = rasterio.open(city_path + "/DEM.tif")
    arr = dem.read(1)
    affine = dem.transform

    statistics = zonal_stats(buffer_gdkg, arr, affine=affine, stats=['mean', 'std'])

    for x in statistics:
        dem_mean = x['mean']
        dem_std = x['std']

        dem_mean_lst.append(dem_mean)
        dem_std_lst.append(dem_std)


    ##LST##
    #Load in lst files
    lst = rasterio.open(city_path + "/LST_calculated.tif")
    arr = lst.read(1)
    affine = lst.transform

    # #Load in buffer file
    buffer_gdkg = gpd.read_file(city_path + "/buffer_gpkg/point_buffers.gpkg").to_crs(4326)

    statistics = zonal_stats(buffer_gdkg, arr, affine=affine, stats=['mean', 'std'])

    for x in statistics:
        lst_mean = x['mean']
        lst_std = x['std']

        lst_mean_lst.append(lst_mean)
        lst_std_lst.append(lst_std)

    
    ##Populationn density##
    #Load in population files
    pop = rasterio.open(city_path + "/population.tif")
    arr = pop.read(1)
    affine = pop.transform

    # #Load in buffer file
    buffer_gdkg = gpd.read_file(city_path + "/buffer_gpkg/point_buffers.gpkg").to_crs(4326)

    statistics = zonal_stats(buffer_gdkg, arr, affine=affine, stats=['mean', 'std'])

    for x in statistics:
        pop_mean = x['mean']
        pop_std = x['std']

        pop_mean_lst.append(pop_mean)
        pop_std_lst.append(pop_std)

    
    ##Land Cover entropy##
    lc = rasterio.open(city_path + "/landcover.tif")
    arr = lc.read(1)
    affine = lc.transform

    # #Load in buffer file
    buffer_gdkg = gpd.read_file(city_path + "/buffer_gpkg/point_buffers.gpkg").to_crs(4326)

    statistics = zonal_stats(buffer_gdkg, arr, affine=affine, stats=['majority'], add_stats={'entropy':entropy})

    for x in statistics:
        lc_entropy = x['entropy']
        lc_entropy_lst.append(lc_entropy)

#Add all variables to dataframe
df9 = pd.DataFrame()
df9['ndvi_mean'] = ndvi_mean_lst
df9['ndvi_std'] = ndvi_std_lst
df9['dem_mean'] = dem_mean_lst
df9['dem_std'] = dem_std_lst
df9['lst_mean'] = lst_mean_lst
df9['lst_std'] = lst_std_lst
df9['pop_mean'] = pop_mean_lst
df9['pop_std'] = pop_std_lst
df9['lc_entropy'] = lc_entropy_lst

df9

In [17]:
#Export dataframe to csv
df9.to_csv('raster_calculations.csv')