# Darmstadt Urban Heat Typology Analysis
- Geoff & Avi - 10-26-24
- we're trying to understand the affect of different types of 'built environment' on urban heat patterns
- starting with data collected from Darmstadt

In [1]:
#import data libraries
import pandas as pd

#import geographic analysis libraries
import geopandas as gpd
from geopandas import GeoDataFrame
import shapely as shp
from shapely.geometry import Point
from shapely.geometry import shape
import os
import re
from fiona.crs import from_epsg
import pysal as ps
#set crs for entire analysis
crs = {'init': 'epsg:4326'}
pd.options.display.max_columns = 300
pd.options.display.max_rows = 100

In [2]:
#import machine learning / regression libraries
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn import tree
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LassoCV
from sklearn.linear_model import RidgeCV
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LogisticRegression
import statsmodels.api as sm

In [61]:
### read in data
temp_df = pd.read_excel('../data/UHT_paper_data.xlsx', sheet_name='Temp')
area_df = pd.read_excel('../data/darmstadt_area_table.xlsx')
temp_disagg_df = pd.read_excel('../data/HW_data_v2.xlsx', sheet_name='DT_mean')

In [62]:
area_df.head()

Unnamed: 0,Name_Bezir,EST11,EST12,EST13,EST1A,EST1B,EST2,EST3,EST4,EST5,EST6,EST7,EST8,Distist_area
0,110 Stadtzentrum,123300.0,,,,,,,,27900.0,,25200.0,115200.0,301500.0
1,120 Rheintor/Grafenstraße,,,,,,,,,,,,233100.0,399600.0
2,130 Hochschulviertel,115200.0,,,,,,,,36000.0,,,,416700.0
3,140 Kapellplatzviertel,18000.0,,,11700.0,35100.0,11700.0,45900.0,,177300.0,,,,491400.0
4,150 St. Ludwig mit Eichbergviertel,,,,33300.0,22500.0,18000.0,189000.0,,188100.0,21600.0,,,720900.0


In [63]:
area_df.fillna(0, inplace=True)

In [64]:
area_df.shape

(34, 14)

In [68]:
area_df.head()

Unnamed: 0,Name_Bezir,EST11,EST12,EST13,EST1A,EST1B,EST2,EST3,EST4,EST5,EST6,EST7,EST8,Distist_area
0,110 Stadtzentrum,123300.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,27900.0,0.0,25200.0,115200.0,301500.0
1,120 Rheintor/Grafenstraße,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,233100.0,399600.0
2,130 Hochschulviertel,115200.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,36000.0,0.0,0.0,0.0,416700.0
3,140 Kapellplatzviertel,18000.0,0.0,0.0,11700.0,35100.0,11700.0,45900.0,0.0,177300.0,0.0,0.0,0.0,491400.0
4,150 St. Ludwig mit Eichbergviertel,0.0,0.0,0.0,33300.0,22500.0,18000.0,189000.0,0.0,188100.0,21600.0,0.0,0.0,720900.0


In [67]:
typology_list = ['EST11', 'EST12', 'EST13', 'EST1A', 'EST1B', 'EST2',
       'EST3', 'EST4', 'EST5', 'EST6', 'EST7', 'EST8']

In [69]:
for typology in typology_list:
    new_column_name = typology+'_pct'
    area_df[new_column_name] = area_df[typology] / area_df['Distist_area']

In [72]:
area_df_small = area_df[['Name_Bezir', 'EST11_pct', 'EST12_pct', 'EST13_pct', 'EST1A_pct', 'EST1B_pct',
       'EST2_pct', 'EST3_pct', 'EST4_pct', 'EST5_pct', 'EST6_pct', 'EST7_pct',
       'EST8_pct']]

In [73]:
area_df_small.rename(columns={'Name_Bezir': 'district'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  area_df_small.rename(columns={'Name_Bezir': 'district'}, inplace=True)


In [35]:
darmstadt_shp = gpd.read_file('../data/restatisticalanalysissos/20240506_Darmstadt_District_Typology_divided_shapefile.shp')

In [90]:
darmstadt_shp.head()

Unnamed: 0,Layer_left,Name_Bezir,Entity_lef,Handle,LyrFrzn,LyrOn,Color_left,Linetype_l,Elevation_,LineWt_lef,RefName_le,DocUpdate,DocId,GlobalWidt,Shape_Leng,Shape_Area,index_righ,OBJECTID,Entity_rig,Layer_righ,Color_righ,Linetype_r,Elevatio_1,LineWt_rig,RefName_ri,Shape_Le_1,Shape_Ar_1,Final_laye,geometry
0,EST11_park,110 Stadtzentrum,2DPolyline2DPolyline,6A7C7871,0,2,144,ContinuousContinuous,0.0,140,0.0,2024-04-252024-04-25,1.456447e+19,0.0,2015.933934,121041.413541,32,34,LWPolylineLWPolyline,10011001,14,ContinuousContinuous,0.0,50,0.0,5191.575715,599710.2,110 Stadtzentrum_EST11_park,"MULTIPOLYGON Z (((8.64996 49.87541 0.00000, 8...."
1,EST11_park,130 Hochschulviertel,2DPolyline,7871,0,1,72,Continuous,0.0,70,0.0,2024-04-25,7.282236e+18,0.0,1619.688981,113675.52811,17,18,LWPolyline,1001,7,Continuous,0.0,25,0.0,2811.905234,414414.6,130 Hochschulviertel_EST11_park,"POLYGON Z ((8.65074 49.88018 0.00000, 8.65276 ..."
2,EST11_park,140 Kapellplatzviertel,2DPolyline,78DB,0,1,72,Continuous,0.0,70,0.0,2024-04-25,7.282236e+18,0.0,630.405464,17939.223471,20,21,LWPolyline,1001,7,Continuous,0.0,25,0.0,3382.614724,496346.6,140 Kapellplatzviertel_EST11_park,"POLYGON Z ((8.65481 49.86413 0.00000, 8.65544 ..."
3,EST11_park,270 Am Ziegelbusch,2DPolyline,534B,0,1,72,Continuous,0.0,70,0.0,2024-04-25,7.282236e+18,0.0,1243.827946,19996.898504,9,10,LWPolyline,1001,7,Continuous,0.0,25,0.0,6249.417123,2431327.0,270 Am Ziegelbusch_EST11_park,"POLYGON Z ((8.66620 49.88473 0.00000, 8.66660 ..."
4,EST11_park,310 Am Oberfeld,2DPolyline,5DA3,0,1,72,Continuous,0.0,70,0.0,2024-04-25,7.282236e+18,0.0,237.168703,2566.217829,6,7,LWPolyline,1001,7,Continuous,0.0,25,0.0,20788.406205,19200960.0,310 Am Oberfeld_EST11_park,"POLYGON Z ((8.67007 49.87927 0.00000, 8.66998 ..."


In [37]:
darmstadt_shp_small = darmstadt_shp[['Final_laye', 'Layer_left', 'geometry']]

In [46]:
temp_disagg_df_small = temp_disagg_df[['Final_laye', 'AREA', 'HW_avg', 'not_HW_avg']]
temp_disagg_df_small.columns = ['distrct_typology', 'area', 'hw_avg', 'not_hw_avg']

In [78]:
lst_df = pd.read_excel('../data/darmstadt_LST.xlsx')

In [80]:
lst_df_small = lst_df[['Name_Bezir', 'HW_avg']]

In [82]:
lst_df_small.columns = ['district', 'HW_avg']

In [84]:
lst_df_small.shape

(34, 2)

In [85]:
area_df_small.shape

(34, 13)

In [86]:
area_w_temp = area_df_small.merge(lst_df_small)

In [87]:
area_w_temp.head()

Unnamed: 0,district,EST11_pct,EST12_pct,EST13_pct,EST1A_pct,EST1B_pct,EST2_pct,EST3_pct,EST4_pct,EST5_pct,EST6_pct,EST7_pct,EST8_pct,HW_avg
0,110 Stadtzentrum,0.408955,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.092537,0.0,0.083582,0.38209,39.43683
1,120 Rheintor/Grafenstraße,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.583333,39.200845
2,130 Hochschulviertel,0.276458,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.086393,0.0,0.0,0.0,37.654671
3,140 Kapellplatzviertel,0.03663,0.0,0.0,0.02381,0.071429,0.02381,0.093407,0.0,0.360806,0.0,0.0,0.0,37.708665
4,150 St. Ludwig mit Eichbergviertel,0.0,0.0,0.0,0.046192,0.031211,0.024969,0.262172,0.0,0.260924,0.029963,0.0,0.0,37.354722


In [88]:
area_w_temp.columns

Index(['district', 'EST11_pct', 'EST12_pct', 'EST13_pct', 'EST1A_pct',
       'EST1B_pct', 'EST2_pct', 'EST3_pct', 'EST4_pct', 'EST5_pct', 'EST6_pct',
       'EST7_pct', 'EST8_pct', 'HW_avg'],
      dtype='object')

In [89]:
### put together X and y for regression
y = area_w_temp.HW_avg
### get list of all new dummy columns and all other columns of interest
X_columns = [ 'EST11_pct', 'EST12_pct', 'EST13_pct', 'EST1A_pct',
       'EST1B_pct', 'EST2_pct', 'EST3_pct', 'EST4_pct', 'EST5_pct', 'EST6_pct',
       'EST7_pct', 'EST8_pct']
X = area_w_temp[X_columns]

### fit linear regression to X and y (not x train and y train)
model = sm.OLS(y.astype(float), X.astype(float)).fit()
predictions = model.predict(X) # make the predictions by the model

# Print out the statistics
model.summary()

0,1,2,3
Dep. Variable:,HW_avg,R-squared (uncentered):,0.755
Model:,OLS,Adj. R-squared (uncentered):,0.622
Method:,Least Squares,F-statistic:,5.656
Date:,"Sat, 26 Oct 2024",Prob (F-statistic):,0.000233
Time:,11:19:27,Log-Likelihood:,-146.28
No. Observations:,34,AIC:,316.6
Df Residuals:,22,BIC:,334.9
Df Model:,12,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
EST11_pct,80.0690,73.572,1.088,0.288,-72.510,232.648
EST12_pct,758.2396,1197.497,0.633,0.533,-1725.218,3241.697
EST13_pct,205.9529,196.248,1.049,0.305,-201.040,612.946
EST1A_pct,83.9535,57.225,1.467,0.157,-34.725,202.632
EST1B_pct,97.2214,291.862,0.333,0.742,-508.063,702.506
EST2_pct,29.0899,145.638,0.200,0.844,-272.945,331.124
EST3_pct,104.8489,48.619,2.157,0.042,4.019,205.679
EST4_pct,181.8364,188.623,0.964,0.346,-209.343,573.016
EST5_pct,50.3617,18.144,2.776,0.011,12.733,87.991

0,1,2,3
Omnibus:,3.044,Durbin-Watson:,1.168
Prob(Omnibus):,0.218,Jarque-Bera (JB):,1.443
Skew:,-0.056,Prob(JB):,0.486
Kurtosis:,1.997,Cond. No.,72.7


In [92]:
temp_small = temp_disagg_df[['Name_Bezir', 'Typology', 'HW_avg']]

In [96]:
temp_small_w_dummies = pd.get_dummies(temp_small, columns=['Typology'])

In [98]:
temp_small_w_dummies.columns

Index(['Name_Bezir', 'HW_avg', 'Typology_EST11', 'Typology_EST12',
       'Typology_EST13', 'Typology_EST14', 'Typology_EST1A', 'Typology_EST1B',
       'Typology_EST2', 'Typology_EST3', 'Typology_EST4', 'Typology_EST5',
       'Typology_EST6', 'Typology_EST7', 'Typology_EST8'],
      dtype='object')

In [100]:
temp_small_w_dummies.shape

(192, 15)

In [101]:
temp_small_w_dummies.head()

Unnamed: 0,Name_Bezir,HW_avg,Typology_EST11,Typology_EST12,Typology_EST13,Typology_EST14,Typology_EST1A,Typology_EST1B,Typology_EST2,Typology_EST3,Typology_EST4,Typology_EST5,Typology_EST6,Typology_EST7,Typology_EST8
0,110 Stadtzentrum,35.961281,1,0,0,0,0,0,0,0,0,0,0,0,0
1,130 Hochschulviertel,35.774225,1,0,0,0,0,0,0,0,0,0,0,0,0
2,140 Kapellplatzviertel,35.394494,1,0,0,0,0,0,0,0,0,0,0,0,0
3,270 Am Ziegelbusch,35.325504,1,0,0,0,0,0,0,0,0,0,0,0,0
4,310 Am Oberfeld,35.544379,1,0,0,0,0,0,0,0,0,0,0,0,0


In [99]:
### put together X and y for regression
y = temp_small_w_dummies.HW_avg
### get list of all new dummy columns and all other columns of interest
X_columns = ['Typology_EST11', 'Typology_EST12',
       'Typology_EST13', 'Typology_EST14', 'Typology_EST1A', 'Typology_EST1B',
       'Typology_EST2', 'Typology_EST3', 'Typology_EST4', 'Typology_EST5',
       'Typology_EST6', 'Typology_EST7', 'Typology_EST8']
X = temp_small_w_dummies[X_columns]

### fit linear regression to X and y (not x train and y train)
model = sm.OLS(y.astype(float), X.astype(float)).fit()
predictions = model.predict(X) # make the predictions by the model

# Print out the statistics
model.summary()

0,1,2,3
Dep. Variable:,HW_avg,R-squared:,0.305
Model:,OLS,Adj. R-squared:,0.258
Method:,Least Squares,F-statistic:,6.544
Date:,"Sat, 26 Oct 2024",Prob (F-statistic):,1.23e-09
Time:,11:36:39,Log-Likelihood:,-285.74
No. Observations:,192,AIC:,597.5
Df Residuals:,179,BIC:,639.8
Df Model:,12,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Typology_EST11,35.5639,0.420,84.770,0.000,34.736,36.392
Typology_EST12,34.8204,0.785,44.364,0.000,33.272,36.369
Typology_EST13,35.4892,0.335,106.042,0.000,34.829,36.150
Typology_EST14,36.8747,0.227,162.749,0.000,36.428,37.322
Typology_EST1A,36.2938,0.218,166.726,0.000,35.864,36.723
Typology_EST1B,36.9028,0.262,141.052,0.000,36.387,37.419
Typology_EST2,36.6112,0.214,171.388,0.000,36.190,37.033
Typology_EST3,36.7037,0.210,174.974,0.000,36.290,37.118
Typology_EST4,36.3117,0.287,126.700,0.000,35.746,36.877

0,1,2,3
Omnibus:,4.543,Durbin-Watson:,1.58
Prob(Omnibus):,0.103,Jarque-Bera (JB):,4.374
Skew:,-0.369,Prob(JB):,0.112
Kurtosis:,3.036,Cond. No.,3.74
