In [40]:
import geopandas as gpd
import os
import pandas as pd
from scipy.io import netcdf
import netCDF4
# Root of this project's data tree; project-relative folders are derived from it
# so the location only has to be changed in one place.
ROOTFOLDER = "D:/Dropbox (Personal)/Personal Work/_Projects2023/01_city-never-was/_data"
# FFDAS carbon-emission flux grid for 2015 (NetCDF), stored outside the project tree.
RAW_PATH = "D:/Dropbox (Personal)/Personal Work/_commondata/_world_data/01_carbon_emission_ffdas/ffdas_flux_2013_2015.nc.gz/ffdas_flux_2015b.nc/ffdas_flux_2015.nc"
# Output folder for the transformed city-profile tables.
TRANSFORM_FOLDER = f"{ROOTFOLDER}/_transformed/t_city_profiles"

In [2]:
# Open the flux grid read-only and show which variables the file provides.
nc = netCDF4.Dataset(RAW_PATH, mode="r")
nc.variables.keys()

dict_keys(['latitude', 'latitude_edge', 'longitude', 'longitude_edge', 'flux'])

In [3]:
# Reshape the 2-D flux grid into a long table with one row per
# (latitude, longitude) pair.
flux_grid = (
    pd.DataFrame(nc.variables["flux"][:])
    .assign(latitude=nc.variables["latitude"][:])   # one latitude per grid row
    .set_index("latitude")
    .set_axis(nc.variables["longitude"][:], axis=1)  # one longitude per grid column
)
# stack() moves the longitude labels into an unnamed index level, which
# reset_index() exposes as "level_1"; the stacked values arrive as column 0.
df = (
    flux_grid
    .stack()
    .reset_index()
    .rename(columns={0: 'flux', 'level_1': 'longitude'})
)

In [4]:
# Save the long-format flux table as CSV in the raw city-profile folder.
RAW_FOLDER = f"{ROOTFOLDER}/_raw/_city_profiles"
raw_flux_csv = os.path.join(RAW_FOLDER, "r_ffdas_flux_2015.csv")
df.to_csv(raw_flux_csv, index=False)

## Aggregate to each city

In [8]:
# Locate the boundary file (one GeoJSON per city) for every selected city.
ROOTFOLDER = "D:/Dropbox (Personal)/Personal Work/_Projects2023/01_city-never-was/_data"
RAW_BOUND_FOLDER = f"{ROOTFOLDER}/_raw/r_boundary_osm"
TRANSFORM_FOLDER = f"{ROOTFOLDER}/_transformed/t_city_profiles/"
# os.listdir returns entries in arbitrary order; sort so the row order of the
# combined boundary table is the same on every machine and every run.
boundfiles = sorted(f for f in os.listdir(RAW_BOUND_FOLDER) if f.endswith('.geojson'))
print("loading boundary files: ", len(boundfiles))

loading boundary files:  127


In [16]:
# Read every city boundary, tag it with its city key, and combine all of them
# into one table in EPSG:4326.
boundary_frames = []
for fname in boundfiles:
    city_bound = gpd.read_file(os.path.join(RAW_BOUND_FOLDER, fname))
    # The city key is the file name without its extension. splitext strips only
    # the final ".geojson", so a name containing other dots is not truncated.
    city_bound['city_lower'] = os.path.splitext(fname)[0]
    # to_crs needs a CRS on the source file; a file without one raises here.
    boundary_frames.append(city_bound[['city_lower', 'geometry']].to_crs("EPSG:4326"))
allbound = pd.concat(boundary_frames).reset_index(drop=True)
allbound.head()
    

Unnamed: 0,city_lower,geometry
0,accra,"POLYGON ((-0.28413 5.57195, -0.28386 5.57090, ..."
1,amsterdam,"MULTIPOLYGON (((4.72878 52.40071, 4.75607 52.3..."
2,antwerp,"POLYGON ((4.21758 51.37389, 4.21826 51.37221, ..."
3,astrakhan,"POLYGON ((47.87112 46.26966, 47.87152 46.26632..."
4,athens,"MULTIPOLYGON (((22.89876 36.19485, 22.89888 36..."


In [39]:
# List the distinct city keys present in the combined boundary table.
city_keys = allbound["city_lower"]
city_keys.unique()

array(['accra', 'amsterdam', 'antwerp', 'astrakhan', 'athens', 'auckland',
       'bacolod', 'bangalore', 'bangkok', 'belgrade', 'belohorizonte',
       'berezniki', 'berlin', 'bogotá', 'boston', 'brussels', 'budapest',
       'buenosaires', 'capetown', 'cebucity', 'chicago', 'cirebon',
       'cleveland', 'cochabamba', 'copenhagen', 'culiacan', 'curitiba',
       'delhi', 'denver', 'detroit', 'dhaka', 'dubai', 'dzerzhinsk',
       'florianopolis', 'fukuoka', 'gaborone', 'gainesville', 'gombe',
       'guadalajara', 'guatemalacity', 'hindupur', 'hongkong', 'houston',
       'hyderabad', 'ilheus', 'istanbul', 'jaipur', 'jakarta', 'jalna',
       'jequie', 'jerusalem', 'johannesburg', 'kampala', 'kanpur',
       'kaunas', 'kigali', 'killeen', 'kozhikode', 'kualalumpur', 'kyiv',
       'lagos', 'lemans', 'lima', 'london', 'losangeles', 'madrid',
       'malegaon', 'manchester', 'manila', 'medan', 'metromanila',
       'mexicocity', 'miami', 'milan', 'minneapolis', 'modesto',
       'montr

In [14]:
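# One-off data fix, kept commented out for reference: declare the Athens boundary
# file's CRS as EPSG:2100, reproject it to EPSG:4326, and overwrite the file in place.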
# temp = gpd.read_file(os.path.join(RAW_BOUND_FOLDER, "athens.geojson"))
# temp.crs = "EPSG:2100"
# temp = temp.to_crs("EPSG:4326")
# temp.to_file(os.path.join(RAW_BOUND_FOLDER, "athens.geojson"), driver='GeoJSON')

In [18]:
# Write the combined city boundaries to a single GeoJSON file.
boundary_out_path = os.path.join(TRANSFORM_FOLDER, "t_city_boundary.geojson")
allbound.to_file(boundary_out_path, driver='GeoJSON')

In [43]:
# Half the width of one flux grid cell, in degrees (the grid is at 0.1 degree
# resolution).
FLUX_CELL_HALF_WIDTH_DEG = 0.05

# Reload the long-format flux table and keep only cells with positive flux.
df = pd.read_csv(os.path.join(RAW_FOLDER, "r_ffdas_flux_2015.csv"))
df = df[df['flux'] > 0].reset_index(drop=True)
# One point per grid cell.
# NOTE(review): assumes latitude/longitude are cell centres (the file also has
# separate *_edge variables) - confirm against the dataset documentation.
df = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.longitude, df.latitude), crs="EPSG:4326")
df_buffer = df.copy()
# Turn each point into the 0.1 x 0.1 degree square it represents: the bounding
# box of a half-cell-radius buffer. The earlier buffer(0.1) made circles 0.2
# degrees across, so neighbouring footprints overlapped and the spatial join
# attributed cells well outside a city's boundary to that city.
# geopandas still warns about buffering in a geographic CRS; degrees are the
# intended unit here because the grid itself is defined in degrees.
df_buffer['geometry'] = df_buffer.geometry.buffer(FLUX_CELL_HALF_WIDTH_DEG).envelope


  df_buffer['geometry'] = df_buffer.geometry.buffer(0.1)


In [44]:
# Keep only the flux cells that spatially match a city boundary. Each matched
# cell/city pair becomes one row carrying that city's key.
flux_cells = df_buffer[['flux', 'geometry', 'latitude', 'longitude']]
seldf = gpd.sjoin(flux_cells, allbound, how='inner')

In [46]:
# First export: per-city flux summary (sum, mean, and number of matched cells).
# Named aggregation yields the final column names directly, replacing the
# earlier rename / droplevel / overwrite sequence on MultiIndex columns.
seldfagg = (
    seldf
    .groupby('city_lower')['flux']
    .agg(total_flux='sum', mean_flux='mean', flux_n_points='count')
    .reset_index()
)

# Largest emitters first in the exported table.
seldfagg.sort_values("total_flux", ascending=False).to_csv(
    os.path.join(TRANSFORM_FOLDER, "t_ffdas_flux_2015.csv"), index=False
)

127

In [62]:
# Fill each matched flux cell with H3 hexagons at several resolutions.
import h3pandas  # imported for its side effect: registers the `.h3` accessor

# NOTE(review): finer resolutions produce far more hexagons per polygon;
# resolution 12 may be very slow or memory-hungry - confirm it is needed.
H3_RESOLUTIONS = [6, 9, 12]

h3_columns = []
for res in H3_RESOLUTIONS:
    filled = seldf.h3.polyfill(resolution=res)
    # polyfill stores its result in a column named "h3_polyfill". Selecting a
    # non-existent f"h3_{res}" column, as before, yields a column of NaN.
    h3_columns.append(
        filled["h3_polyfill"].rename(f"h3_{res}").reset_index(drop=True)
    )
    print(f"resolution {res} done")

# One column per resolution, rows in the same order as `seldf`.
# NOTE(review): no later cell visible here consumes h3_df - verify it is exported.
h3_df = pd.concat(h3_columns, axis=1).reset_index(drop=True)

In [38]:
# Export the matched flux cells without geometry, join bookkeeping, or coordinates.
# NOTE(review): the file name mentions "hex" but no hexagon columns are written
# by this cell - confirm the intended contents.
flux_by_city = seldf.drop(columns=['geometry', 'index_right', 'latitude', 'longitude'])
flux_by_city.to_csv(os.path.join(TRANSFORM_FOLDER, "t_ffdas_flux_hex_2015.csv"), index=False)