In [None]:
import pandas as pd
import geopandas as gpd
import shapely
import numpy as np
from pathlib import Path
import os
import rasterio
from rasterio.plot import show
from shapely.geometry import Point, Polygon
from rasterstats import zonal_stats
from rasterio.plot import show_hist
from rasterio.mask import mask
import matplotlib.pyplot as plt
import contextily as ctx
import seaborn as sns

## Load data

In [None]:
# Relative data folder (resolved against the notebook's working directory).
data_folder = Path('..') / 'Data'
# Absolute, machine-specific root of the external databases.
databases = Path('/Users/david/Dropbox/PhD/Data')

In [None]:
# Canton boundaries shapefile, located through the configured `databases` root
# rather than a repeated absolute path, then reprojected to CRS 2056.
cantons_ch = gpd.read_file(
    databases / 'Databases' / 'Shapefiles' / 'SHAPEFILE_LV95_LN02'
    / 'swissBOUNDARIES3D_1_3_TLM_KANTONSGEBIET.shp'
).to_crs(2056)

In [None]:
# STATPOP 2020 table (semicolon-separated), located through the configured
# `databases` root rather than a repeated absolute path.
statpop = pd.read_csv(
    databases / 'Databases' / 'OFS' / 'ag-b-00.03-vz2020statpop' / 'STATPOP2020.csv',
    sep=';',
)
# Plain tabular copy taken before any geometry is attached; the next cell
# turns it into hectare polygons.
statpop_ha = statpop.copy()
# Vectorised point construction from the E_KOORD / N_KOORD columns, instead of
# creating one shapely Point per row in a Python loop.
geometry = gpd.points_from_xy(statpop['E_KOORD'], statpop['N_KOORD'])
statpop_point = gpd.GeoDataFrame(statpop, crs=2056, geometry=geometry)

In [None]:
# One 100 x 100 square per row, anchored at (E_KOORD, N_KOORD); vertices are
# listed as (e, n) -> (e, n+100) -> (e+100, n+100) -> (e+100, n).
geometry = [
    Polygon([(e, n), (e, n + 100), (e + 100, n + 100), (e + 100, n)])
    for e, n in zip(statpop_ha.E_KOORD, statpop_ha.N_KOORD)
]
statpop_ha = gpd.GeoDataFrame(statpop_ha, geometry=geometry, crs=2056)

In [None]:
# Folder containing the regBL export files read below (absolute, machine-specific path).
regbl_db = Path(
    '/Users/david/Dropbox/PhD/GitHub/SanteIntegra/Data/raw/OFS/regBL/GWR_MADD_Export_MADD-20210920-A4_20220427/'
)

In [None]:
# Code workbook stored in the regBL export folder.
regbl_codebook = pd.read_excel(regbl_db.joinpath('GWRCodes.xlsx'))

In [None]:
def _read_regbl_dsv(table, kind):
    """Read one tab-separated file of the regBL export folder.

    Parameters
    ----------
    table : str
        Table code used in the file name ('EIN', 'GEB', 'GST' or 'WHG').
    kind : str
        'Data' for the records, 'Readme' for the accompanying description file.

    Returns
    -------
    pandas.DataFrame
        Content of ``GWR_MADD_<table>-03_<kind>_MADD-20210920-A4_20220427.dsv``.
    """
    file_name = 'GWR_MADD_{}-03_{}_MADD-20210920-A4_20220427.dsv'.format(table, kind)
    return pd.read_csv(regbl_db / file_name, sep='\t')


# Data tables: A = EIN, B = GEB, C = GST, D = WHG.
regbl_df_A = _read_regbl_dsv('EIN', 'Data')
regbl_df_B = _read_regbl_dsv('GEB', 'Data')
regbl_df_C = _read_regbl_dsv('GST', 'Data')
regbl_df_D = _read_regbl_dsv('WHG', 'Data')

# Matching "Readme" files, kept as per-table codebooks.
regbl_df_A_codebook = _read_regbl_dsv('EIN', 'Readme')
regbl_df_B_codebook = _read_regbl_dsv('GEB', 'Readme')
regbl_df_C_codebook = _read_regbl_dsv('GST', 'Readme')
regbl_df_D_codebook = _read_regbl_dsv('WHG', 'Readme')

In [None]:
# Number of regbl_df_D rows per (EGID, WAZIM) pair; WAZIM is exposed as
# `n_pieces` and the row count as `n_logement`.
n_pieces_by_logement_by_egid = (
    regbl_df_D
    .groupby(['EGID', 'WAZIM'])
    .size()
    .reset_index(name='n_logement')
    .rename(columns={'WAZIM': 'n_pieces'})
)

In [None]:
# Attach the GKODE / GKODN columns of table B to every row of table D
# (inner join on EGID, after de-duplicating the coordinate rows).
regbl_df_D_geom = (
    regbl_df_B[['EGID', 'GKODE', 'GKODN']]
    .drop_duplicates()
    .merge(regbl_df_D, on='EGID')
)

In [None]:
# Point geometry built from the GKODE / GKODN columns, declared in CRS 2056.
regbl_gdf_D = gpd.GeoDataFrame(
    regbl_df_D_geom,
    geometry=gpd.points_from_xy(regbl_df_D_geom['GKODE'], regbl_df_D_geom['GKODN']),
    crs=2056,
)

In [None]:
# Tag every point with the NAME of the canton polygon it intersects.
# `predicate=` replaces the deprecated `op=` keyword of gpd.sjoin.
# The join adds an `index_right` column, which a later cell drops.
regbl_gdf_D = gpd.sjoin(
    regbl_gdf_D,
    cantons_ch[['NAME', 'geometry']],
    predicate='intersects',
)

## Données loyer OFS

In [None]:
# Microgis neighbourhood polygons and their attribute table, located through
# the configured `databases` root rather than repeated absolute paths.
microgis_geom = gpd.read_file(
    databases / 'Databases' / 'Microgis_Geneva' / 'JOOSTSPECIAL_NB_2018.shp'
)
microgis_data = pd.read_csv(
    databases / 'Databases' / 'Microgis_Geneva' / 'joost_microgis_data_2018.csv'
)

In [None]:
# Measure the polygons in CRS 2056, the projected CRS used everywhere else in
# this notebook, instead of whatever CRS the shapefile was read in. The frame
# itself is only reprojected in the next cell, so computing `.area` on the raw
# geometry would return values in the source CRS units.
microgis_geom['area'] = microgis_geom.geometry.to_crs(2056).area

In [None]:
# Reproject the neighbourhood polygons to the CRS shared by the other layers.
microgis_geom = microgis_geom.to_crs(epsg=2056)

In [None]:
# Assign every regBL point to the Microgis neighbourhood (NBID) it intersects.
# `predicate=` replaces the deprecated `op=` keyword of gpd.sjoin.
# NOTE(review): `estimated_rent` is only added to regbl_gdf_D by a cell further
# down in this notebook; on a fresh kernel this column selection raises a
# KeyError until that cell has been run.
microgis_logement = gpd.sjoin(
    microgis_geom[['NBID', 'geometry']],
    regbl_gdf_D[['EGID', 'EWID', 'estimated_rent', 'WAZIM', 'WAREA', 'geometry']],
    predicate='intersects',
)

In [None]:
# Neighborhood with the most housing units: NBID with the largest number of
# joined rows. `idxmax` already scans for the maximum, so no prior sort is needed.
microgis_logement.groupby('NBID').size().idxmax()

In [None]:
# Show the polygon row(s) of neighbourhood 4878.
microgis_geom.loc[microgis_geom['NBID'] == 4878]

In [None]:
# Distribution of the `dmdrent` column across neighbourhoods (30 bins).
sns.histplot(microgis_data['dmdrent'], bins=30)

In [None]:
# Distribution of the `dmdrent5` column across neighbourhoods (50 bins).
sns.histplot(microgis_data['dmdrent5'], bins=50)

In [None]:
# Add the dmdrent columns (overall plus dmdrent1..dmdrent6) to the polygons,
# matching NBID in the geometry table with nbid in the data table.
microgis_geom = pd.merge(
    microgis_geom,
    microgis_data[['nbid', 'dmdrent'] + ['dmdrent{}'.format(i) for i in range(1, 7)]],
    left_on='NBID',
    right_on='nbid',
)

In [None]:
# Choropleth of `dmdrent` by neighbourhood.
microgis_geom.plot(column='dmdrent', figsize=(15, 15), legend=True)

Not very surprisingly, the neighbourhood with the most housing units (NBID 4878) is Le Lignon.

In [None]:
# OFS rent workbooks stored under data_folder/raw/OFS/Loyers; one sheet is read from each.
loyer_canton_par_n_piece = pd.read_excel(
    data_folder / 'raw' / 'OFS' / 'Loyers' / 'je-f-09.03.03.01.xlsx',
    sheet_name='2020',
)
loyer_canton_par_epoque_construction = pd.read_excel(
    data_folder / 'raw' / 'OFS' / 'Loyers' / 'je-f-09.03.03.02.xlsx',
    sheet_name='2020_clean',
)
prix_m2_canton_par_n_piece = pd.read_excel(
    data_folder / 'raw' / 'OFS' / 'Loyers' / 'je-f-09.03.03.05.xlsx',
    sheet_name='2020',
)

In [None]:
# Translation table applied to the `Canton` column of the price table so that
# its names can be compared with the `NAME` values coming from cantons_ch.
dict_cantons = {
    'Zurich': 'Zürich',
    'Berne': 'Bern',
    'Tessin': 'Ticino',
    'Saint-Gall': 'St. Gallen',
    'Grisons': 'Graubünden',
    'Argovie': 'Aargau',
    'Soleure': 'Solothurn',
    'Appenzell Rh.-Ext.': 'Appenzell Ausserrhoden',
    'Appenzell Rh.-Int.': 'Appenzell Innerrhoden',
    'Bâle-Ville': 'Basel-Stadt',
    'Zoug': 'Zug',
    'Thurgovie': 'Thurgau',
    'Lucerne': 'Luzern',
    'Schaffhouse': 'Schaffhausen',
    'Bâle-Campagne': 'Basel-Landschaft',
    'Glaris': 'Glarus',
    'Nidwald': 'Nidwalden',
    'Obwald': 'Obwalden',
    # Names that are mapped to themselves.
    **{
        same_name: same_name
        for same_name in (
            'Suisse', 'Genève', 'Vaud', 'Fribourg', 'Schwyz',
            'Uri', 'Valais', 'Neuchâtel', 'Jura',
        )
    },
}

In [None]:
# Translate the canton names through dict_cantons. Names without an entry are
# kept unchanged (instead of becoming NaN) so that re-running this cell on the
# already-translated column does not blank it out. Missing values stay missing.
prix_m2_canton_par_n_piece['Canton'] = prix_m2_canton_par_n_piece['Canton'].map(
    lambda name: dict_cantons.get(name, name),
    na_action='ignore',
)

In [None]:
def rent(canton, squared_m, n_rooms, price_table=None):
    """Estimate a rent as (per-m2 price of the canton and room class) x surface.

    Parameters
    ----------
    canton : str
        Canton name, compared for equality with the ``Canton`` column.
    squared_m : float
        Surface by which the per-m2 price is multiplied.
    n_rooms : int or float
        Number of rooms. Values of 6 and above use the ``avgRent6p+`` column,
        smaller values use ``avgRent<n>p`` (``n`` truncated to an integer).
    price_table : pandas.DataFrame, optional
        Table with a ``Canton`` column and the ``avgRent*`` columns. Defaults
        to the notebook-level ``prix_m2_canton_par_n_piece``.

    Returns
    -------
    float
        The estimated rent, or ``numpy.nan`` when the inputs cannot be used
        (missing/invalid room count or surface, canton absent from the table,
        or no column for that room class).
    """
    if price_table is None:
        # Resolved outside the try block on purpose: if the price table cell
        # has not been run, the NameError must surface instead of silently
        # producing a column full of NaN.
        price_table = prix_m2_canton_par_n_piece
    try:
        if n_rooms >= 6:
            room_label = '6p+'
        else:
            room_label = str(int(n_rooms)) + 'p'
        col_name = 'avgRent' + room_label
        canton_rows = price_table[price_table['Canton'] == canton]
        return canton_rows[col_name].to_list()[0] * squared_m
    except (KeyError, IndexError, TypeError, ValueError, OverflowError):
        # Only data problems are treated as "no estimate"; programming errors
        # and interrupts are no longer swallowed by a bare `except:`.
        return np.nan

In [None]:
# Inspect the rows of building EGID 1004845.
regbl_gdf_D.loc[regbl_gdf_D['EGID'] == 1004845]

In [None]:
# Row-wise rent estimate from the canton name (NAME), surface (WAREA) and
# room count (WAZIM) of each row.
regbl_gdf_D['estimated_rent'] = [
    rent(canton_name, surface, n_rooms)
    for canton_name, surface, n_rooms in zip(
        regbl_gdf_D['NAME'], regbl_gdf_D['WAREA'], regbl_gdf_D['WAZIM']
    )
]

In [None]:
# Remove the helper column left behind by the spatial join with the cantons.
# errors='ignore' keeps the cell re-runnable once the column is already gone.
regbl_gdf_D = regbl_gdf_D.drop(columns='index_right', errors='ignore')

In [None]:
# All regBL rows whose EGID was joined to neighbourhood 4878.
lignon_housing = regbl_gdf_D.loc[
    regbl_gdf_D['EGID'].isin(
        microgis_logement.loc[microgis_logement['NBID'] == 4878, 'EGID'].unique()
    )
]

In [None]:
# Median estimated rent of the rows with WAZIM == 1 in that neighbourhood.
lignon_housing.loc[lignon_housing['WAZIM'] == 1, 'estimated_rent'].median()

In [None]:
# Collapse every WAZIM value of 6 or more into a single class "6".
microgis_logement['WAZIM'] = microgis_logement['WAZIM'].clip(upper=6)

In [None]:
# Median estimated rent per (NBID, WAZIM). The column is selected before
# aggregating so the median is never attempted on the geometry or other
# non-numeric columns, which raises TypeError on recent pandas versions.
estimated_rent_by_nbid = (
    microgis_logement
    .groupby(['NBID', 'WAZIM'])['estimated_rent']
    .median()
    .reset_index()
)

In [None]:
# Wide layout: one row per NBID, one column per WAZIM value.
estimated_rent_by_nbid = estimated_rent_by_nbid.pivot(
    index='NBID',
    columns='WAZIM',
    values='estimated_rent',
)

In [None]:
# Overall median estimated rent per NBID, aligned on the NBID index. Only the
# rent column is aggregated, so the geometry and other non-numeric columns are
# never passed to median (TypeError on recent pandas versions).
estimated_rent_by_nbid['estimated_dmdrent'] = (
    microgis_logement.groupby('NBID')['estimated_rent'].median()
)

In [None]:
# Join the per-NBID estimates (NBID in the index) with the neighbourhood
# polygons and their dmdrent columns (NBID as a column).
test = estimated_rent_by_nbid.merge(
    microgis_geom,
    left_index=True,
    right_on='NBID',
)

In [None]:
# Rebuild a GeoDataFrame from the merged table, reusing its geometry column.
test = gpd.GeoDataFrame(
    test,
    geometry=test['geometry'],
    crs=2056,
)

In [None]:
# Map of the estimated median rent, keeping only neighbourhoods below 7000.
test.loc[test['estimated_dmdrent'] < 7000].plot(
    column='estimated_dmdrent',
    figsize=(15, 15),
    legend=True,
)