In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import geopandas as gpd
import os
import numpy as np

from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from tqdm.notebook import tqdm

In [18]:
KEPADATAN = 'data/grid_kepadatan_penduduk.geojson'
FASKES = 'data/akses_puskesmas_30.geojson'
BEBAN = 'data/beban_puskesmas.csv'
RESULT = 'data/kemudahan_akses_ntb.geojson'

In [19]:
faskes = gpd.read_file(FASKES)
kepadatan = gpd.read_file(KEPADATAN)

In [20]:
kepadatan = kepadatan.drop_duplicates('id', keep='first')

In [21]:
merged = pd.merge(kepadatan, faskes[['id_left', 'kdppk', 'nmppk', 'distance', 'total_doctors']], left_on='id', right_on='id_left', how='left')

In [22]:
merged = pd.merge(kepadatan, faskes[['id_left', 'kdppk', 'nmppk', 'distance', 'total_doctors', 'geometry']], how='left')

In [23]:
beban = merged.groupby('kdppk').agg({'pdd_grid_desa': 'sum'}).reset_index()
beban = beban.rename(columns={'pdd_grid_desa': 'beban_faskes'})

In [24]:
beban.to_csv(BEBAN)

In [25]:
merged_beban = pd.merge(merged, beban)

In [26]:
final = merged_beban[['id', 'distance', 'total_doctors', 'beban_faskes']]

In [27]:
scaler = StandardScaler()
df_scaled = scaler.fit_transform(final[['distance', 'total_doctors', 'beban_faskes']])

In [28]:
kmeans = KMeans(n_clusters=2, random_state=42)
labels = kmeans.fit_predict(df_scaled)

final.loc[:, 'Cluster'] = labels

centroids = kmeans.cluster_centers_
centroids_df = pd.DataFrame(centroids, columns=['distance', 'total_doctors', 'beban_faskes'])

good_access_cluster = centroids_df.idxmin(axis=0)
bad_access_cluster = centroids_df.idxmax(axis=0)

if centroids_df.loc[0, 'distance'] < centroids_df.loc[1, 'distance']:
    final.loc[:, 'Access_Label'] = final['Cluster'].apply(lambda x: 'Good' if x == 0 else 'Bad')
else:
    final.loc[:, 'Access_Label'] = final['Cluster'].apply(lambda x: 'Good' if x == 1 else 'Bad')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final.loc[:, 'Cluster'] = labels
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final.loc[:, 'Access_Label'] = final['Cluster'].apply(lambda x: 'Good' if x == 1 else 'Bad')


In [29]:
grid = merged.loc[merged['id_left'].isnull()==False, ['id', 'geometry']]

In [30]:
result = pd.merge(final[['id', 'Access_Label']], grid, how='right')
result.loc[result['Access_Label'].isnull()==True, 'Access_Label'] = 'Bad'
result = pd.merge(result, kepadatan[['id', 'geometry']], how='right')
result.loc[result['Access_Label'].isnull()==True, 'Access_Label'] = 'Unpopulated'
result = gpd.GeoDataFrame(result, geometry='geometry')

In [None]:
result.to_file(RESULT)