In [60]:
import pandas as pd

predicted_data_puskesmas = pd.read_csv("puskesmas_data_predicted.csv")

predicted_data_puskesmas.head()

Unnamed: 0,reviewer_name,rating,review_text,puskesmas_name,cleaned_review_text,language,is_positive
0,Mira Diah V,5,Pelayanan di poli KIA bagus&informatif sekali....,Puskesmas Simomulyo,layan pol kia bagusinformatif anak imunisasi a...,id,1
1,Ranitya Dewi Ayu Sadian,1,Pelayanan poli KIA lama sekali…\nKamis pagi se...,Puskesmas Simomulyo,layan pol kia kamis pagi puskesmas daftar pol ...,id,0
2,Yuliasti Ika,5,"Puskesmas dengan pelayanan yang baik sekali, s...",Puskesmas Simomulyo,puskesmas layan layan ramah lengkap fasilitas ...,id,1
3,Yusup Jaya,1,"Pelayanan tambah lelet, mohon managentnya di p...",Puskesmas Simomulyo,layan lambat mohon managernya sat set rujuk ce...,id,0
4,Yunia Ardina,1,Saya mau kasih masukan sekaligus protes..\n\nS...,Puskesmas Simomulyo,kasih masuk protes tanggal february jaman prik...,id,0


In [61]:
detail_puskesmas = pd.read_csv("data_scrapping/cleaned_puskesmas.csv")

detail_puskesmas.head()

Unnamed: 0,name,kepala puskesmas,alamat,telepon,url,rate,review_num
0,Puskesmas Asemrowo,dr. Diah Miryati,Jln. Asem Raya No. 8,(031) 5456122 / 081252965070,https://www.google.com/maps/place/Puskesmas+As...,35,(131)
1,Puskesmas Balas Klumprik,"dr. Yessy Pebriaty Sukatendel, MM",Jln. Raya Balas Klumprik,(031) 7665219,https://www.google.com/maps/place/Puskesmas+Ba...,41,(96)
2,Puskesmas Balongsari,dr. Dewi Ayuning Asih,Jln. Balongsari Tama No. 2 Surabaya,(031) 7417104,https://www.google.com/maps/place/Puskesmas+Ba...,39,(308)
3,Puskesmas Bangkingan,dr. Tri Indah Rachmawati,Jln. Banyu Urip Kidul 6/8,(031) 5685424 - 5615292 / 081334932007,https://www.google.com/maps/place/Puskesmas+Ba...,38,(84)
4,Puskesmas Banyu Urip,"dr. Winartuti Santoso, M.Kes",Jln. Banyu Urip Kidul 6/8,(031) 7405936,https://www.google.com/maps/place/Puskesmas+Ba...,32,(170)


In [62]:
import plotly.express as px
from tqdm.notebook import tqdm
import requests
import random


def get_lat_lon(puskesmas_name):
    user_agents = [
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.3 Safari/605.1.15",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0",
        "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.110 Mobile Safari/537.36",
        "Mozilla/5.0 (iPad; CPU OS 13_7 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.2 Mobile/15E148 Safari/604.1",
    ]
    attempts = 0
    while attempts < 5:
        try:
            headers = {"User-Agent": random.choice(user_agents)}
            url = f"https://nominatim.openstreetmap.org/search?q={puskesmas_name}, surabaya&format=jsonv2"
            response = requests.get(url, headers=headers)
            if response.status_code == 200:
                data = response.json()
                return data[0]["lat"], data[0]["lon"], data[0]["display_name"]
            else:
                attempts += 1
                print(f"Attempt {attempts}: Failed to get data for {puskesmas_name}")
        except Exception as e:
            attempts += 1
            print(f"Attempt {attempts}: Error for {puskesmas_name} - {e}")
    return None, None


tqdm.pandas()
detail_puskesmas["lat"], detail_puskesmas["lon"], detail_puskesmas["display_name"] = (
    zip(*detail_puskesmas["name"].progress_apply(get_lat_lon))
)

print(detail_puskesmas.columns)

detail_puskesmas = detail_puskesmas.rename(columns={"name": "puskesmas_name"})

positive_review_counts = predicted_data_puskesmas.groupby("puskesmas_name")[
    "is_positive"
].sum()
total_review_counts = predicted_data_puskesmas.groupby("puskesmas_name")[
    "is_positive"
].count()
percentage_positive_reviews = (
    positive_review_counts / total_review_counts * 100
).reset_index()
percentage_positive_reviews.columns = ["puskesmas_name", "positive_percentage"]

detail_puskesmas = detail_puskesmas.merge(
    percentage_positive_reviews, on="puskesmas_name", how="left"
)

  0%|          | 0/63 [00:00<?, ?it/s]

Index(['name', 'kepala puskesmas', 'alamat', 'telepon', 'url', 'rate',
       'review_num', 'lat', 'lon', 'display_name'],
      dtype='object')


In [71]:
data_for_plotting = detail_puskesmas[
    ["lat", "lon", "puskesmas_name", "positive_percentage"]
]

data_for_plotting["lat"] = pd.to_numeric(data_for_plotting["lat"], errors="coerce")
data_for_plotting["lon"] = pd.to_numeric(data_for_plotting["lon"], errors="coerce")
data_for_plotting["positive_percentage"] = data_for_plotting[
    "positive_percentage"
].fillna(0)

data_for_plotting = data_for_plotting[data_for_plotting["positive_percentage"] != 0]

fig = px.scatter_mapbox(
    data_for_plotting,
    lat="lat",
    lon="lon",
    color="positive_percentage",
    size="positive_percentage",
    hover_name="puskesmas_name",
    hover_data={"positive_percentage": True},
    size_max=15,
    zoom=10,
    center={
        "lat": data_for_plotting["lat"].mean(),
        "lon": data_for_plotting["lon"].mean(),
    },
    mapbox_style="open-street-map",
    color_continuous_scale=px.colors.sequential.Viridis,
    title="Puskesmas Positive Review Percentage",
)

fig.update_traces(marker=dict(size=64, opacity=0.7), selector=dict(mode="markers+text"))

fig.update_layout(
    legend_title_text="Positive Review %",
    title=dict(text="Puskesmas Positive Review Percentage", x=0.5, xanchor="center"),
)

fig.update_layout(width=800, height=800)
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [84]:
import pandas as pd
import plotly.express as px

data_for_plotting = detail_puskesmas[
    ["lat", "lon", "puskesmas_name", "positive_percentage"]
]

data_for_plotting["lat"] = pd.to_numeric(data_for_plotting["lat"], errors="coerce")
data_for_plotting["lon"] = pd.to_numeric(data_for_plotting["lon"], errors="coerce")
data_for_plotting["positive_percentage"] = data_for_plotting[
    "positive_percentage"
].fillna(0)

data_for_plotting = data_for_plotting[data_for_plotting["positive_percentage"] != 0]

top_10_data_for_plotting = data_for_plotting.sort_values(
    by="positive_percentage", ascending=False
).head(10)

fig = px.scatter_mapbox(
    top_10_data_for_plotting,
    lat="lat",
    lon="lon",
    color="positive_percentage",
    size="positive_percentage",
    hover_name="puskesmas_name",
    hover_data={"positive_percentage": True},
    size_max=15,
    zoom=10,
    center={
        "lat": top_10_data_for_plotting["lat"].mean(),
        "lon": top_10_data_for_plotting["lon"].mean(),
    },
    mapbox_style="open-street-map",
    color_continuous_scale=px.colors.sequential.Viridis,
    title="Top 10 Puskesmas with Positive Review Percentage",
)

fig.update_traces(marker=dict(size=28, opacity=0.7), textposition="top center")

fig.update_traces(
    hovertemplate="<b>%{hovertext}</b><br>Positivity Rate: %{marker.color:.2f}%"
)

fig.update_coloraxes(colorbar_title="Positivity Rate (%)")

fig.update_layout(
    legend=dict(
        title_font=dict(size=14, color="blue"),
        bgcolor="rgba(255,255,255,0.5)",
        bordercolor="Black",
        borderwidth=2,
    ),
    title=dict(
        text="Top 10 Puskesmas with Positive Review Percentage", x=0.5, xanchor="center"
    ),
    width=800,
    height=800,
)

fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [81]:
import pandas as pd
import plotly.express as px

data_for_plotting = detail_puskesmas[
    ["lat", "lon", "puskesmas_name", "positive_percentage"]
]

data_for_plotting["lat"] = pd.to_numeric(data_for_plotting["lat"], errors="coerce")
data_for_plotting["lon"] = pd.to_numeric(data_for_plotting["lon"], errors="coerce")
data_for_plotting["positive_percentage"] = data_for_plotting[
    "positive_percentage"
].fillna(0)

data_for_plotting = data_for_plotting[data_for_plotting["positive_percentage"] != 0]

top_10_data_for_plotting = data_for_plotting.sort_values(
    by="positive_percentage", ascending=True
).head(10)

fig = px.scatter_mapbox(
    top_10_data_for_plotting,
    lat="lat",
    lon="lon",
    color="positive_percentage",
    size="positive_percentage",
    hover_name="puskesmas_name",
    hover_data={"positive_percentage": True},
    size_max=15,
    zoom=10,
    center={
        "lat": top_10_data_for_plotting["lat"].mean(),
        "lon": top_10_data_for_plotting["lon"].mean(),
    },
    mapbox_style="open-street-map",
    color_continuous_scale=px.colors.sequential.Viridis,
    title="Top 10 Puskesmas with Negative Review Percentage",
)

fig.update_traces(marker=dict(size=28, opacity=0.7), textposition="top center")

fig.update_traces(
    hovertemplate="<b>%{hovertext}</b><br>Positivity Rate: %{marker.color:.2f}%"
)

fig.update_coloraxes(colorbar_title="Positivity Rate (%)")

fig.update_layout(
    legend=dict(
        title_font=dict(size=14, color="blue"),
        bgcolor="rgba(255,255,255,0.5)",
        bordercolor="Black",
        borderwidth=2,
    ),
    title=dict(
        text="Top 10 Puskesmas with Negative Review Percentage", x=0.5, xanchor="center"
    ),
    width=800,
    height=800,
)

fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [65]:
most_positive_review = detail_puskesmas.sort_values(
    "positive_percentage", ascending=False
).head(10)
most_positive_review

Unnamed: 0,puskesmas_name,kepala puskesmas,alamat,telepon,url,rate,review_num,lat,lon,display_name,positive_percentage
50,Puskesmas Sidotopo Wetan,dr. Evi Susanti,Jln. Randu 38,(031) 3767737,https://www.google.com/maps/place/Puskesmas+Si...,44,(537),-7.22658925,112.76350132938722,"Puskesmas Sidotopo Wetan, Jalan Randu, RW 10, ...",83.532934
24,Puskesmas Ketabang,dr. Joyce Hestia Nugrahanti,Jln. Jaksa Agung Suprapto No 10,(031) 5311999,https://www.google.com/maps/place/Puskesmas+Ke...,45,(377),-7.2580613,112.74677403040268,"Puskesmas Ketabang, Jalan Jimerto, RW 05, Keta...",80.357143
57,Puskesmas Tanjungsari,dr. Yunita Andriani,Jln. Raya Tanjungsari No.116,(031) 7497858,https://www.google.com/maps/place/Puskesmas+Ta...,42,(300),-7.25940435,112.6892628357994,"Puskesmas Tanjungsari, Jalan Raya Tandes Lor, ...",79.565217
56,Puskesmas Tanah Kali Kedinding,dr. Era Kartikawati,Jln. H. M. Noer 226,(031) 51501347,https://www.google.com/maps/place/Puskesmas+Ta...,42,(572),-7.23037625,112.77446968315311,"Puskesmas Tanah Kali Kedinding, Jalan Kedung C...",77.181208
42,Puskesmas Pucang Sewu,drg. Ummi Latifah,Jln. Pucang Anom Timur No. 72,(031) 5018527,https://www.google.com/maps/place/Puskesmas+Pu...,44,(464),-7.28659415,112.7555282626124,"Puskesmas Pucang Sewu, Jalan Kalibokor, RW 08,...",76.410256
1,Puskesmas Balas Klumprik,"dr. Yessy Pebriaty Sukatendel, MM",Jln. Raya Balas Klumprik,(031) 7665219,https://www.google.com/maps/place/Puskesmas+Ba...,41,(96),-7.3315249,112.69002720464968,"Puskesmas Balas Klumprik, Jalan Balas Klumprik...",75.0
25,Puskesmas Klampis Ngasem,drg. Esti Wuryaningtyas,Jln. Arif Rahman Hakim 99B,(031) 5992389,https://www.google.com/maps/place/Puskesmas+Kl...,43,(406),-7.289156200000001,112.77848995,"Puskesmas Klampis Ngasem, Jalan Klampis Aji II...",74.883721
61,Puskesmas Wonokromo,drg. Dwiana Yuniarti,Jln. Karangrejo VI/4,(031) 8281647 / 082143561190,https://www.google.com/maps/place/Puskesmas+Wo...,42,(349),-7.303176499999999,112.7303924779992,"Puskesmas Wonokromo, Jalan Karang Rejo VI, RW ...",74.86911
39,Puskesmas Pegirian,dr. Retno Widayanti,Jln. Karang Tembok No 39,(031) 3766179,https://www.google.com/maps/place/Puskesmas+Pe...,37,(67),-7.224634399999999,112.74691545896944,"Puskesmas Pegirian, Jalan Karang Tembok, RW 03...",74.074074
36,Puskesmas Ngagel Rejo,dr. Febria Sukmaini,Jln. Ngagel Dadi III No.17,(031) 5047055,https://www.google.com/maps/place/Puskesmas+Ng...,41,(357),-7.29296335,112.74878868683348,"Puskesmas Ngagel Rejo, Jalan Ngagel Dadi IV, R...",73.913043


In [66]:
most_negative_review = detail_puskesmas.sort_values(
    "positive_percentage", ascending=True
).head(10)
most_negative_review

Unnamed: 0,puskesmas_name,kepala puskesmas,alamat,telepon,url,rate,review_num,lat,lon,display_name,positive_percentage
34,Puskesmas Morokrembangan,dr. Nurul Ihsani,Jln. Tambakasri 13/7 Morokrembangan,(031) 7493259,https://www.google.com/maps/place/Puskesmas+Mo...,19,(108),-7.2347805,112.71244346706712,"Puskesmas Moro Krembangab, Jalan Tambak Asri X...",8.108108
49,Puskesmas Sidotopo,dr. Galih Satryo Utomo,"Jl. Pegirian No.239, Sidotopo",(031) 3767735,https://www.google.com/maps/place/Puskesmas+Si...,28,(80),-7.23034775,112.7458816190326,"Puskesmas Sidotopo, Jalan Sidodadi, RW 07, Sim...",21.95122
33,Puskesmas Mojo,"dr. Ratna Megasari, M.Kes",Jln. Mojo Klanggru Wetan 2/11,(031) 5932332,https://www.google.com/maps/place/Puskesmas+Mo...,26,(285),-7.270902149999999,112.77082440581847,"Puskesmas Mojo Surabaya, Jalan Mojo Klanggru W...",26.744186
22,Puskesmas Kenjeran,dr. Rosna Suswanti,Jl. Tambak Deres 2 Surabaya,089515356046,https://www.google.com/maps/place/Puskesmas+Ke...,32,(165),-7.24111075,112.79527909779556,"Puskesmas Kenjeran, Jalan Abdul Latif, RW 01, ...",33.333333
12,Puskesmas Gundih,"dr. Tita Pusparini, M. Kes",Jln. Margodadi 36 - 38 Surabaya,(031) 5476275,https://www.google.com/maps/place/Puskesmas+Gu...,31,(117),-7.24724475,112.72539045,"Puskesmas Gundih, Jalan Margodadi, RW 07, Gund...",40.740741
20,Puskesmas Kedungdoro,"dr. Diah Roichan Arifiani, M.Kes",Jln. Kaliasin Pompa 79-81 Surabaya,(031) 5345968,https://www.google.com/maps/place/Puskesmas+Ke...,32,(145),-7.26370155,112.7382415,"Puskesmas Kedungdoro, Jalan Kaliasin Gang VI, ...",43.678161
58,Puskesmas Tembok Dukuh,drg. Tiyas Pranadani,Jln. Kalibutuh No 26 Surabaya,(031) 5343410,https://www.google.com/maps/place/Puskesmas+Te...,37,(266),-7.254527449999999,112.71921325,"Puskesmas Tembok Dukuh, Jalan Kalibutuh, RW 02...",44.537815
18,Puskesmas Kalirungkut,dr. Bernadetta Martini,Jln. Rungkut Puskesmas No. 1,(031) 8700668,https://www.google.com/maps/place/Puskesmas+Ka...,34,(289),-7.322217999999999,112.77076210366916,"Puskesmas Kalirungkut, Jalan Puskesmas, RW 07,...",44.827586
4,Puskesmas Banyu Urip,"dr. Winartuti Santoso, M.Kes",Jln. Banyu Urip Kidul 6/8,(031) 7405936,https://www.google.com/maps/place/Puskesmas+Ba...,32,(170),-7.2715069,112.72002732026792,"Puskesmas Banyu Urip, Jalan Banyu Urip Kidul V...",45.098039
60,Puskesmas Wiyung,dr. Tatien Tjahjandari,Jln. Menganti Wiyung Pasar No. 1,(031) 7532885 / 082143561187,https://www.google.com/maps/place/Puskesmas+Wi...,34,(272),-7.30968085,112.67497509450482,"Puskesmas Pembantu Babatan, Jalan Babatan IV, ...",45.333333


In [3]:
import pandas as pd

data = pd.read_csv("data_scrapping/puskesmas_data.csv")
data.shape

(14891, 4)