## Scrape Detik

In [6]:
from requests import get
from urllib.parse import urlsplit
from bs4 import BeautifulSoup

class DetikNewsApi:

    def __init__(self):
        """ Search URL"""
        self.search_url = 'https://www.detik.com/search/searchall?'

    def build_search_url(self, query: str, page_number: int):
        """ Building search url with query input, we can jump to specific page number"""
        qs = f'query={query}'
        qs2 = '&siteid=2&sortby=time&fromdatex=01/01/2019&todatex=25/04/2024&result_type=relevansi&page='
        return self.search_url + qs + qs2 + str(page_number)

    def build_detail_url(self, url: str):
        """ Build detail URL will turn off pagination in detail page """
        a = urlsplit(url)
        qs = 'single=1'
        detail_url = a.scheme + '://' + a.netloc + a.path + '?' + qs
        return detail_url

    def result_count(self, search_response):
        """ Search result count, need search response page """
        soup = BeautifulSoup(search_response.text, 'html.parser')
        tag = soup.find('span', 'fl text').text
        count = [int(s) for s in tag.split() if s.isdigit()]
        return count[0]

    def detail(self, url: str) -> str:
        detail_url = self.build_detail_url(url)
        req = get(detail_url)
        soup = BeautifulSoup(req.text, 'html.parser')
        tag = soup.find('div', class_="detail__body-text")
        body = ''
        if tag.find_all('p'):
            for i in tag.find_all('p'):
                body += i.text
        else:
            body += tag.text
            
        return body

    def get_comments(self, url: str) -> list:
        detail_url = self.build_detail_url(url)
        req = get(detail_url)
        soup = BeautifulSoup(req.text, 'html.parser')
        comment_tags = soup.select('.komentar-iframe-min-media__desc')
        comments = []
        for comment_tag in comment_tags:
            comments.append(comment_tag.text.strip())
        return comments

    def parse(self, search_response, detail):
        soup = BeautifulSoup(search_response.text, 'html.parser')
        tag = soup.find_all('article')
        data = []

        for i in tag:
            judul = i.find('h2').text
            link = i.find('a').get('href')
            gambar = i.find('img').get('src')
            body = ''
            if detail:
                body = self.detail(link)

            comments = []
            if detail:
                comments = self.get_comments(link)

            waktu = i.find('span', class_="date").text
            data.append({'judul': judul,
                    'link': link,
                    'gambar': gambar,
                    'body': body,
                    'waktu': waktu,
                    'comments': comments
                    })
        return data

    def search(self, query, page_number=1, detail=False):
        url = self.build_search_url(query, page_number)
        search_response = get(url)
        parse_result = self.parse(search_response, detail)
        return parse_result


In [19]:
detik = DetikNewsApi()

# isi = []

# for i in range(10):
#     # method search(query, page_number, detail)
#     page_num = i+362
#     print(f"Scrape page ke-{page_num}")
#     res_detik = detik.search('stunting', page_num, True)
#     print(res_detik)
#     print()
#     isi.extend(res_detik)

res_detik = detik.search('stunting', 1, True)


Scrape page ke-362
[]

Scrape page ke-363
[]

Scrape page ke-364
[]

Scrape page ke-365
[]

Scrape page ke-366
[]

Scrape page ke-367


KeyboardInterrupt: 

In [21]:
res_detik = detik.search('stunting', 361, True)
res_detik

[{'judul': 'Rangkuman 3 Capres-Cawapres di Hari Pertama Kampanye',
  'link': 'https://news.detik.com/pemilu/d-7062002/rangkuman-3-capres-cawapres-di-hari-pertama-kampanye',
  'gambar': 'https://akcdn.detik.net.id/visual/2023/11/02/ilustrasi-3-capres-cawapres-2024_43.png?w=250&q=',
  'body': 'Selesai sudah para capres dan cawapres menjalani kampanye perdananya untuk Pilpres 2024. Tiga paslon berpencar ke seluruh wilayah RI untuk merebut suara rakyat.Di hari Pertama, capres nomor urut 1 Anies Baswedan kampanye di Jabodetabek. Anies memulai kampanyenya dengan sumngkem ke Ibundanya meminta doa restu.Anies kemudian mengawali titik kampanye di Tanah Merah, Jakarta Utara, kemudian lanjut ke Bogor. Sementara itu, cawapresnya Muhaimin Iskandra atau Cak Imin, kampanye di Jatim.\r\nADVERTISEMENT\r\n\r\nSCROLL TO CONTINUE WITH CONTENT\r\nAdapun paslon nomor urut 2, Prabowo Subianto dan Gibran Rakabuming Raka tak langsung berkampanye di hari pertama. Prabowo masih menjalankan tugas sebagai Menhan d

In [20]:
df = pd.DataFrame(isi)
df

Unnamed: 0,judul,link,gambar,body,waktu,comments
0,"Ikuti Pesan Jokowi, Banyuwangi Fokus Soal SDM ...",https://news.detik.com/berita-jawa-timur/d-466...,https://akcdn.detik.net.id/visual/2019/08/16/2...,\n\n\nBanyuwangi - Pidato Kenegaraan Presiden ...,"detikNewsJumat, 16 Agu 2019 19:13 WIB",[]
1,Sederet Rencana Besar Jokowi Genjot Kualitas S...,https://finance.detik.com/berita-ekonomi-bisni...,https://akcdn.detik.net.id/visual/2019/07/08/b...,\r\n ADVERTISEMENT\r\n \r\n SCROLL TO C...,"detikFinanceJumat, 16 Agu 2019 12:08 WIB",[]
2,"Kualitas SDM RI Digenjot hingga ke Daerah, Ini...",https://finance.detik.com/berita-ekonomi-bisni...,https://akcdn.detik.net.id/visual/2019/07/25/4...,\r\n ADVERTISEMENT\r\n \r\n SCROLL TO C...,"detikFinanceSenin, 19 Agu 2019 16:05 WIB",[]
3,Mensos Jawab Isu Politisasi Bansos hingga Peny...,https://news.detik.com/berita/d-4674223/mensos...,https://akcdn.detik.net.id/visual/2019/08/21/2...,Menteri Sosial Agus Gumiwang Kartasasmita meny...,"detikNewsRabu, 21 Agu 2019 07:05 WIB",[]
4,KPAI Harap DPR Segera Sahkan Usia Minimum Nika...,https://news.detik.com/berita/d-4705986/kpai-h...,https://akcdn.detik.net.id/visual/2019/04/02/e...,\r\nADVERTISEMENT\r\n\r\nSCROLL TO CONTINUE WI...,"detikNewsSabtu, 14 Sep 2019 04:32 WIB",[]
...,...,...,...,...,...,...
519,"Jabar Hari Ini: Pria Kejam di Kuningan, Cabuli...",https://www.detik.com/jabar/berita/d-7071739/j...,https://akcdn.detik.net.id/visual/2023/05/03/i...,Sejumlah peristiwa mewarnai pemberitaan di Jaw...,"detikJabarSenin, 04 Des 2023 22:00 WIB",[]
520,Rangkuman 3 Capres-Cawapres di Hari Pertama Ka...,https://news.detik.com/pemilu/d-7062002/rangku...,https://akcdn.detik.net.id/visual/2023/11/02/i...,Selesai sudah para capres dan cawapres menjala...,"detikNewsRabu, 29 Nov 2023 08:58 WIB",[]
521,Gejala Global Pernikahan Dini Melonjak di Masa...,https://news.detik.com/berita/d-5180276/gejala...,https://akcdn.detik.net.id/visual/2018/04/14/2...,Pernikahan di usia dini meningkat selama masa ...,"detikNewsMinggu, 20 Sep 2020 06:28 WIB",[]
522,Si Mungil Sacha Inchi Energi Baru Satu Padu,https://www.detik.com/jatim/bisnis/d-7019536/s...,https://akcdn.detik.net.id/visual/2023/11/05/k...,Aroma harum menyeruak dari dalam sebuah dapur ...,"detikJatimMinggu, 05 Nov 2023 07:30 WIB",[]


In [22]:
import pandas as pd

df = pd.DataFrame(isi)
df

Unnamed: 0,judul,link,gambar,body,waktu,comments
0,"Ikuti Pesan Jokowi, Banyuwangi Fokus Soal SDM ...",https://news.detik.com/berita-jawa-timur/d-466...,https://akcdn.detik.net.id/visual/2019/08/16/2...,\n\n\nBanyuwangi - Pidato Kenegaraan Presiden ...,"detikNewsJumat, 16 Agu 2019 19:13 WIB",[]
1,Sederet Rencana Besar Jokowi Genjot Kualitas S...,https://finance.detik.com/berita-ekonomi-bisni...,https://akcdn.detik.net.id/visual/2019/07/08/b...,\r\n ADVERTISEMENT\r\n \r\n SCROLL TO C...,"detikFinanceJumat, 16 Agu 2019 12:08 WIB",[]
2,"Kualitas SDM RI Digenjot hingga ke Daerah, Ini...",https://finance.detik.com/berita-ekonomi-bisni...,https://akcdn.detik.net.id/visual/2019/07/25/4...,\r\n ADVERTISEMENT\r\n \r\n SCROLL TO C...,"detikFinanceSenin, 19 Agu 2019 16:05 WIB",[]
3,Mensos Jawab Isu Politisasi Bansos hingga Peny...,https://news.detik.com/berita/d-4674223/mensos...,https://akcdn.detik.net.id/visual/2019/08/21/2...,Menteri Sosial Agus Gumiwang Kartasasmita meny...,"detikNewsRabu, 21 Agu 2019 07:05 WIB",[]
4,KPAI Harap DPR Segera Sahkan Usia Minimum Nika...,https://news.detik.com/berita/d-4705986/kpai-h...,https://akcdn.detik.net.id/visual/2019/04/02/e...,\r\nADVERTISEMENT\r\n\r\nSCROLL TO CONTINUE WI...,"detikNewsSabtu, 14 Sep 2019 04:32 WIB",[]
...,...,...,...,...,...,...
519,"Jabar Hari Ini: Pria Kejam di Kuningan, Cabuli...",https://www.detik.com/jabar/berita/d-7071739/j...,https://akcdn.detik.net.id/visual/2023/05/03/i...,Sejumlah peristiwa mewarnai pemberitaan di Jaw...,"detikJabarSenin, 04 Des 2023 22:00 WIB",[]
520,Rangkuman 3 Capres-Cawapres di Hari Pertama Ka...,https://news.detik.com/pemilu/d-7062002/rangku...,https://akcdn.detik.net.id/visual/2023/11/02/i...,Selesai sudah para capres dan cawapres menjala...,"detikNewsRabu, 29 Nov 2023 08:58 WIB",[]
521,Gejala Global Pernikahan Dini Melonjak di Masa...,https://news.detik.com/berita/d-5180276/gejala...,https://akcdn.detik.net.id/visual/2018/04/14/2...,Pernikahan di usia dini meningkat selama masa ...,"detikNewsMinggu, 20 Sep 2020 06:28 WIB",[]
522,Si Mungil Sacha Inchi Energi Baru Satu Padu,https://www.detik.com/jatim/bisnis/d-7019536/s...,https://akcdn.detik.net.id/visual/2023/11/05/k...,Aroma harum menyeruak dari dalam sebuah dapur ...,"detikJatimMinggu, 05 Nov 2023 07:30 WIB",[]


In [52]:
# Menggabungkan semua komentar dalam satu teks
all_comments = df["comments"]
all_comments_text = ' '.join([comment for sublist in all_comments for comment in sublist])

# Mengonversi teks ke dalam bentuk Series
all_comments_series = pd.Series(all_comments_text.split())

# Menghitung jumlah nilai unik dalam Series
unique_comments_count = all_comments_series.nunique()
print("Jumlah nilai unik komentar:", unique_comments_count)


Jumlah nilai unik komentar: 0


  all_comments_series = pd.Series(all_comments_text.split())


In [29]:
df.tail(10)

Unnamed: 0,judul,link,gambar,body,waktu,comments
670,Muhadjir Bicara Ketimpangan Gender: Mayoritas ...,https://news.detik.com/berita/d-6899347/muhadj...,https://akcdn.detik.net.id/community/media/vis...,Menko PMK Muhadjir Effendy berbicara terkait t...,"detikNewsSenin, 28 Agu 2023 11:40 WIB",[]
671,"Yuk Ikut Bergerak Wujudkan Kemajuan Bangsa, Be...",https://news.detik.com/berita/d-6896763/yuk-ik...,https://akcdn.detik.net.id/community/media/vis...,Pembangunan negeri bukan hanya menjadi tanggun...,"detikNewsSabtu, 26 Agu 2023 11:53 WIB",[]
672,"Diundang Harvard Medical School, Bupati Ipuk A...",https://www.detik.com/jatim/berita/d-6713700/d...,https://akcdn.detik.net.id/community/media/vis...,Banyuwangi mendapat kehormatan mempresentasika...,"detikJatimKamis, 11 Mei 2023 03:00 WIB",[]
673,Blue Band Gaungkan Pentingnya Konsumsi Sarapan...,https://food.detik.com/berita-boga/d-6893696/b...,https://akcdn.detik.net.id/community/media/vis...,Blue Band mengadakan Program Kampanye Nasional...,"detikFoodKamis, 24 Agu 2023 16:14 WIB",[]
674,Curhat Zaskia Mecca Anaknya Kena Imbas Polusi ...,https://health.detik.com/berita-detikhealth/d-...,https://akcdn.detik.net.id/community/media/vis...,"Alih-alih membaik, polusi udara di Jakarta kia...","detikHealthKamis, 24 Agu 2023 13:30 WIB",[]
675,Menko PMK Bakal Berkantor di Papua Awal Septem...,https://news.detik.com/berita/d-6892353/menko-...,https://akcdn.detik.net.id/community/media/vis...,Menteri Koordinator Bidang Pembangunan Manusia...,"detikNewsRabu, 23 Agu 2023 21:05 WIB",[]
676,"Kampanye di Tempat Kelahiran SBY, Ibas Mohon D...",https://news.detik.com/pemilu/d-7186063/kampan...,https://akcdn.detik.net.id/community/media/vis...,Calon legislatif (caleg) DPR RI nomor urut 1 P...,"detikNewsSabtu, 10 Feb 2024 11:25 WIB",[]
677,"KBMKB Banyuaeng Ditutup, Kodim 0723 Harap Pers...",https://www.detik.com/jateng/berita/d-6889899/...,https://akcdn.detik.net.id/community/media/vis...,Upacara penutupan Karya Bhakri Mandiri Klaten ...,"detikJatengSelasa, 22 Agu 2023 16:56 WIB",[]
678,Bupati Klaten Apresiasi TNI-Polri yang Dukung ...,https://www.detik.com/jateng/berita/d-6889501/...,https://akcdn.detik.net.id/community/media/vis...,Bupati Klaten Sri Mulyani menyampaikan apresia...,"detikJatengSelasa, 22 Agu 2023 14:20 WIB",[]
679,Capaian Kegiatan KBMKB ke-16 di Banyuaeng Klat...,https://www.detik.com/jateng/berita/d-6889497/...,https://akcdn.detik.net.id/community/media/vis...,Program Karya Bhakti Mandiri Klaten Bersinar (...,"detikJatengSelasa, 22 Agu 2023 14:17 WIB",[]


In [23]:
df.to_csv('detik11(4).csv', index=False)

In [26]:
import pandas as pd

In [27]:
df1 = pd.read_csv("detik11.csv")
df1

Unnamed: 0,judul,link,gambar,body,waktu,comments
0,Ada Rumah Gizi di Balik Sukses Demak Turunkan ...,https://www.detik.com/jateng/jawa-tengah-meria...,https://akcdn.detik.net.id/community/media/vis...,"Warga Desa Sidomulyo, Kecamatan Dempet, Kabupa...","detikJatengKamis, 25 Apr 2024 12:58 WIB",[]
1,Jokowi Akui Target Turunkan Stunting Jadi 14 P...,https://health.detik.com/detiktv/d-7308553/jok...,https://akcdn.detik.net.id/community/media/vis...,Presiden Joko Widodo (Jokowi) akui target turu...,"detikHealthRabu, 24 Apr 2024 16:02 WIB",[]
2,Pemkot Palopo Klaim Stunting Turun Jadi 100 Ka...,https://www.detik.com/sulsel/palopo/d-7307882/...,https://akcdn.detik.net.id/community/media/vis...,"Pemerintah Kota (Pemkot) Palopo, Sulawesi Sela...","detikSulselRabu, 24 Apr 2024 11:54 WIB",[]
3,BKKBN Ingatkan Usia Hamil Maksimal 35 Tahun un...,https://www.detik.com/sumut/berita/d-7265954/b...,https://akcdn.detik.net.id/community/media/vis...,Kepala BKKBN Hasto Wardoyo berbicara tentang P...,"detikSumutKamis, 28 Mar 2024 18:30 WIB",[]
4,Cara Iptu Dina Turunkan Stunting di Lombok Bar...,https://news.detik.com/berita/d-7305738/cara-i...,https://akcdn.detik.net.id/community/media/vis...,Stunting menjadi salah satu persoalan yang men...,"detikNewsSelasa, 23 Apr 2024 10:24 WIB",[]
...,...,...,...,...,...,...
1895,Wapres Ma'ruf Juga akan Bertemu 3 Bacawapres 1...,https://news.detik.com/pemilu/d-7010919/wapres...,https://akcdn.detik.net.id/community/media/vis...,Wakil Presiden Ma'ruf Amin disebut-sebut akan ...,"detikNewsSelasa, 31 Okt 2023 08:02 WIB",[]
1896,PNM Gelar Edukasi Kesehatan Gizi Anak untuk 15...,https://news.detik.com/berita/d-6841408/pnm-ge...,https://akcdn.detik.net.id/community/media/vis...,PT Permodalan Nasional Madani (PNM) memberikan...,"detikNewsSelasa, 25 Jul 2023 20:30 WIB",[]
1897,"Hari Keluarga Nasional 29 Juni 2023: Sejarah, ...",https://news.detik.com/berita/d-6788239/hari-k...,https://akcdn.detik.net.id/community/media/vis...,Hari Keluarga Nasional (Harganas) diperingati ...,"detikNewsJumat, 23 Jun 2023 11:24 WIB",[]
1898,Jokowi Minta Kepala Daerah Ikut Kasih Bansos k...,https://finance.detik.com/berita-ekonomi-bisni...,https://akcdn.detik.net.id/community/media/vis...,Pemerintah pusat sudah membagikan sederet bant...,"detikFinanceSenin, 30 Okt 2023 14:07 WIB",[]


In [28]:
df2 = pd.read_csv("detik11(2).csv")
df2

Unnamed: 0,judul,link,gambar,body,waktu,comments
0,Jokowi Setujui Pembentukan Inpres Terkait Air ...,https://news.detik.com/berita/d-6997954/jokowi...,https://akcdn.detik.net.id/community/media/vis...,Presiden Joko Widodo (Jokowi) menggelar rapat ...,"detikNewsSenin, 23 Okt 2023 17:35 WIB",[]
1,Dicky Saromi Resmi Dilantik Jadi Pj Walkot Cimahi,https://www.detik.com/jabar/berita/d-6996109/d...,https://akcdn.detik.net.id/community/media/vis...,Dicky Saromi resmi dilantik sebagai Penjabat (...,"detikJabarMinggu, 22 Okt 2023 17:05 WIB",[]
2,Misi Khusus Mahasiswa KKN UGM di Gorontalo,https://www.detik.com/jogja/kota-pelajar/d-682...,https://akcdn.detik.net.id/community/media/vis...,Universitas Gadjah Mada (UGM) mengirimkan ribu...,"detikJogjaSenin, 17 Jul 2023 16:39 WIB",[]
3,"25 Januari Hari Gizi Nasional, Begini Sejarah ...",https://health.detik.com/berita-detikhealth/d-...,https://akcdn.detik.net.id/community/media/vis...,Hari Gizi Nasional diperingati setiap tanggal ...,"detikHealthRabu, 24 Jan 2024 08:17 WIB",[]
4,KPAI Catat 3.883 Aduan Pelanggaran Hak-Perlind...,https://news.detik.com/berita/d-7154799/kpai-c...,https://akcdn.detik.net.id/community/media/vis...,Ketua Komisi Perlindungan Anak Indonesia (KPAI...,"detikNewsSenin, 22 Jan 2024 16:54 WIB",[]
...,...,...,...,...,...,...
675,Menko PMK Bakal Berkantor di Papua Awal Septem...,https://news.detik.com/berita/d-6892353/menko-...,https://akcdn.detik.net.id/community/media/vis...,Menteri Koordinator Bidang Pembangunan Manusia...,"detikNewsRabu, 23 Agu 2023 21:05 WIB",[]
676,"Kampanye di Tempat Kelahiran SBY, Ibas Mohon D...",https://news.detik.com/pemilu/d-7186063/kampan...,https://akcdn.detik.net.id/community/media/vis...,Calon legislatif (caleg) DPR RI nomor urut 1 P...,"detikNewsSabtu, 10 Feb 2024 11:25 WIB",[]
677,"KBMKB Banyuaeng Ditutup, Kodim 0723 Harap Pers...",https://www.detik.com/jateng/berita/d-6889899/...,https://akcdn.detik.net.id/community/media/vis...,Upacara penutupan Karya Bhakri Mandiri Klaten ...,"detikJatengSelasa, 22 Agu 2023 16:56 WIB",[]
678,Bupati Klaten Apresiasi TNI-Polri yang Dukung ...,https://www.detik.com/jateng/berita/d-6889501/...,https://akcdn.detik.net.id/community/media/vis...,Bupati Klaten Sri Mulyani menyampaikan apresia...,"detikJatengSelasa, 22 Agu 2023 14:20 WIB",[]


In [29]:
df3 = pd.read_csv("detik11(3).csv")
df3

Unnamed: 0,judul,link,gambar,body,waktu,comments
0,Capaian Kegiatan KBMKB ke-16 di Banyuaeng Klat...,https://www.detik.com/jateng/berita/d-6889497/...,https://akcdn.detik.net.id/visual/2023/08/22/p...,Program Karya Bhakti Mandiri Klaten Bersinar (...,"detikJatengSelasa, 22 Agu 2023 14:17 WIB",[]
1,Ada Rencana Balai Kota Semarang Bakal Pindah k...,https://www.detik.com/jateng/bisnis/d-6888463/...,https://akcdn.detik.net.id/visual/2023/08/21/b...,Kantor Balai Kota Semarang yang kini berada di...,"detikJatengSenin, 21 Agu 2023 20:26 WIB",[]
2,Alasan Bapak-bapak di RI Masih Enggan Pakai Ko...,https://health.detik.com/berita-detikhealth/d-...,https://akcdn.detik.net.id/visual/2022/09/15/i...,Partisipasi pria dalam program keluarga berenc...,"detikHealthMinggu, 07 Mei 2023 17:52 WIB",[]
3,Eri Cahyadi Mulai Blusukan ke Rumah-rumah Warg...,https://www.detik.com/jatim/detikjatim/d-68854...,https://akcdn.detik.net.id/visual/2023/08/19/e...,Wakil Ketua Bidang Politik DPD PDI Perjuangan ...,"detikJatimSabtu, 19 Agu 2023 23:00 WIB",[]
4,Gandeng Emak-emak Jadi Cara Bapenda Jabar Ting...,https://www.detik.com/jabar/bisnis/d-6883026/g...,https://akcdn.detik.net.id/visual/2023/04/19/k...,Badan Pendapatan Daerah (Bapenda) Jabar mengga...,"detikJabarSabtu, 19 Agu 2023 00:31 WIB",[]
...,...,...,...,...,...,...
295,UM Surabaya Kini Punya Fakultas Kedokteran Gig...,https://www.detik.com/jatim/berita/d-6700825/u...,https://akcdn.detik.net.id/visual/2023/05/02/l...,Universitas Muhammadiyah (UM) Surabaya meresmi...,"detikJatimSelasa, 02 Mei 2023 23:45 WIB",[]
296,"Cegah Makanan Mubazir, HNW Dukung UU Bank Makanan",https://news.detik.com/berita/d-6950730/cegah-...,https://akcdn.detik.net.id/visual/2023/09/26/h...,Wakil Ketua MPR RI Hidayat Nur Wahid (HNW) men...,"detikNewsSelasa, 26 Sep 2023 09:21 WIB",[]
297,Khofifah Puji Prestasi Pemkab Madiun di Pelant...,https://www.detik.com/jatim/berita/d-6950437/k...,https://akcdn.detik.net.id/visual/2023/09/25/p...,Gubernur Jawa Timur (Jatim) Khofifah Indar Par...,"detikJatimSenin, 25 Sep 2023 22:07 WIB",[]
298,Pesan Khofifah di XII Nasyiatul Aisyiyah: Doro...,https://www.detik.com/jatim/berita/d-6707502/p...,https://akcdn.detik.net.id/visual/2023/05/06/g...,Gubernur Jawa Timur Khofifah Indar Parawansa m...,"detikJatimMinggu, 07 Mei 2023 01:30 WIB",[]


In [30]:
df4 = pd.read_csv("detik11(4).csv")
df4

Unnamed: 0,judul,link,gambar,body,waktu,comments
0,"Ikuti Pesan Jokowi, Banyuwangi Fokus Soal SDM ...",https://news.detik.com/berita-jawa-timur/d-466...,https://akcdn.detik.net.id/visual/2019/08/16/2...,\n\n\nBanyuwangi - Pidato Kenegaraan Presiden ...,"detikNewsJumat, 16 Agu 2019 19:13 WIB",[]
1,Sederet Rencana Besar Jokowi Genjot Kualitas S...,https://finance.detik.com/berita-ekonomi-bisni...,https://akcdn.detik.net.id/visual/2019/07/08/b...,\r\n ADVERTISEMENT\r\n \r\n SCROLL TO C...,"detikFinanceJumat, 16 Agu 2019 12:08 WIB",[]
2,"Kualitas SDM RI Digenjot hingga ke Daerah, Ini...",https://finance.detik.com/berita-ekonomi-bisni...,https://akcdn.detik.net.id/visual/2019/07/25/4...,\r\n ADVERTISEMENT\r\n \r\n SCROLL TO C...,"detikFinanceSenin, 19 Agu 2019 16:05 WIB",[]
3,Mensos Jawab Isu Politisasi Bansos hingga Peny...,https://news.detik.com/berita/d-4674223/mensos...,https://akcdn.detik.net.id/visual/2019/08/21/2...,Menteri Sosial Agus Gumiwang Kartasasmita meny...,"detikNewsRabu, 21 Agu 2019 07:05 WIB",[]
4,KPAI Harap DPR Segera Sahkan Usia Minimum Nika...,https://news.detik.com/berita/d-4705986/kpai-h...,https://akcdn.detik.net.id/visual/2019/04/02/e...,\r\nADVERTISEMENT\r\n\r\nSCROLL TO CONTINUE WI...,"detikNewsSabtu, 14 Sep 2019 04:32 WIB",[]
...,...,...,...,...,...,...
519,"Jabar Hari Ini: Pria Kejam di Kuningan, Cabuli...",https://www.detik.com/jabar/berita/d-7071739/j...,https://akcdn.detik.net.id/visual/2023/05/03/i...,Sejumlah peristiwa mewarnai pemberitaan di Jaw...,"detikJabarSenin, 04 Des 2023 22:00 WIB",[]
520,Rangkuman 3 Capres-Cawapres di Hari Pertama Ka...,https://news.detik.com/pemilu/d-7062002/rangku...,https://akcdn.detik.net.id/visual/2023/11/02/i...,Selesai sudah para capres dan cawapres menjala...,"detikNewsRabu, 29 Nov 2023 08:58 WIB",[]
521,Gejala Global Pernikahan Dini Melonjak di Masa...,https://news.detik.com/berita/d-5180276/gejala...,https://akcdn.detik.net.id/visual/2018/04/14/2...,Pernikahan di usia dini meningkat selama masa ...,"detikNewsMinggu, 20 Sep 2020 06:28 WIB",[]
522,Si Mungil Sacha Inchi Energi Baru Satu Padu,https://www.detik.com/jatim/bisnis/d-7019536/s...,https://akcdn.detik.net.id/visual/2023/11/05/k...,Aroma harum menyeruak dari dalam sebuah dapur ...,"detikJatimMinggu, 05 Nov 2023 07:30 WIB",[]


In [31]:
concatenated_df = pd.concat([df1, df2, df3, df4])
concatenated_df

Unnamed: 0,judul,link,gambar,body,waktu,comments
0,Ada Rumah Gizi di Balik Sukses Demak Turunkan ...,https://www.detik.com/jateng/jawa-tengah-meria...,https://akcdn.detik.net.id/community/media/vis...,"Warga Desa Sidomulyo, Kecamatan Dempet, Kabupa...","detikJatengKamis, 25 Apr 2024 12:58 WIB",[]
1,Jokowi Akui Target Turunkan Stunting Jadi 14 P...,https://health.detik.com/detiktv/d-7308553/jok...,https://akcdn.detik.net.id/community/media/vis...,Presiden Joko Widodo (Jokowi) akui target turu...,"detikHealthRabu, 24 Apr 2024 16:02 WIB",[]
2,Pemkot Palopo Klaim Stunting Turun Jadi 100 Ka...,https://www.detik.com/sulsel/palopo/d-7307882/...,https://akcdn.detik.net.id/community/media/vis...,"Pemerintah Kota (Pemkot) Palopo, Sulawesi Sela...","detikSulselRabu, 24 Apr 2024 11:54 WIB",[]
3,BKKBN Ingatkan Usia Hamil Maksimal 35 Tahun un...,https://www.detik.com/sumut/berita/d-7265954/b...,https://akcdn.detik.net.id/community/media/vis...,Kepala BKKBN Hasto Wardoyo berbicara tentang P...,"detikSumutKamis, 28 Mar 2024 18:30 WIB",[]
4,Cara Iptu Dina Turunkan Stunting di Lombok Bar...,https://news.detik.com/berita/d-7305738/cara-i...,https://akcdn.detik.net.id/community/media/vis...,Stunting menjadi salah satu persoalan yang men...,"detikNewsSelasa, 23 Apr 2024 10:24 WIB",[]
...,...,...,...,...,...,...
519,"Jabar Hari Ini: Pria Kejam di Kuningan, Cabuli...",https://www.detik.com/jabar/berita/d-7071739/j...,https://akcdn.detik.net.id/visual/2023/05/03/i...,Sejumlah peristiwa mewarnai pemberitaan di Jaw...,"detikJabarSenin, 04 Des 2023 22:00 WIB",[]
520,Rangkuman 3 Capres-Cawapres di Hari Pertama Ka...,https://news.detik.com/pemilu/d-7062002/rangku...,https://akcdn.detik.net.id/visual/2023/11/02/i...,Selesai sudah para capres dan cawapres menjala...,"detikNewsRabu, 29 Nov 2023 08:58 WIB",[]
521,Gejala Global Pernikahan Dini Melonjak di Masa...,https://news.detik.com/berita/d-5180276/gejala...,https://akcdn.detik.net.id/visual/2018/04/14/2...,Pernikahan di usia dini meningkat selama masa ...,"detikNewsMinggu, 20 Sep 2020 06:28 WIB",[]
522,Si Mungil Sacha Inchi Energi Baru Satu Padu,https://www.detik.com/jatim/bisnis/d-7019536/s...,https://akcdn.detik.net.id/visual/2023/11/05/k...,Aroma harum menyeruak dari dalam sebuah dapur ...,"detikJatimMinggu, 05 Nov 2023 07:30 WIB",[]


In [20]:
res_detik2 = detik.search('stunting', 23, True)
res_detik2

[{'judul': 'Lantik Wali Kota Gunungsitoli, Pj Gubsu Minta Jaga Peningkatan IPM',
  'link': 'https://www.detik.com/sumut/berita/d-7179155/lantik-wali-kota-gunungsitoli-pj-gubsu-minta-jaga-peningkatan-ipm',
  'gambar': 'https://akcdn.detik.net.id/community/media/visual/2024/02/05/pj-gubsu-hassanudin-saat-melantik-wali-kota-gunungsitoli_43.jpeg?w=250&q=',
  'body': 'Penjabat (Pj) Gubernur Sumatera Utara (Sumut) Hassanudin melantik  Sowa\'a Laoli menjadi Wali Kota Gunungsitoli. Sowa\'a dilantik menggantikan Lakhomizaro Zebua yang meninggal dunia.Pelantikan itu digelar di Aula Tengku Rizal Nurdin, Senin (5/2/2024). Dalam pelantikan, Hassanudin meminta agar Sowa\'a mempertahankan peningkatan indeks pembangunan manusia (IPM)Dijelaskan jika IPM Kota Gunungsitoli meningkat rata-rata 0,63% per tahun dari 2020 hingga 2023. Peningkatan itu juga terjadi dua tahun terakhir, 2022 IPM Gunungsitoli sebesar 71,11 poin dan di 2023 menjadi 71,55 poin (meningkat 0,44 poin).\r\nADVERTISEMENT\r\n\r\nSCROLL T

In [32]:
concatenated_df.to_csv("detikcom_all.csv", index=False)

# SCRAPE CNN

In [127]:
from bs4 import BeautifulSoup
from requests import get

base_url = 'https://www.cnnindonesia.com'

class CNN:
    """
    Mengambil berbagai berita dari website cnnindonesia.com
    
    Contoh:
        - mengambil berita internasional
        from src import cnn

        print(cnn.berita_internasional())
    """

    def query(self, url):
        """
        Mengambil data dari body berita
        
        :param url: url yang datanya ingin diambil
        :return: list dictionaries.
        """
        datas = get(url)
        print(url)
        soup = BeautifulSoup(datas.text, 'html.parser')
        parent_tag = soup.find('div', class_="grow-0 w-leftcontent min-w-0")
        if parent_tag is not None:
            tag = parent_tag.find_all("article")
            # print(tag)
            data = []

            for i in tag:
                try:
                    print("yang keberapa")
                    print(i)
                    title = i.find("h2", attrs={'class':'title'}).text.strip()
                    link = i.find('a').get('href')
                    print(link)
                    gambar = i.find('img')['src'].strip()
                    # tipe = i.find('span', attrs={'class':'kanal'}).text
                    # waktu = i.find('span', attrs={'class':'date'}).text
                    data.append({
                        "judul": title,
                        "link": link,
                        "poster": gambar,
                        # "tipe": tipe,
                        # "waktu": waktu
                    })
                except:
                    pass

        else:
            print("Tidak dapat menemukan elemen dengan kelas 'media_rows' di halaman web.")
            data = None

        return data

    def index(self):
        """
        It returns the result of the query of the home news from cnn's site
        :return: The response object.
        """
        return self.query('{}/'.format(base_url))

    def berita_nasional(self):
        """
        Mengambil berita nasional

        :return: list dictionary
        """
        return self.query('{}/nasional'.format(base_url))

    def berita_internasional(self):
        """
        Mengambil berita internasional / luar negeri
        
        :return: list dictionary
        """
        return self.query('{}/internasional'.format(base_url))

    def berita_ekonomi(self):
        """
        Mengambil berita ekonomi
        
        :return: list dictionary
        """
        return self.query('{}/ekonomi'.format(base_url))

    def berita_olahraga(self):
        """
        Mengambil berita olahraga
        
        :return: list dictionary
        """
        return self.query('{}/olahraga'.format(base_url))

    def berita_teknologi(self):
        """
        Mengambil berita teknologi
        
        :return: list dictionary
        """
        return self.query('{}/teknologi'.format(base_url))

    def berita_hiburan(self):
        """
        Mengambil berita hiburan
        
        :return: list dictionary
        """
        return self.query('{}/hiburan'.format(base_url))

    def berita_social(self):
        """
        Mengambil berita sosial
        
        :return: list dictionary
        """
        return self.query('{}/gaya-hidup'.format(base_url))

    def detail(self, url):
        """
        Mengambil detail berita
        :args:
            url : string -> url berita
        :example:
            url : string -> https://www.cnnindonesia.com/teknologi/20220921153459-190-850830/cara-menghapus-data-iphone-sebelum-dijual
        :return: list dictionary
        """
        data = []
        try:
            req = get(url)
            soup = BeautifulSoup(req.text, 'html.parser')
            tag = soup.find('div', class_="detail_text")
            gambar = soup.find('div', class_='media_artikel').find('img').get('src')
            judul = soup.find('h1', class_='title').text
            body = tag.text
            data.append({
                "judul": judul,
                "poster": gambar,
                "body": body,
            })
        except:
            data.append({
                "message": "network error",
            })

        return data

    def search(self,q):
        """
        Mencari berita spesifik berdasarkan query

        :args:
            q : string -> query atau berita yang ingin dicari
        :returns: list dictionary
        """

        return self.query('{}/search/?query={}'.format(base_url, q))

In [10]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
from urllib.parse import urlsplit

class CNN2:
    base_url = 'https://www.cnnindonesia.com'

    def __init__(self):
        self.driver = webdriver.Chrome()

    def query(self, url, page_num=1):
        detail = True
        data = []
        for i in range(page_num):
            print(f"Scrape page ke-{i+4}")
            uri = "&page="
            ur = url + uri + str(i + 4)
            self.driver.get(ur)
            wait = WebDriverWait(self.driver, 10)
            try:
                element = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.grow-0.w-leftcontent.min-w-0")))
                html = self.driver.page_source
                soup = BeautifulSoup(html, 'html.parser')
                parent_tag = soup.find('div', class_="grow-0 w-leftcontent min-w-0")
                if parent_tag:
                    tag = parent_tag.find_all("article")
                    for i in tag:
                        try:
                            title = i.find("h2", class_='text-cnn_black_light dark:text-white mb-2 inline leading-normal text-xl group-hover:text-cnn_red').text.strip()
                            link = i.find('a')['href']
                            gambar = i.find('img')['src'].strip()
                            body = ''
                            waktu = ''
                            if detail:
                                body, waktu = self.detail(link)
                            data.append({
                                "judul": title,
                                "link": link,
                                "poster": gambar,
                                "body": body,
                                "waktu": waktu
                            })
                        except Exception as e:
                            print(e)
                # print(i)
            except Exception as e:
                print(e)
                return None
            # finally:
        self.driver.quit()
        return data
    
    def get_comments(self, url) -> list:
        data = []
        for i in (url):
            a = urlsplit(i)
            qs = 'single=1'
            detail_url = a.scheme + '://' + a.netloc + a.path + '?' + qs
            self.driver.get(detail_url)
            wait = WebDriverWait(self.driver, 10)
            try:
                iframe_element = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "//iframe[contains(@id,'uid_018d2ecaee_mdm6mzm6mze')]")))

                # Beralih ke konteks iframe
                self.driver.switch_to.frame('uid_018d2ecaee_mdm6mzm6mze')
                # print(iframe_element)
                # wait = WebDriverWait(self.driver, 10)
                # element = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.my-5")))
                # html = self.driver.page_source
                # soup = BeautifulSoup(html, 'html.parser')
                # comment_tags = soup.find('div', class_ = 'komentar-iframe-min-media__desc')
                # # print(comment_tags)

                # comments = []
                comment_tags = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "#cmt73935679 > div > div.komentar-iframe-min-media__text > div.komentar-iframe-min-media__desc")))
            
                # Ambil komentar dari setiap elemen komentar
                comments = [comment_tag.text.strip() for comment_tag in comment_tags]
                # for comment_tag in comment_tags:
                #     comments.append(comment_tag.text.strip())
                data.append({
                    # "link": i,
                    'comments': comments
                })

            except Exception as e:
                print(e)
                return None
            # finally:
        self.driver.quit()
        return data

    def search(self, q, page_num=1):
            return self.query('{}/search/?query={}'.format(self.base_url, q), page_num)
    
    def detail(self, url):
        a = urlsplit(url)
        qs = 'single=1'
        detail_url = a.scheme + '://' + a.netloc + a.path + '?' + qs
        req = get(detail_url)
        # print(req)
        soup = BeautifulSoup(req.text, 'html.parser')
        tag = soup.find('div', class_="text-cnn_grey text-sm mb-4")
        # print(tag)
        tag2 = soup.find('div', class_="detail-text text-cnn_black text-sm grow min-w-0")
        # print(tag2)
        waktu = tag.text
        body = ''
        if tag2.find_all('p'):
            for i in tag2.find_all('p'):
                body += i.text
        else:
            body += tag2.text
            
        return body, waktu

In [13]:
cnn2 = CNN2()

isi3 = []
# for i in range(2):
    # method search(query, page_number, detail)
    # page_num = i+1
    # print(f"Scrape page ke-{page_num}")
res_cnn = cnn2.search('stunting', 1)
    # print(res_cnn)
    # print()
    # isi3.extend(res_cnn)
isi3.extend(res_cnn)


Scrape page ke-4


TimeoutException: Message: timeout: Timed out receiving message from renderer: 299.726
  (Session info: chrome=124.0.6367.62)
Stacktrace:
	GetHandleVerifier [0x00007FF6A1BA1502+60802]
	(No symbol) [0x00007FF6A1B1AC02]
	(No symbol) [0x00007FF6A19D7CE4]
	(No symbol) [0x00007FF6A19C0FE1]
	(No symbol) [0x00007FF6A19C0D42]
	(No symbol) [0x00007FF6A19BEBE4]
	(No symbol) [0x00007FF6A19BF3FF]
	(No symbol) [0x00007FF6A19CE0F8]
	(No symbol) [0x00007FF6A19E57CF]
	(No symbol) [0x00007FF6A19EB38A]
	(No symbol) [0x00007FF6A19BFB85]
	(No symbol) [0x00007FF6A19E550E]
	(No symbol) [0x00007FF6A1A6A582]
	(No symbol) [0x00007FF6A1A4A923]
	(No symbol) [0x00007FF6A1A18FEC]
	(No symbol) [0x00007FF6A1A19C21]
	GetHandleVerifier [0x00007FF6A1EA411D+3217821]
	GetHandleVerifier [0x00007FF6A1EE60B7+3488055]
	GetHandleVerifier [0x00007FF6A1EDF03F+3459263]
	GetHandleVerifier [0x00007FF6A1C5B846+823494]
	(No symbol) [0x00007FF6A1B25F9F]
	(No symbol) [0x00007FF6A1B20EC4]
	(No symbol) [0x00007FF6A1B21052]
	(No symbol) [0x00007FF6A1B118A4]
	BaseThreadInitThunk [0x00007FF8B4BA5550+16]
	RtlUserThreadStart [0x00007FF8B60A485B+43]


In [12]:
isi3

[]

In [121]:
isi3

[{'judul': 'Riwayat Rusunawa Muara Baru yang Dikunjungi Pertama oleh Gibran',
  'link': 'https://www.cnnindonesia.com/nasional/20240426164514-20-1090966/riwayat-rusunawa-muara-baru-yang-dikunjungi-pertama-oleh-gibran',
  'poster': 'https://akcdn.detik.net.id/visual/2024/04/24/gibran-bagi-bagi-susu-di-rusun-waduk-pluit-5_169.jpeg?w=500&q=90',
  'body': 'Gibran Rakabuming Raka\xa0memilih Rusunawa Muara Baru, Jakarta Utara sebagai tempat yang pertama dikunjungi usai ditetapkan KPU sebagai wakil presiden terpilih hasil Pilpres 2024 pada Rabu (24/4).Rusunawa Muara Baru merupakan salah satu tempat yang menjadi perhatian khusus bagi Gibran Rakabuming Raka, wakil presiden terpilih untuk periode 2024-2029.Berlokasi di Jl. Muara Baru Ujung Gedung Pompa, Penjaringan, Kecamatan Penjaringan, Jakarta Utara, Rusunawa Muara Baru dipilih karena wilayah tersebut sangat padat penduduk.\r\n    ADVERTISEMENT\r\n\r\n    SCROLL TO CONTINUE WITH CONTENT\r\nDi sana, Gibran meluangkan waktu untuk mendengarkan k

In [106]:
isi3 = []
isi3.extend(res_cnn)

In [123]:
import pandas as pd
res = pd.DataFrame(isi3)
# url = res["link"]
# url
res

Unnamed: 0,judul,link,poster,body,waktu
0,Riwayat Rusunawa Muara Baru yang Dikunjungi Pe...,https://www.cnnindonesia.com/nasional/20240426...,https://akcdn.detik.net.id/visual/2024/04/24/g...,Gibran Rakabuming Raka memilih Rusunawa Muara ...,"Sabtu, 27 Apr 2024 09:10 WIB"
1,Gibran Ungkap Alasan Muara Baru Dikunjungi Per...,https://www.cnnindonesia.com/nasional/20240426...,https://akcdn.detik.net.id/visual/2024/04/24/g...,Gibran Rakabuming Raka mengungkap alasan memil...,"Jumat, 26 Apr 2024 13:18 WIB"
2,Ma'ruf Amin: Target Penurunan Stunting 14 Pers...,https://www.cnnindonesia.com/nasional/20240425...,https://akcdn.detik.net.id/visual/2023/12/23/m...,Wakil Presiden Ma'ruf Amin mengungkapkan targe...,"Jumat, 26 Apr 2024 00:55 WIB"
3,Bulog Belum Diajak Bicara soal Program Makan G...,https://www.cnnindonesia.com/ekonomi/202404251...,https://akcdn.detik.net.id/visual/2024/02/29/s...,Direktur Utama Perum Bulog Bayu Krisnamurthi m...,"Kamis, 25 Apr 2024 19:22 WIB"
4,Jokowi Masukkan Program Unggulan Prabowo-Gibra...,https://www.cnnindonesia.com/ekonomi/202404251...,https://akcdn.detik.net.id/visual/2019/10/11/0...,Presiden Joko Widodo (Jokowi) memasukkan progr...,"Kamis, 25 Apr 2024 14:56 WIB"
...,...,...,...,...,...
706,"Stunting, Prioritas Utama Masalah Gizi Indonesia",https://www.cnnindonesia.com/gaya-hidup/201602...,https://akcdn.detik.net.id/visual/2015/04/10/3...,\r\n ADVERTISEMENT\r\n\r\n SCROLL TO CON...,"Jumat, 19 Feb 2016 07:42 WIB"
707,Sanitasi Buruk Picu Kematian di Papua,https://www.cnnindonesia.com/gaya-hidup/201511...,https://akcdn.detik.net.id/visual/2015/02/02/8...,\r\n ADVERTISEMENT\r\n\r\n SCROLL TO CON...,"Kamis, 19 Nov 2015 12:20 WIB"
708,Indonesia Negara dengan Jumlah Anak Pendek Ter...,https://www.cnnindonesia.com/gaya-hidup/201507...,https://akcdn.detik.net.id/visual/2015/06/03/f...,\r\n ADVERTISEMENT\r\n\r\n SCROLL TO CON...,"Kamis, 02 Jul 2015 15:42 WIB"
709,Menkes Sorot soal Stunting dan Angka Kematian Ibu,https://www.cnnindonesia.com/nasional/20150203...,https://akcdn.detik.net.id/visual/2015/01/03/a...,\r\n ADVERTISEMENT\r\n\r\n SCROLL TO CON...,"Rabu, 04 Feb 2015 07:33 WIB"


In [89]:
aa = res["link"][-1:]

In [102]:
cnn2 = CNN2()
comm = cnn2.get_comments(aa)

Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF6961F1502+60802]
	(No symbol) [0x00007FF69616AC02]
	(No symbol) [0x00007FF696027CE4]
	(No symbol) [0x00007FF696076D4D]
	(No symbol) [0x00007FF696076E1C]
	(No symbol) [0x00007FF6960BCE37]
	(No symbol) [0x00007FF69609ABBF]
	(No symbol) [0x00007FF6960BA224]
	(No symbol) [0x00007FF69609A923]
	(No symbol) [0x00007FF696068FEC]
	(No symbol) [0x00007FF696069C21]
	GetHandleVerifier [0x00007FF6964F411D+3217821]
	GetHandleVerifier [0x00007FF6965360B7+3488055]
	GetHandleVerifier [0x00007FF69652F03F+3459263]
	GetHandleVerifier [0x00007FF6962AB846+823494]
	(No symbol) [0x00007FF696175F9F]
	(No symbol) [0x00007FF696170EC4]
	(No symbol) [0x00007FF696171052]
	(No symbol) [0x00007FF6961618A4]
	BaseThreadInitThunk [0x00007FF8B4BA5550+16]
	RtlUserThreadStart [0x00007FF8B60A485B+43]



In [103]:
comm

In [167]:
cnn2 = CNN2()

det = cnn2.detail("https://www.cnnindonesia.com/nasional/20240425173453-20-1090515/maruf-amin-target-penurunan-stunting-14-persen-akan-dievaluasi")
det

Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF63FE51502+60802]
	(No symbol) [0x00007FF63FDCAC02]
	(No symbol) [0x00007FF63FC87CE4]
	(No symbol) [0x00007FF63FCD6D4D]
	(No symbol) [0x00007FF63FCD6E1C]
	(No symbol) [0x00007FF63FD1CE37]
	(No symbol) [0x00007FF63FCFABBF]
	(No symbol) [0x00007FF63FD1A224]
	(No symbol) [0x00007FF63FCFA923]
	(No symbol) [0x00007FF63FCC8FEC]
	(No symbol) [0x00007FF63FCC9C21]
	GetHandleVerifier [0x00007FF64015411D+3217821]
	GetHandleVerifier [0x00007FF6401960B7+3488055]
	GetHandleVerifier [0x00007FF64018F03F+3459263]
	GetHandleVerifier [0x00007FF63FF0B846+823494]
	(No symbol) [0x00007FF63FDD5F9F]
	(No symbol) [0x00007FF63FDD0EC4]
	(No symbol) [0x00007FF63FDD1052]
	(No symbol) [0x00007FF63FDC18A4]
	BaseThreadInitThunk [0x00007FF8B4BA5550+16]
	RtlUserThreadStart [0x00007FF8B60A485B+43]



In [168]:
det

In [125]:
cnn = CNN()

isi2 = []

# for i in range(2):
#     # method search(query, page_number, detail)
#     page_num = i+1
#     print(f"Scrape page ke-{page_num}")
res_cnn = cnn.search('stunting')
    # print(res_detik)
    # print()
    # isi2.extend(res_detik)

https://www.cnnindonesia.com/search/?query=stunting
yang keberapa
<article class="flex-grow animate-pulse">
<div class="flex group items-center gap-4">
<span class="flex-none overflow-hidden block relative w-[270px]">
<span class="block bg-placeholder aspect-w-16 aspect-h-9"></span>
</span>
<span class="flex-grow">
<span class="block mb-2 bg-placeholder w-20 h-4"></span>
<span class="bg-placeholder w-full mb-2 inline-block h-5"></span><span class="bg-placeholder w-9/12 inline-block h-5"></span>
</span>
</div>
</article>
yang keberapa
<article class="flex-grow animate-pulse">
<div class="flex group items-center gap-4">
<span class="flex-none overflow-hidden block relative w-[270px]">
<span class="block bg-placeholder aspect-w-16 aspect-h-9"></span>
</span>
<span class="flex-grow">
<span class="block mb-2 bg-placeholder w-20 h-4"></span>
<span class="bg-placeholder w-full mb-2 inline-block h-5"></span><span class="bg-placeholder w-9/12 inline-block h-5"></span>
</span>
</div>
</article>


In [156]:
res = pd.DataFrame(res_cnn2)

In [124]:
res.to_csv("cnn_nocomm.csv", index=False)