Dimas Wahyu Saputro - Dibimbing - Nomor 1

In [29]:
import requests
import pandas as pd

# Root URL shared by every news endpoint queried in this notebook
base_api_url = "https://berita-indo-api-next.vercel.app/api/"

# Per provider: the URL path segment ("type") and the category slugs
# ("listType") that are appended to it to form one endpoint each.
api_data = {
    "CNN News": {
        "type": "cnn-news/",
        "listType": [
            "nasional", "internasional", "ekonomi", "olahraga",
            "teknologi", "hiburan", "gaya-hidup",
        ],
    },
    "CNBC News": {
        "type": "cnbc-news/",
        "listType": [
            "market", "news", "entrepreneur", "syariah", "tech", "lifestyle",
        ],
    },
    "Republika News": {
        "type": "republika-news/",
        "listType": [
            "news", "nusantara", "khazanah", "islam-digest",
            "internasional", "ekonomi", "sepakbola", "leisure",
        ],
    },
    "Tempo News": {
        "type": "tempo-news/",
        "listType": [
            "nasional", "bisnis", "metro", "dunia", "bola",
            "sport", "cantik", "tekno", "otomotif", "nusantara",
        ],
    },
}

# Columns of the final table, in display order. A field that no provider
# returns is still created (filled with NaN) instead of raising a KeyError.
output_columns = ["creator", "title", "link", "categories", "isoDate", "description", "image"]

# Seconds to wait for each endpoint before giving up on it
request_timeout_seconds = 30

# Per-endpoint frames are collected here and concatenated once after the loop;
# calling pd.concat inside the loop re-copies all earlier rows on every pass.
endpoint_frames = []

# (url, reason) pairs for endpoints that produced no rows, reported after the loop
skipped_endpoints = []

# Loop through every provider / category combination
for provider, endpoint_data in api_data.items():
    for news_type in endpoint_data["listType"]:
        api_url = f"{base_api_url}{endpoint_data['type']}{news_type}"

        # (1) Hit the API. The timeout keeps one unresponsive endpoint from
        # hanging the cell; a failed request skips that endpoint only.
        try:
            response = requests.get(api_url, timeout=request_timeout_seconds)
        except requests.RequestException as exc:
            skipped_endpoints.append((api_url, repr(exc)))
            continue

        if response.status_code != 200:
            skipped_endpoints.append((api_url, f"HTTP {response.status_code}"))
            continue

        data = response.json()
        data_list = data.get("data") or []
        if not data_list:
            # An empty payload has no 'isoDate' column to convert
            skipped_endpoints.append((api_url, "empty 'data' payload"))
            continue

        # (2) Build a DataFrame from the current API response
        api_df = pd.DataFrame(data_list)

        # (3) Convert the 'isoDate' column to datetime. utc=True yields the
        # same tz-aware dtype for every provider so the frames concatenate
        # into a single datetime column.
        if "isoDate" in api_df.columns:
            api_df["isoDate"] = pd.to_datetime(api_df["isoDate"], utc=True)

        # Add a 'creator' column based on the provider and news_type
        api_df["creator"] = provider.replace(" ", "") + news_type.upper()

        # Add a 'categories' column with the news_type
        api_df["categories"] = news_type

        endpoint_frames.append(api_df)

# Surface endpoints that contributed nothing instead of dropping them silently
if skipped_endpoints:
    print(f"Skipped {len(skipped_endpoints)} endpoint(s):")
    for skipped_url, reason in skipped_endpoints:
        print(f"  {skipped_url} -> {reason}")

# Concatenate once, then select/order the output columns
if endpoint_frames:
    df = pd.concat(endpoint_frames, ignore_index=True).reindex(columns=output_columns)
else:
    df = pd.DataFrame(columns=output_columns)
df # many NaN values because the API itself does not provide those fields

Unnamed: 0,creator,title,link,categories,isoDate,description,image
0,CNNNewsNASIONAL,MK Didesak Percepat Putus Uji Formil Baru soal...,https://www.cnnindonesia.com/nasional/20231109...,nasional,2023-11-09 12:46:16+00:00,,{'small': 'https://akcdn.detik.net.id/visual/2...
1,CNNNewsNASIONAL,Sekuriti Apartemen Bekasi Minta Maaf Sudah Rib...,https://www.cnnindonesia.com/nasional/20231109...,nasional,2023-11-09 12:38:48+00:00,,{'small': 'https://akcdn.detik.net.id/visual/2...
2,CNNNewsNASIONAL,Gibran Jawab PDIP Solo Merasa Diintimidasi: Pa...,https://www.cnnindonesia.com/nasional/20231109...,nasional,2023-11-09 12:36:31+00:00,,{'small': 'https://akcdn.detik.net.id/visual/2...
3,CNNNewsNASIONAL,Hasto Minta Bobby Mundur dari PDIP Usai Dukung...,https://www.cnnindonesia.com/nasional/20231109...,nasional,2023-11-09 12:28:21+00:00,,{'small': 'https://akcdn.detik.net.id/visual/2...
4,CNNNewsNASIONAL,FOTO: Perjuangan Massa Aksi Bela Palestina di ...,https://www.cnnindonesia.com/nasional/20231109...,nasional,2023-11-09 12:19:47+00:00,,{'small': 'https://akcdn.detik.net.id/visual/2...
...,...,...,...,...,...,...,...
1505,TempoNewsOTOMOTIF,Moto3 Malaysia: Mario Aji Tak Cocok dengan 2 T...,https://otomotif.tempo.co/read/1794638/moto3-m...,otomotif,2023-11-09 08:00:00+00:00,,
1506,TempoNewsOTOMOTIF,"Mobil Harga Rp 600 Juta, Pilih Pindad Maung at...",https://otomotif.tempo.co/read/1794624/mobil-h...,otomotif,2023-11-09 07:00:00+00:00,,
1507,TempoNewsOTOMOTIF,Ford Bronco Buatan Cina Bakal Rilis Tahun Depa...,https://otomotif.tempo.co/read/1794503/ford-br...,otomotif,2023-11-09 06:00:00+00:00,,
1508,TempoNewsOTOMOTIF,"Kawasaki Z500 SE Rilis di EICMA 2023, Simak Sp...",https://otomotif.tempo.co/read/1794497/kawasak...,otomotif,2023-11-09 05:00:00+00:00,,


In [31]:
# Summarise the combined table: dtype and non-null count of every column
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1510 entries, 0 to 1509
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype              
---  ------       --------------  -----              
 0   creator      1510 non-null   object             
 1   title        1510 non-null   object             
 2   link         1510 non-null   object             
 3   categories   1510 non-null   object             
 4   isoDate      1510 non-null   datetime64[ns, UTC]
 5   description  120 non-null    object             
 6   image        1420 non-null   object             
dtypes: datetime64[ns, UTC](1), object(6)
memory usage: 82.7+ KB


In [30]:
# (4) DATA AGGREGATION
# Count the rows belonging to each 'creator' label, then order the labels
# from the largest count to the smallest.
rows_per_creator = df.groupby("creator").size()
aggregated_data = (
    rows_per_creator
    .reset_index(name="count")
    .sort_values(by="count", ascending=False)
)
aggregated_data

Unnamed: 0,creator,count
0,CNBCNewsENTREPRENEUR,100
7,CNNNewsGAYA-HIDUP,100
1,CNBCNewsLIFESTYLE,100
12,CNNNewsTEKNOLOGI,100
11,CNNNewsOLAHRAGA,100
10,CNNNewsNASIONAL,100
9,CNNNewsINTERNASIONAL,100
8,CNNNewsHIBURAN,100
6,CNNNewsEKONOMI,100
5,CNBCNewsTECH,100
