In [1]:
import pandas as pd
from pytrends.request import TrendReq
import time
import logging

In [1]:
import pandas as pd
from pytrends.request import TrendReq
import logging
import time

def get_trend(keywords, max_attempts=5, retry_delay=5, client=None):
    """Collect per-province Google Trends interest for ``keywords[0]``.

    For every Indonesian province in the table below, a pytrends payload is
    built for ``keywords`` over the fixed window 2024-10-01 .. 2024-12-31 and
    the ``keywords[0]`` column of the interest-over-time frame is kept as that
    province's column. A province that still fails after ``max_attempts``
    tries is filled with 0 (so such a column is indistinguishable from a
    genuine all-zero series; check ``scraping_errors.log`` for failures).

    The result is written to ``'<keywords[0]>.csv'`` in the working directory.

    Args:
        keywords: Non-empty list of search terms (a bare string is treated as
            a one-element list). All terms are sent to pytrends; only the
            first term's series is stored.
        max_attempts: Number of tries per province before giving up (>= 1).
        retry_delay: Seconds to wait between failed tries of one province.
        client: Optional pytrends-like object exposing ``build_payload`` and
            ``interest_over_time``. When omitted a new
            ``TrendReq(hl='en-ID', tz=360)`` is created.

    Returns:
        pandas.DataFrame with a leading ``date`` column followed by one
        column per province, in the order of the province table.

    Raises:
        ValueError: If ``keywords`` is empty or ``max_attempts`` < 1.
    """
    if isinstance(keywords, str):
        keywords = [keywords]
    # Fail fast: otherwise an empty list raises IndexError inside the retry
    # loop and every province is retried (with sleeps) before crashing.
    if not keywords:
        raise ValueError("keywords must contain at least one search term.")
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1.")

    logging.basicConfig(filename='scraping_errors.log', level=logging.ERROR)
    pytrends = TrendReq(hl='en-ID', tz=360) if client is None else client
    provinces = {
        "Aceh": "ID-AC",
        "Bali": "ID-BA",
        "Kepulauan Bangka Belitung": "ID-BB",
        "Banten": "ID-BT",
        "Bengkulu": "ID-BE",
        "Jawa Tengah": "ID-JT",
        "Kalimantan Tengah": "ID-KT",
        "Sulawesi Tengah": "ID-ST",
        "Jawa Timur": "ID-JI",
        "Kalimantan Timur": "ID-KI",
        "Nusa Tenggara Timur": "ID-NT",
        "Gorontalo": "ID-GO",
        "Jambi": "ID-JA",
        "Lampung": "ID-LA",
        "Maluku": "ID-MA",
        "Kalimantan Utara": "ID-KU",
        "Maluku Utara": "ID-MU",
        "Sulawesi Utara": "ID-SA",
        "Sumatera Utara": "ID-SU",
        "Papua": "ID-PA",
        "Riau": "ID-RI",
        "Kepulauan Riau": "ID-KR",
        "Sulawesi Tenggara": "ID-SG",
        "Kalimantan Selatan": "ID-KS",
        "Sulawesi Selatan": "ID-SN",
        "Sumatera Selatan": "ID-SS",
        "DKI Jakarta": "ID-JK",
        "DI Yogyakarta": "ID-YO",
        "Jawa Barat": "ID-JB",
        "Kalimantan Barat": "ID-KB",
        "Nusa Tenggara Barat": "ID-NB",
        "Papua Barat": "ID-PB",
        "Sulawesi Barat": "ID-SR",
        "Sumatera Barat": "ID-SB"
    }

    keyword = keywords[0]
    fetched = {}  # province name -> Series of interest values

    for province, geo_code in provinces.items():
        for attempt in range(1, max_attempts + 1):
            try:
                pytrends.build_payload(kw_list=keywords, timeframe='2024-10-01 2024-12-31', geo=geo_code)
                interest_over_time_df = pytrends.interest_over_time()

                if interest_over_time_df.empty:
                    raise ValueError("No data returned for this province.")

                interest_values = interest_over_time_df[keyword].rename(province)
            except Exception as e:
                logging.error(f"Error for {province}: {e}")
                # Only wait when another attempt will actually follow.
                if attempt < max_attempts:
                    time.sleep(retry_delay)
                continue

            fetched[province] = interest_values
            print(f"Interest over time for {province}:")
            print(interest_values.head())
            break  # Exit the retry loop if successful
        else:
            # Reached only when no attempt succeeded (loop never hit `break`).
            print(f"Failed to retrieve data for {province} after multiple attempts.")

    # Assemble the frame once, so its index does not depend on which province
    # happened to be processed (or to fail) first.
    if fetched:
        results_df = pd.concat(list(fetched.values()), axis=1)
    else:
        results_df = pd.DataFrame(index=pd.date_range(start='2024-10-01', end='2024-12-31'))

    for province in provinces:
        if province not in fetched:
            results_df[province] = 0

    # Restore the province-table column order and always expose the index as
    # a 'date' column (previously 'Date' appeared only if the first province failed).
    results_df = results_df[list(provinces)].rename_axis('date').reset_index()
    results_df.to_csv(f'{keyword}.csv', index=False)

    return results_df

In [17]:
# Commodity search terms. Each term is passed to get_trend on its own, so
# get_trend writes one "<term>.csv" file per commodity.
keywords = [
    'bawang merah',
    'bawang putih',
    'beras',
    'cabai merah',
    'cabai rawit',
    'daging ayam',
    'daging sapi',
    'gula',
    'minyak goreng',
    'telur ayam',
    'tepung terigu',
]
for keyword in keywords:
    get_trend([keyword])

Interest over time for Aceh:
date
2024-10-01     0
2024-10-02     0
2024-10-03     0
2024-10-04     0
2024-10-05    73
Name: Aceh, dtype: int32
Interest over time for Bali:
date
2024-10-01    0
2024-10-02    0
2024-10-03    0
2024-10-04    0
2024-10-05    0
Name: Bali, dtype: int32
Failed to retrieve data for Bangka Belitung Islands after multiple attempts.
Interest over time for Banten:
date
2024-10-01    0
2024-10-02    0
2024-10-03    0
2024-10-04    0
2024-10-05    0
Name: Banten, dtype: int32
Interest over time for Bengkulu:
date
2024-10-01    0
2024-10-02    0
2024-10-03    0
2024-10-04    0
2024-10-05    0
Name: Bengkulu, dtype: int32
Interest over time for Jawa Tengah:
date
2024-10-01    59
2024-10-02    58
2024-10-03    73
2024-10-04    40
2024-10-05    61
Name: Jawa Tengah, dtype: int32
Interest over time for Kalimantan Tengah:
date
2024-10-01    0
2024-10-02    0
2024-10-03    0
2024-10-04    0
2024-10-05    0
Name: Kalimantan Tengah, dtype: int32
Failed to retrieve data for

In [6]:
def get_trend2(keywords, client=None):
    """Single-pass (no retry) variant of the per-province trend download.

    For each province in the table below, a pytrends payload is built for
    ``keywords`` over 2024-10-01 .. 2024-12-31 and the ``keywords[0]`` column
    of the interest-over-time frame becomes that province's column. A province
    whose request raises or returns an empty frame is reported on stdout and
    left out of the result. There is no pause between requests.

    The result is written to ``'google_trends_results.csv'``.

    Args:
        keywords: Non-empty list of search terms (a bare string is treated as
            a one-element list). All terms are sent to pytrends; only the
            first term's series is stored.
        client: Optional pytrends-like object exposing ``build_payload`` and
            ``interest_over_time``. When omitted a new
            ``TrendReq(hl='en-ID', tz=360)`` is created.

    Returns:
        pandas.DataFrame with a leading ``date`` column followed by one
        column per province that returned data.

    Raises:
        ValueError: If ``keywords`` is empty.
    """
    if isinstance(keywords, str):
        keywords = [keywords]
    if not keywords:
        raise ValueError("keywords must contain at least one search term.")

    pytrends = TrendReq(hl='en-ID', tz=360) if client is None else client
    provinces = {
        "Aceh": "ID-AC",
        "Bali": "ID-BA",
        "Bangka Belitung Islands": "ID-BB",
        "Banten": "ID-BT",
        "Bengkulu": "ID-BE",
        "Jawa Tengah": "ID-JT",
        "Kalimantan Tengah": "ID-KT",
        "Sulawesi Tengah": "ID-ST",
        "Jawa Timur": "ID-JI",
        "Kalimantan Timur": "ID-KI",
        "Nusa Tenggara Timur": "ID-NT",
        "Gorontalo": "ID-GO",
        "Jambi": "ID-JA",
        "Lampung": "ID-LA",
        "Maluku": "ID-MA",
        "Kalimantan Utara": "ID-KU",
        "Maluku Utara": "ID-MU",
        "Sulawesi Utara": "ID-SA",
        "Sumatra Utara": "ID-SU",
        "Papua": "ID-PA",
        "Riau": "ID-RI",
        "Riau Islands": "ID-KR",
        "Sulawesi Tenggara": "ID-SG",
        "Kalimantan Selatan": "ID-KS",
        "Sulawesi Selatan": "ID-SN",
        "Sumatra Selatan": "ID-SS",
        "DKI Jakarta": "ID-JK",
        "DI Yogyakarta": "ID-YO",
        "Jawa Barat": "ID-JB",
        "Kalimantan Barat": "ID-KB",
        "Nusa Tenggara Barat": "ID-NB",
        "Papua Barat": "ID-PB",
        "Sulawesi Barat": "ID-SR",
        "Sumatra Barat": "ID-SB"
    }

    keyword = keywords[0]
    fetched = []  # one Series per province that returned data

    for province, geo_code in provinces.items():
        try:
            pytrends.build_payload(kw_list=keywords, timeframe='2024-10-01 2024-12-31', geo=geo_code)
            interest_over_time_df = pytrends.interest_over_time()

            # An empty frame has no keyword column; without this check the
            # failure shows up as a bare KeyError message such as "'bawang merah'".
            if interest_over_time_df.empty:
                raise ValueError("No data returned for this province.")

            interest_values = interest_over_time_df[keyword].rename(province)
        except Exception as e:
            print(f"An error occurred for {province}: {e}")
            continue

        fetched.append(interest_values)
        print(f"Interest over time for {province}:")
        print(interest_values.head())

    if fetched:
        results_df = pd.concat(fetched, axis=1)
    else:
        # Nothing succeeded: keep the same 'date' column, just with no rows.
        results_df = pd.DataFrame(index=pd.DatetimeIndex([], name='date'))

    # Always expose the index as a 'date' column.
    results_df = results_df.rename_axis('date').reset_index()

    results_df.to_csv('google_trends_results.csv', index=False)

    return results_df

In [7]:
# Run the no-retry variant on the `keywords` list defined in an earlier cell.
# As the cell's last expression, the returned DataFrame is what gets displayed.
get_trend2(keywords)

Interest over time for Aceh:
date
2024-10-01     0
2024-10-02     0
2024-10-03     0
2024-10-04     0
2024-10-05    73
Name: Aceh, dtype: int32
Interest over time for Bali:
date
2024-10-01    0
2024-10-02    0
2024-10-03    0
2024-10-04    0
2024-10-05    0
Name: Bali, dtype: int32
An error occurred for Bangka Belitung Islands: 'bawang merah'
Interest over time for Banten:
date
2024-10-01     0
2024-10-02     0
2024-10-03     0
2024-10-04     0
2024-10-05    79
Name: Banten, dtype: int32
Interest over time for Bengkulu:
date
2024-10-01    0
2024-10-02    0
2024-10-03    0
2024-10-04    0
2024-10-05    0
Name: Bengkulu, dtype: int32
Interest over time for Jawa Tengah:
date
2024-10-01    59
2024-10-02    58
2024-10-03    73
2024-10-04    40
2024-10-05    61
Name: Jawa Tengah, dtype: int32
Interest over time for Kalimantan Tengah:
date
2024-10-01    0
2024-10-02    0
2024-10-03    0
2024-10-04    0
2024-10-05    0
Name: Kalimantan Tengah, dtype: int32
An error occurred for Sulawesi Tengah

Unnamed: 0,date,Aceh,Bali,Banten,Bengkulu,Jawa Tengah,Kalimantan Tengah,Jawa Timur,Nusa Tenggara Timur,Gorontalo,...,Sumatra Utara,Riau Islands,Sulawesi Selatan,Sumatra Selatan,DKI Jakarta,DI Yogyakarta,Jawa Barat,Nusa Tenggara Barat,Sulawesi Barat,Sumatra Barat
0,2024-10-01,0,0,0,0,59,0,51,0,0,...,0,0,0,0,42,0,63,0,0,0
1,2024-10-02,0,0,0,0,58,0,74,0,0,...,0,0,0,0,72,0,58,0,0,0
2,2024-10-03,0,0,0,0,73,0,63,0,0,...,0,0,0,0,39,0,77,0,0,0
3,2024-10-04,0,0,0,0,40,0,51,0,0,...,0,0,0,0,0,94,55,0,0,0
4,2024-10-05,73,0,79,0,61,0,49,0,0,...,0,0,0,0,0,0,84,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87,2024-12-27,0,0,0,0,37,0,39,0,0,...,0,0,0,0,0,0,53,0,0,0
88,2024-12-28,0,0,0,0,66,0,49,0,0,...,0,0,0,0,0,0,50,0,0,0
89,2024-12-29,0,0,0,0,54,0,31,0,0,...,0,0,0,0,0,0,34,0,0,0
90,2024-12-30,0,0,0,0,100,0,45,0,0,...,0,0,0,100,0,0,58,0,0,0
