In [1]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import quote
import pandas as pd
import time

In [2]:

def get_search_url(app_name):
    """Build the Google Play Store search URL for ``app_name``.

    The name is percent-encoded with ``urllib.parse.quote`` and placed in the
    ``q`` query parameter; the fixed ``c=apps`` and ``hl=ko`` parameters are
    appended after it.

    Args:
        app_name: Search text (typically an app name).

    Returns:
        The full search URL as a string.
    """
    encoded_name = quote(app_name)
    return "https://play.google.com/store/search?q=" + encoded_name + "&c=apps&hl=ko"

In [3]:

def find_nested_a_tag(tag, class_name):
    """Return the first ``<a>`` with ``class_name`` found inside an ``ipRz4`` div.

    Every ``div`` with class ``ipRz4`` under ``tag`` is searched in order, and
    the search stops at the first div that yields a matching anchor.

    Args:
        tag: Parsed element exposing ``find_all`` (e.g. a BeautifulSoup tag).
        class_name: CSS class the anchor must carry.

    Returns:
        The first matching anchor element, or ``None`` when there is none.
    """
    # Lazy generator: divs after the first hit are never queried.
    candidates = (
        container.find('a', class_=class_name)
        for container in tag.find_all('div', class_='ipRz4')
    )
    return next((anchor for anchor in candidates if anchor), None)


In [4]:

def get_app_urls(search_query, num_results=1):
    """Search Google Play and return app-detail URLs for the top results.

    Args:
        search_query: Search text (typically an app name).
        num_results: Maximum number of app URLs to return.

    Returns:
        A list of at most ``num_results`` absolute app URLs. The list is empty
        when the request fails, the response status is not 200, or no result
        link is found in the page.
    """
    search_url = get_search_url(search_query)
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    try:
        # requests has no default timeout; without one a stalled connection
        # would block the notebook indefinitely.
        response = requests.get(search_url, headers=headers, timeout=10)
    except requests.RequestException:
        # Treat network failures like a non-200 response: no results.
        return []

    if response.status_code != 200:
        return []

    soup = BeautifulSoup(response.text, 'html.parser')

    app_urls = []
    # Cap on collected URLs rather than on containers inspected, so a result
    # container without a usable link does not reduce the number of URLs.
    for div in soup.find_all('div', class_='XUIuZ'):
        if len(app_urls) >= num_results:
            break
        a_tag = find_nested_a_tag(div, 'Qfxief')
        # .get() avoids a KeyError on an anchor that has no href attribute.
        href = a_tag.get('href') if a_tag else None
        if href:
            app_urls.append('https://play.google.com' + href)

    return app_urls


In [5]:
def get_app_info(app_url):
    """Fetch a Google Play app page and extract its title, category and description.

    Args:
        app_url: Absolute URL of the app's detail page.

    Returns:
        A dict with the keys ``'title'``, ``'category'`` and ``'description'``.
        A field whose element is missing from the page holds a
        ``'... not found'`` placeholder string. An empty dict is returned when
        the request fails or the response status is not 200.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    try:
        # requests has no default timeout; without one a stalled connection
        # would block the notebook indefinitely.
        response = requests.get(app_url, headers=headers, timeout=10)
    except requests.RequestException:
        # Treat network failures like a non-200 response: no data.
        return {}

    if response.status_code != 200:
        return {}

    soup = BeautifulSoup(response.text, 'html.parser')

    # App title
    app_title_tag = soup.find('h1', {'class': 'Fd93Bb'})
    app_title = app_title_tag.text if app_title_tag else 'Title not found'

    # App description (full description block)
    description_div = soup.find('div', {'class': 'bARER'})
    app_description = (
        description_div.get_text(separator=' ', strip=True)
        if description_div
        else 'Description not found'
    )

    # App category: the label span inside the category chip element
    app_category_div = soup.find('div', {'data-idom-class': 'Rj2Mlf OLiIxf PDpWxe P62QJc LQeN7 LMoCf'})
    app_category_span = app_category_div.find('span', {'class': 'VfPpkd-vQzf8d'}) if app_category_div else None
    app_category = app_category_span.text if app_category_span else 'Category not found'

    return {
        'title': app_title,
        'category': app_category,
        'description': app_description
    }


In [6]:
# Names of the apps to look up on Google Play
app_names = [
    "카카오톡",
    "파파고",
    "카카오맵",
    "네이버",
    "쿠팡",
    "인스타그램",
    "네이버맵",
    "지그재그",
    "링크드인",
    "네이버블로그",
    "크롬",
]

In [7]:
# Pause between apps so the crawl is less likely to be blocked by Google Play.
REQUEST_DELAY_SECONDS = 2

# Collected rows, one dict per app name
app_data = []

# Look up each app's URL, then crawl its detail page
for app_name in app_names:
    app_urls = get_app_urls(app_name)
    if app_urls:
        # Only the top search hit is used.
        url = app_urls[0]
        app_info = get_app_info(url)
        if not app_info:
            # The detail page could not be fetched: keep the same columns as
            # every other row instead of emitting a row with only 'name'.
            app_info = {'title': 'App info not found', 'category': '', 'description': ''}
        app_info['name'] = app_name
        app_data.append(app_info)
    else:
        app_data.append({'name': app_name, 'title': 'No URL found', 'category': '', 'description': ''})

    time.sleep(REQUEST_DELAY_SECONDS)

In [9]:
# Convert the collected rows into a pandas DataFrame
df = pd.DataFrame(data=app_data)

In [10]:
# Show the collected app table as the cell's output
display(df)

Unnamed: 0,title,category,description,name
0,KakaoTalk : Messenger,Communication,"With more than 150 Million users worldwide, Ka...",카카오톡
1,Naver Papago - AI Translator,Tools,Whenever you need translation during your trav...,파파고
2,KakaoMap - Map / Navigation,Travel & Local,KakaoMap will show you the fastest routes in K...,카카오맵
3,네이버 - NAVER,Books & Reference,A wealth of information optimized for mobile! ...,네이버
4,쿠팡 (Coupang),Shopping,Coupang is the perfect place for savvy shopper...,쿠팡
5,Instagram,Social,Little moments lead to big friendships. Share ...,인스타그램
6,"NAVER Map, Navigation",Maps & Navigation,South Korea's GPS navigation Get started right...,네이버맵
7,Zigzag: +7000 shops in one app,Shopping,"Zigzag is Korea’s No.1 fashion and beauty app,...",지그재그
8,LinkedIn: Jobs & Business News,Business,Welcome professionals! The key to getting in i...,링크드인
9,네이버 블로그 - Naver Blog,Social,"Stay connected anywhere, anytime with Naver Bl...",네이버블로그


In [11]:
# Save the table as a CSV file (no index column, 'utf-8-sig' encoding)
df.to_csv(path_or_buf='app_info.csv', encoding='utf-8-sig', index=False)