### Import Modules

In [1]:
from bs4 import BeautifulSoup
import selenium
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import numpy as np
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import re

### Musinsa Codimap Crawling (Challenge)

In [2]:
# Imported locally so this cell stays self-contained; selenium is already a
# dependency of this notebook.
from selenium.common.exceptions import WebDriverException

# Configure the Chrome session used by every crawling cell below.
chrome_options = Options()

# 'detach' keeps the browser window open after the script finishes.
chrome_options.add_experimental_option('detach', True)
# Drop the 'enable-logging' switch to silence ChromeDriver console noise.
chrome_options.add_experimental_option('excludeSwitches', ['enable-logging'])

driver = webdriver.Chrome(options=chrome_options)

# Maximizing is best-effort: a failure here must not stop the crawl, but only
# WebDriver errors are tolerated so that Ctrl-C and real bugs still surface.
try:
    driver.maximize_window()
except WebDriverException as exc:
    print(f'Could not maximize the browser window: {exc}')

In [3]:
# Crawl pages 1-5 of the Musinsa codimap list (sorted by comment count).
# For every list item the summary fields are read from the list page, then the
# item is opened to read its description, hashtags and image URL.  One
# DataFrame per page is appended to `codimap_list`.  An item is skipped (with a
# printed notice) as soon as any of its fields cannot be extracted.

# Imported locally so this cell stays self-contained; selenium is already a
# dependency of this notebook.
from selenium.common.exceptions import WebDriverException

columns = ['codimap_category', 'codimap_title', 'codimap_date', 'views', 'comment_numbers', 'codimap_explain', 'codimap_hashtag', 'codimap_imgurl']
codimap_list = []


def _node_text(node):
    """Return the stripped text of a BeautifulSoup node, or None if the node is missing."""
    return None if node is None else node.get_text().strip()


def _second_word(node):
    """Return the second space-separated token of a node's text, or None if absent."""
    text = _node_text(node)
    if text is None:
        return None
    words = text.split(' ')
    return words[1] if len(words) > 1 else None


def _list_fields(row):
    """Return [category, title, date, views, comment count] for one list item.

    Returns None when any container or field is missing, so the caller can
    skip the item instead of crashing on unexpected markup.
    """
    information = row.find('div', attrs={'class': 'style-list-information'})
    post_information = row.find('div', attrs={'class': 'post-information'})
    link = information.a if information is not None else None
    if link is None or post_information is None:
        return None

    children = post_information.findChildren()
    if len(children) < 3:
        return None

    fields = [
        _node_text(link.span),
        _node_text(link.strong),
        _node_text(children[0]),
        _second_word(children[1]),
        _second_word(children[2]),
    ]
    return None if any(field is None for field in fields) else fields


def _detail_fields(detail_soup):
    """Return [description, comma-joined hashtags, image URL] from a detail page.

    Returns None when the description, the hashtags or the image is missing.
    """
    explain = detail_soup.find('p', attrs={'class': 'styling_txt'})
    hashtags = detail_soup.find_all('a', attrs={'class': 'ui-tag-list__item'})
    image = detail_soup.find('img', attrs={'class': 'photo'})
    if explain is None or not hashtags or image is None:
        return None

    image_url = image.get('src')
    if image_url is None:
        return None
    # Protocol-relative URLs are made absolute.
    if image_url.startswith('//'):
        image_url = 'https:' + image_url

    return [
        explain.get_text().strip(),
        ','.join(tag.get_text().strip() for tag in hashtags),
        image_url,
    ]


# The implicit wait is a session-wide setting that applies to element lookups
# (the find_element call below), so it only needs to be set once.
driver.implicitly_wait(2)

for page in range(1, 6):
    url = f'https://www.musinsa.com/app/codimap/lists?style_type=&tag_no=&brand=&display_cnt=60&list_kind=big&sort=comment_cnt&page={page}'
    driver.get(url)

    values = []

    soup = BeautifulSoup(driver.page_source, 'lxml')
    data_rows = soup.find_all('li', attrs={'class': 'style-list-item'})

    for position, row in enumerate(data_rows, start=1):
        print(f'Page #{page}, {position}th Musinsa Codimap Crawling')

        record = _list_fields(row)
        if record is None:
            print(f'Having problem with Page #{page}, {position}th Musinsa Codimap')
            continue

        ### Optional Part ###
        # Open the item's detail page.  If the link cannot be found or clicked
        # the browser is still on the list page, so no back-navigation is done.
        try:
            btn = driver.find_element(By.XPATH, f'/html/body/div[3]/div[2]/form/div[4]/div/ul/li[{position}]/div[1]/a')
            btn.click()
        except WebDriverException:
            print(f'Having problem with {position}th Musinsa Codimap')
            continue

        # Always return to the list page, whether or not parsing succeeds.
        try:
            time.sleep(1)
            detail_soup = BeautifulSoup(driver.page_source, 'lxml')
            detail = _detail_fields(detail_soup)
        finally:
            driver.back()
        #####################

        if detail is None:
            print(f'Having problem with {position}th Musinsa Codimap')
            continue

        values.append(record + detail)
        print('---------------------------------------------------')

    df = pd.DataFrame(values, columns=columns)
    codimap_list.append(df)

Page #1, 1th Musinsa Codimap Crawling
---------------------------------------------------
Page #1, 2th Musinsa Codimap Crawling
---------------------------------------------------
Page #1, 3th Musinsa Codimap Crawling
---------------------------------------------------
Page #1, 4th Musinsa Codimap Crawling
---------------------------------------------------
Page #1, 5th Musinsa Codimap Crawling
---------------------------------------------------
Page #1, 6th Musinsa Codimap Crawling
---------------------------------------------------
Page #1, 7th Musinsa Codimap Crawling
---------------------------------------------------
Page #1, 8th Musinsa Codimap Crawling
---------------------------------------------------
Page #1, 9th Musinsa Codimap Crawling
---------------------------------------------------
Page #1, 10th Musinsa Codimap Crawling
---------------------------------------------------
Page #1, 11th Musinsa Codimap Crawling
---------------------------------------------------
Page #1,

In [4]:
# Stack the per-page frames into a single table with a fresh 0..n-1 index,
# then export it as a BOM-prefixed UTF-8 CSV.
df_ = pd.concat(codimap_list, ignore_index=True)
df_.to_csv(
    '무신사_코디맵_크롤링_challenge.csv',
    encoding='utf-8-sig',
)