# 행렬 곱

In [1]:
# Left-hand 2x2 operand of the matrix product, stored as a list of rows.
arr1 = [[1, 2], [3, 4]]

In [2]:
# Right-hand 2x2 operand of the matrix product, stored as a list of rows.
arr2 = [[5, 6], [7, 8]]

In [4]:
# Transpose arr2: the * unpacks its rows into separate arguments, so this is
# the same call as zip([5, 6], [7, 8]). zip returns a lazy iterator (not a
# list), which is why list() is needed to see the resulting columns.
list(zip(*arr2))

[(5, 7), (6, 8)]

In [6]:
mylist1 = ['a', 'b', 'c']
mylist2 = ['x', 'y', 'z']

# 1) Lockstep iteration by index (kept on purpose for comparison with zip).
for i in range(len(mylist1)):
    v1 = mylist1[i]
    v2 = mylist2[i]
    print(v1, v2)

# 2) The same pairs, produced directly by zip.
for v1, v2 in zip(mylist1, mylist2):
    print(v1, v2)

print('----')
mylist = [mylist1, mylist2]
print(mylist)

# 3) Unpacking a list of lists with * passes each inner list to zip as its
#    own argument, which yields the same pairs again.
for v1, v2 in zip(*mylist):
    print(v1, v2)

a x
b y
c z
a x
b y
c z
----
[['a', 'b', 'c'], ['x', 'y', 'z']]
a x
b y
c z


In [8]:
# Matrix product arr1 x arr2, written out with explicit loops.
result_list = []
columns = list(zip(*arr2))  # columns of arr2, i.e. its transpose
for left_row in arr1:
    products = []
    for column in columns:
        # Dot product of one row of arr1 with one column of arr2.
        dot = 0
        for a, b in zip(left_row, column):
            dot += a * b
        products.append(dot)
    result_list.append(products)
result_list

[[19, 22], [43, 50]]

In [14]:
# Matrix product arr1 x arr2 as a nested comprehension: the outer level walks
# the rows of arr1, the inner level walks the columns of arr2.
[
    [sum(a * b for a, b in zip(left_row, column)) for column in zip(*arr2)]
    for left_row in arr1
]

[[19, 22], [43, 50]]

# 네이버 웹툰, 목록 크롤링

In [34]:
# Endpoint of the Naver Webtoon episode-list page; the requests built on it
# add 'titleId' and 'page' as query parameters.
webtoon_url = "http://comic.naver.com/webtoon/list.nhn"

In [35]:
# '&'.join('{}={}'.format(k, v) for k, v in params.items())

In [52]:
import requests
from bs4 import BeautifulSoup
from collections import OrderedDict
from itertools import count
from urllib.parse import urljoin

def get_webtoon_ep_list(title_id):
    """Yield every episode of a webtoon by walking its list pages in order.

    Pages are requested from the module-level ``webtoon_url`` starting at
    page 1. Iteration stops when an episode URL that was already yielded
    shows up again, or when a page contains no episode links at all (for
    example an unknown ``title_id`` or an error page), so the generator
    always terminates.

    Args:
        title_id: Value sent as the ``titleId`` query parameter.

    Yields:
        dict: ``{'url': ..., 'title': ..., 'img_url': ...}`` for each episode,
        in the order the episodes appear on the list pages.
    """
    seen_urls = set()  # set membership keeps duplicate detection O(1)

    for page in count(1):
        params = {'titleId': title_id, 'page': page}
        print(params)  # progress log: shows which page is being fetched
        html = requests.get(webtoon_url, params=params).text

        soup = BeautifulSoup(html, 'html.parser')

        found_episode = False
        for a_tag in soup.select('.viewList tr a'):
            img_tag = a_tag.find('img')
            if img_tag is None:
                # Only links that wrap a thumbnail image are treated as
                # episode entries; other links in the table are skipped.
                continue
            ep_title = img_tag['title']
            ep_img_url = img_tag['src']
            ep_url = urljoin(webtoon_url, a_tag['href'])

            if ep_url in seen_urls:
                # A repeated episode marks the end of the list (presumably
                # the site serves the last page again for out-of-range page
                # numbers -- confirm).
                return

            seen_urls.add(ep_url)
            found_episode = True
            yield {
                'url': ep_url,
                'title': ep_title,
                'img_url': ep_img_url,
            }

        if not found_episode:
            # Without this guard a page with no episode links would make
            # count(1) request pages forever.
            return


In [53]:
# Naver Webtoon title ID of "Magic Scroll Merchant Gio".
title_id = 655746
# Print one dict per episode as the generator produces them.
for episode in get_webtoon_ep_list(title_id):
    print(episode)

{'titleId': 655746, 'page': 1}
{'title': '시즌1 후기', 'url': 'http://comic.naver.com/webtoon/detail.nhn?titleId=655746&no=81&weekday=sun', 'img_url': 'http://thumb.comic.naver.net/webtoon/655746/81/inst_thumbnail_20161217121740.jpg'}
{'title': 'Epilogue', 'url': 'http://comic.naver.com/webtoon/detail.nhn?titleId=655746&no=80&weekday=sun', 'img_url': 'http://thumb.comic.naver.net/webtoon/655746/80/inst_thumbnail_20161208184625.jpg'}
{'title': '78화. Dragon 11.', 'url': 'http://comic.naver.com/webtoon/detail.nhn?titleId=655746&no=79&weekday=sun', 'img_url': 'http://thumb.comic.naver.net/webtoon/655746/79/inst_thumbnail_20161201195156.jpg'}
{'title': '77화. Dragon 10.', 'url': 'http://comic.naver.com/webtoon/detail.nhn?titleId=655746&no=78&weekday=sun', 'img_url': 'http://thumb.comic.naver.net/webtoon/655746/78/inst_thumbnail_20161124192806.jpg'}
{'title': '76화. Dragon 9.', 'url': 'http://comic.naver.com/webtoon/detail.nhn?titleId=655746&no=77&weekday=sun', 'img_url': 'http://thumb.comic.naver

# 네이버 웹툰, 특정 에피소드 다운로드

In [60]:
# Endpoint of the episode detail (viewer) page.
ep_url = "http://comic.naver.com/webtoon/detail.nhn"
# Which webtoon (title_id) and which episode number (no) to download.
title_id, no = 655746, 79
params = dict(titleId=title_id, no=no)

In [68]:
import os
import requests
from bs4 import BeautifulSoup
from PIL import Image

In [69]:
# Download the episode viewer page and parse it for the image tags.
response = requests.get(ep_url, params=params)
html = response.text
soup = BeautifulSoup(html, 'html.parser')

In [74]:
# Download every image of the episode into data/<title_id>/<no>/ (skipping
# files that are already on disk) and open each one with PIL.
im_list = []

# Both values are the same for every image, so build them once.
dir_path = os.path.join('data', str(title_id), str(no))
headers = {'Referer': ep_url}  # sent with each image request

for img_tag in soup.select('.wt_viewer img'):
    img_url = img_tag['src']
    file_path = os.path.join(dir_path, os.path.basename(img_url))

    if not os.path.exists(file_path):
        response = requests.get(img_url, headers=headers)
        # Fail loudly on an HTTP error: otherwise the error body would be
        # saved under a .jpg name and treated as a cached image from then on.
        response.raise_for_status()
        # exist_ok avoids the check-then-create race of a separate exists().
        os.makedirs(dir_path, exist_ok=True)
        with open(file_path, 'wb') as f:
            f.write(response.content)

    print(file_path)
    im_list.append(Image.open(file_path))

data/655746/79/20161201195201_0e05c0d20addc0e979d79763426a6984_IMAG01_1.jpg
data/655746/79/20161201195201_0e05c0d20addc0e979d79763426a6984_IMAG01_2.jpg
data/655746/79/20161201195201_0e05c0d20addc0e979d79763426a6984_IMAG01_3.jpg
data/655746/79/20161201195201_0e05c0d20addc0e979d79763426a6984_IMAG01_4.jpg
data/655746/79/20161201195201_0e05c0d20addc0e979d79763426a6984_IMAG01_5.jpg
data/655746/79/20161201195201_0e05c0d20addc0e979d79763426a6984_IMAG01_6.jpg
data/655746/79/20161201195201_0e05c0d20addc0e979d79763426a6984_IMAG01_7.jpg
data/655746/79/20161201195201_0e05c0d20addc0e979d79763426a6984_IMAG01_8.jpg
data/655746/79/20161201195201_0e05c0d20addc0e979d79763426a6984_IMAG01_9.jpg
data/655746/79/20161201195201_0e05c0d20addc0e979d79763426a6984_IMAG01_10.jpg
data/655746/79/20161201195201_0e05c0d20addc0e979d79763426a6984_IMAG01_11.jpg
data/655746/79/20161201195201_0e05c0d20addc0e979d79763426a6984_IMAG01_12.jpg
data/655746/79/20161201195201_0e05c0d20addc0e979d79763426a6984_IMAG01_13.jpg
data/655

In [78]:
# The canvas must be as wide as the widest page (im.size is (width, height)).
canvas_width = max(width for width, _ in (im.size for im in im_list))

In [81]:
# Height of all pages stacked vertically, capped at 65000 px
# (presumably to stay within the JPEG dimension limit -- confirm).
stacked_height = sum(height for _, height in (im.size for im in im_list))
canvas_height = min(65000, stacked_height)

In [82]:
# Display the computed canvas dimensions as a (width, height) tuple.
canvas_width, canvas_height

(690, 65000)

In [84]:
# Blank RGB image that the individual pages are pasted onto.
canvas_size = (canvas_width, canvas_height)
canvas = Image.new('RGB', canvas_size)

In [93]:
# Stack the pages vertically, starting at the top edge of the canvas.
top = 0
for im in im_list:
    canvas.paste(im, (0, top))
    # Advance by the height of the image just pasted. Advancing by the canvas
    # height instead would push every later image past the bottom edge.
    top += im.size[1]
    if top >= canvas.size[1]:
        # The canvas is full; any further image would start at or below the
        # bottom edge and contribute nothing.
        break

In [94]:
# Preview the stitched image through PIL's Image.show().
canvas.show()

In [95]:
# Save the stitched episode as "<title_id>-<no>.jpg" in the working directory.
canvas_name = '%s-%s.jpg' % (title_id, no)
canvas.save(canvas_name)