Skip to content

Commit

Permalink
ISSUE-64 support last updated date for kinozal && skip download if missing update
Browse files Browse the repository at this point in the history
  • Loading branch information
a8t3r committed Nov 7, 2022
1 parent 7f9f170 commit 64eb2a3
Show file tree
Hide file tree
Showing 8 changed files with 477 additions and 15 deletions.
406 changes: 406 additions & 0 deletions tests/trackers/datafixtures/kinozal.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/trackers/test_eniahd.py
Expand Up @@ -13,7 +13,7 @@ def test_get_torrent(response_mock, datafix_read, datafix_dir):
b"GET https://eniatv.com/dl.php?id=5669 -> 200:" + test_torrent,

]) as _:
torr = tracker.get_torrent('https://eniatv.com/viewtopic.php?t=1558')
torr = tracker.get_torrent('https://eniatv.com/viewtopic.php?t=1558', None)
assert torr.hash == 'c815be93f20bf8b12fed14bee35c14b19b1d1984'
assert torr.url == 'https://eniatv.com/viewtopic.php?t=1558'
assert torr.url_file == 'https://eniatv.com/dl.php?id=5669'
Expand Down
23 changes: 23 additions & 0 deletions tests/trackers/test_kinozal.py
@@ -0,0 +1,23 @@
from datetime import datetime
from torrt.trackers.kinozal import KinozalTracker


def test_get_torrent(response_mock, datafix_read, datafix_dir):
    """Kinozal tracker: fetch a torrent from a details page and check parsed metadata.

    The details page and the .torrent download are both served from fixtures
    through ``response_mock``; no real network access is expected.
    """
    page_url = 'https://kinozal.me/details.php?id=557593'
    torrent_bytes = (datafix_dir / 'test.torrent').read_bytes()

    kinozal = KinozalTracker()
    kinozal.raise_on_error_response = True

    # The fixture page is read as windows-1251.
    mocked_responses = [
        f"GET https://kinozal.me/details.php?id=557593 -> 200: {datafix_read('kinozal.html', encoding='windows-1251')}",
        b"GET https://kinozal.me/download.php?id=557593 -> 200:" + torrent_bytes,
    ]

    with response_mock(mocked_responses):
        # The previously known update date is older than the one on the page,
        # so the torrent is expected to be downloaded rather than skipped.
        torrent = kinozal.get_torrent(page_url, datetime(2015, 4, 16))

        assert torrent.hash == 'c815be93f20bf8b12fed14bee35c14b19b1d1984'
        assert torrent.url == page_url
        assert torrent.url_file == 'https://dl.kinozal.me/download.php?id=557593'
        assert torrent.parsed.comment == 'примечание'

        page = torrent.page
        assert page.date_updated == datetime(2015, 5, 16, 18, 0)
        assert page.title == 'Властелин колец (Трилогия) (Смешной перевод Гоблина) / The Lord of the Rings. Trilogy / 2001-2003 / АП (Пучков) / DVDRip :: Кинозал.МЕ'
5 changes: 3 additions & 2 deletions tests/trackers/test_nnmclub.py
@@ -1,3 +1,4 @@
from datetime import datetime
from torrt.trackers.nnmclub import NNMClubTracker


Expand All @@ -13,11 +14,11 @@ def test_get_torrent(response_mock, datafix_read, datafix_dir):
b"GET https://nnmclub.to/forum/download.php?id=762672&sid=None -> 200:" + test_torrent,

]) as _:
torr = tracker.get_torrent('https://nnmclub.to/forum/viewtopic.php?t=889443')
torr = tracker.get_torrent('https://nnmclub.to/forum/viewtopic.php?t=889443', datetime(2015, 4, 16))
assert torr.hash == 'c815be93f20bf8b12fed14bee35c14b19b1d1984'
assert torr.url == 'https://nnmclub.to/forum/viewtopic.php?t=889443'
assert torr.url_file == 'https://nnmclub.to/forum/download.php?id=762672'
assert torr.parsed.comment == 'примечание'
assert torr.page.cover == 'http://funkyimg.com/i/VZL6.jpg'
assert torr.page.date_updated == '2015-04-17 17:50:51'
assert torr.page.date_updated == datetime(2015, 4, 17, 17, 50, 51)
assert torr.page.title == 'Реймонд Хеттинджер | Super — это супер! (2015) HDTV :: NNM-Club'
14 changes: 10 additions & 4 deletions torrt/base_tracker.py
Expand Up @@ -268,11 +268,12 @@ def test_configuration(self) -> bool:
"""This should implement a configuration test, e.g. make test login and report success."""
return True

def get_torrent(self, url: str) -> Optional[TorrentData]:
def get_torrent(self, url: str, last_updated_date: Optional[datetime]) -> Optional[TorrentData]:
"""This method should be implemented in torrent tracker handler class
and must return .torrent file contents.
:param url: URL to download torrent file from
:param last_updated_date: torrent last updated datetime
"""
raise NotImplementedError # pragma: nocover
Expand All @@ -281,7 +282,7 @@ def extract_page_data(self) -> PageData:
data = PageData(
title=self.extract_page_title(),
cover=self.extract_page_cover(),
date_updated=f"{self.extract_page_date_updated() or ''}",
date_updated=self.extract_page_date_updated()
)
return data

Expand Down Expand Up @@ -351,11 +352,12 @@ def get_id_from_link(self, url: str) -> str:
"""
return url.split('=')[1]

def get_torrent(self, url: str) -> Optional[TorrentData]:
def get_torrent(self, url: str, last_updated_date: Optional[datetime]) -> Optional[TorrentData]:
"""This is the main method which returns torrent file contents
of file located at URL.
:param url: URL to find and get torrent from
:param last_updated_date: torrent last updated datetime
"""
download_link = self.get_download_link(url)
Expand All @@ -368,7 +370,11 @@ def get_torrent(self, url: str) -> Optional[TorrentData]:

self.log_debug(f'Torrent download link found: {download_link}')

torrent_contents = self.download_torrent(download_link, referer=url)
if last_updated_date and last_updated_date >= page_data.date_updated:
self.log_info(f'Skip torrent download from {download_link} due to missing update')
return None
else:
torrent_contents = self.download_torrent(download_link, referer=url)

if torrent_contents is None:
self.log_debug(f'Torrent download from `{download_link}` has failed')
Expand Down
7 changes: 5 additions & 2 deletions torrt/toolbox.py
@@ -1,6 +1,7 @@
import logging
import sys
from time import time
from datetime import datetime
from typing import Optional, List, Dict

from .base_bot import BotRegistrationFailed
Expand All @@ -10,7 +11,7 @@
RPCClassesRegistry, TrackerClassesRegistry, config, get_url_from_string,
get_iso_from_timestamp, import_classes, structure_torrent_data, get_torrent_from_url, iter_rpc,
NotifierClassesRegistry, iter_notifiers, BotClassesRegistry, iter_bots, configure_entity,
TorrentData
TorrentData, DATETIME_FORMAT
)

try:
Expand Down Expand Up @@ -398,7 +399,9 @@ def update_torrents(torrents: Dict[str, dict], remove_outdated: bool = True) ->
tracker_torrent = download_cache[page_url]

else:
tracker_torrent = get_torrent_from_url(page_url)
date_updated_ = torrents[rpc_torrent['hash']]['page']['date_updated']
last_updated_date = datetime.strptime(date_updated_, DATETIME_FORMAT) if date_updated_ else None
tracker_torrent = get_torrent_from_url(page_url, last_updated_date)
download_cache[page_url] = tracker_torrent

if tracker_torrent is None:
Expand Down
23 changes: 22 additions & 1 deletion torrt/trackers/kinozal.py
@@ -1,4 +1,5 @@
from typing import List
from datetime import datetime, time, timedelta
from typing import List, Optional

from ..base_tracker import GenericPrivateTracker

Expand All @@ -20,6 +21,26 @@ def get_id_from_link(self, url: str) -> str:
"""Returns forum thread identifier from full thread URL."""
return url.split('=')[1]

def extract_page_date_updated(self) -> Optional[datetime]:
    """Extract the torrent's last update date and time from the loaded page.

    Searches ``self._torrent_page`` for the ``li`` element whose text starts
    with 'Обновлен' and parses the text of its ``span`` child, which is
    expected to look like ``<date> в <HH:MM>``. The date part is either
    'сегодня' (today), 'вчера' (yesterday) or a ``%d %B %Y`` date handled by
    ``self.parse_datetime`` with the 'ru' locale.

    :return: datetime with seconds and microseconds zeroed, or None when the
        element is missing or its text cannot be parsed.
    """
    def is_updated_item(tag):
        return tag.name == 'li' and tag.get_text().startswith('Обновлен')

    def parse_date(date_val):
        # NOTE(review): relative dates are resolved against the local clock;
        # presumably the tracker's "today" matches it - confirm.
        if date_val == 'сегодня':
            return datetime.today()
        if date_val == 'вчера':
            return datetime.today() - timedelta(days=1)
        return self.parse_datetime(date_val, '%d %B %Y', locale='ru')

    updated_item = self._torrent_page.find(is_updated_item)
    if updated_item is None:
        # No "updated" entry on the page: chaining .find() on None would
        # raise AttributeError, so report the date as unknown instead.
        return None

    raw_value = getattr(updated_item.find('span'), 'text', '').strip()
    parts = raw_value.split(' в ')
    if len(parts) != 2:
        return None

    date_text, time_text = (part.strip() for part in parts)

    try:
        time_val = time.fromisoformat(time_text)
    except ValueError:
        # Unexpected time format: treat as "date unknown" rather than crash.
        return None

    date_val = parse_date(date_text)
    if date_val is None:
        # Guard in case parse_datetime signals failure with None (its
        # contract is not visible from here).
        return None

    return date_val.replace(hour=time_val.hour, minute=time_val.minute, second=0, microsecond=0)

def get_download_link(self, url: str) -> str:
"""Tries to find .torrent file download link at forum thread page and return that one."""

Expand Down
12 changes: 7 additions & 5 deletions torrt/utils.py
Expand Up @@ -37,6 +37,7 @@
# This regex is used to get hyperlink from torrent comment.
RE_LINK = re.compile(r'(?P<url>https?://[^\s]+)')

DATETIME_FORMAT='%Y-%m-%d %H:%M:%S'

class HttpClient:
"""Common client to perform HTTP requests."""
Expand Down Expand Up @@ -359,7 +360,7 @@ def update_dict(old_dict: dict, new_dict: dict) -> dict:
class PageData:
"""Represents data extracted from torrent page."""

def __init__(self, title: str, cover: str, date_updated: str):
def __init__(self, title: str, cover: str, date_updated: datetime):
self.title = title
self.cover = cover
self.date_updated = date_updated
Expand All @@ -368,7 +369,7 @@ def to_dict(self):
data = {
'title': self.title,
'cover': self.cover,
'date_updated': self.date_updated,
'date_updated': self.date_updated.strftime(DATETIME_FORMAT) if self.date_updated else None
}
return data

Expand Down Expand Up @@ -441,18 +442,19 @@ def structure_torrent_data(target_dict: dict, hash_str: str, data: TorrentData):
target_dict[hash_str] = data.to_dict()


def get_torrent_from_url(url: Optional[str]) -> Optional[TorrentData]:
def get_torrent_from_url(url: Optional[str], last_updated_date: Optional[datetime] = None) -> Optional[TorrentData]:
"""Downloads torrent from a given URL and returns torrent data.
:param url:
:param url: URL to download torrent file from
:param last_updated_date: torrent last updated datetime
"""
LOGGER.debug(f'Downloading torrent file from `{url}` ...')

tracker: 'GenericTracker' = TrackerObjectsRegistry.get_for_string(url)

if tracker:
torrent_info = tracker.get_torrent(url)
torrent_info = tracker.get_torrent(url, last_updated_date)

if torrent_info is None:
LOGGER.warning(f'Unable to get torrent from `{url}`')
Expand Down

0 comments on commit 64eb2a3

Please sign in to comment.