Skip to content

Commit

Permalink
mangadex: add new source
Browse files Browse the repository at this point in the history
  • Loading branch information
aplanas committed Jun 13, 2018
1 parent cb3e854 commit e65e032
Show file tree
Hide file tree
Showing 5 changed files with 412 additions and 1 deletion.
81 changes: 81 additions & 0 deletions bin/initialdata.json
Expand Up @@ -88,6 +88,17 @@
"model": "core.source",
"pk": 7
},
{
"fields": {
"created": "2014-10-25T00:00:00.000Z",
"modified": "2014-10-25T00:00:00.000Z",
"name": "MangaDex",
"spider": "mangadex",
"url": "https://mangadex.org/"
},
"model": "core.source",
"pk": 8
},
{
"fields": {
"created": "2014-10-25T00:00:00.000Z",
Expand Down Expand Up @@ -217,5 +228,75 @@
},
"model": "core.sourcelanguage",
"pk": 13
},
{
"fields": {
"created": "2014-10-25T00:00:00.000Z",
"language": "DE",
"modified": "2014-10-25T00:00:00.000Z",
"source": 8
},
"model": "core.sourcelanguage",
"pk": 14
},
{
"fields": {
"created": "2014-10-25T00:00:00.000Z",
"language": "EN",
"modified": "2014-10-25T00:00:00.000Z",
"source": 8
},
"model": "core.sourcelanguage",
"pk": 15
},
{
"fields": {
"created": "2014-10-25T00:00:00.000Z",
"language": "ES",
"modified": "2014-10-25T00:00:00.000Z",
"source": 8
},
"model": "core.sourcelanguage",
"pk": 16
},
{
"fields": {
"created": "2014-10-25T00:00:00.000Z",
"language": "FR",
"modified": "2014-10-25T00:00:00.000Z",
"source": 8
},
"model": "core.sourcelanguage",
"pk": 17
},
{
"fields": {
"created": "2014-10-25T00:00:00.000Z",
"language": "IT",
"modified": "2014-10-25T00:00:00.000Z",
"source": 8
},
"model": "core.sourcelanguage",
"pk": 18
},
{
"fields": {
"created": "2014-10-25T00:00:00.000Z",
"language": "RU",
"modified": "2014-10-25T00:00:00.000Z",
"source": 8
},
"model": "core.sourcelanguage",
"pk": 19
},
{
"fields": {
"created": "2014-10-25T00:00:00.000Z",
"language": "PT",
"modified": "2014-10-25T00:00:00.000Z",
"source": 8
},
"model": "core.sourcelanguage",
"pk": 20
}
]
2 changes: 1 addition & 1 deletion kmanga/scrapyctl/tests.py
Expand Up @@ -21,7 +21,7 @@ def setUp(self):
self.scrapy = ScrapyCtl(accounts={}, loglevel='ERROR')
self.command = Command()
self.command.stdout = MagicMock()
self.all_spiders = ['batoto', 'kissmanga', 'mangafox',
self.all_spiders = ['batoto', 'kissmanga', 'mangadex', 'mangafox',
'mangahere', 'mangareader', 'mangasee',
'unionmangas']

Expand Down
29 changes: 29 additions & 0 deletions scraper/scraper/pipelines/clean.py
Expand Up @@ -108,6 +108,8 @@ def convert_to_date(str_, dmy=False):
return datetime.strptime(str_, '%m/%d/%Y').date()
elif re.match(r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\+00:00', str_):
return datetime.strptime(str_, '%Y-%m-%dT%H:%M:%S+00:00').date()
elif re.match(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} UTC', str_):
return datetime.strptime(str_, '%Y-%m-%d %H:%M:%S UTC').date()
else:
raise ValueError('Format "%s" not recognized' % str_)

Expand Down Expand Up @@ -354,6 +356,7 @@ def clean_manga(self, item, spider):
{
'optional': True,
'max_length': 200,
'exclude': ('',),
}),
'author': (self._clean_field_str,
{
Expand Down Expand Up @@ -503,3 +506,29 @@ def clean_field_mangasee_manga_reading_direction(self, field):

def clean_field_mangasee_manga_status(self, field):
return 'O' if 'Ongoing' in field else 'C'

# -- MangaDex fields
def clean_field_mangadex_manga_reading_direction(self, field):
    """Translate the MangaDex origin-language label into a reading direction.

    The raw field is first normalized with `_clean_field_str`, then looked
    up in a small table. Labels not present in the table fall back to 'LR'.
    """
    direction_by_origin = {
        'Japanese': 'RL',
        'Chinese (Simp)': 'RL',
        'Korean': 'LR',
    }
    origin = self._clean_field_str(field)
    # Unknown origins default to left-to-right.
    return direction_by_origin.get(origin, 'LR')

def clean_field_mangadex_manga_status(self, field):
    """Return 'C' when the status field contains 'Completed', else 'O'."""
    if 'Completed' in field:
        return 'C'
    # Every other status (for example 'Hiatus') is treated as ongoing.
    return 'O'

def clean_field_mangadex_issue_language(self, field):
    """Clean the issue language reported by MangaDex.

    Builds a table from the MangaDex language labels to the project's
    language constants and delegates to `_clean_field_set`, passing the
    constants as the value collection and the table as `translator`.
    """
    label_to_language = {
        'German': GERMAN,
        'English': ENGLISH,
        'Spanish (Es)': SPANISH,
        'French': FRENCH,
        'Italian': ITALIAN,
        'Russian': RUSSIAN,
        'Portuguese (Br)': PORTUGUESE,
    }
    known_languages = label_to_language.values()
    return self._clean_field_set(field, known_languages,
                                 translator=label_to_language)

0 comments on commit e65e032

Please sign in to comment.