From be1743d2ffc4803fa77973dbef96ab852c14b0c5 Mon Sep 17 00:00:00 2001 From: jvlflame Date: Tue, 11 Aug 2020 01:15:51 -0700 Subject: [PATCH] Update scrapers --- .../Convert-HTMLCharacter.ps1 | 0 src/Javinizer/Public/Get-DmmDataObject.ps1 | 15 ++- src/Javinizer/Public/Get-JavbusDataObject.ps1 | 89 +++++++++------ src/Javinizer/Public/Get-JavbusUrl.ps1 | 2 +- .../Public/Get-JavlibraryDataObject.ps1 | 105 +++++++++++------- src/Javinizer/Public/Get-JavlibraryUrl.ps1 | 6 +- src/Javinizer/Public/Get-R18DataObject.ps1 | 56 +++++----- 7 files changed, 163 insertions(+), 110 deletions(-) rename src/Javinizer/{Public => Private}/Convert-HTMLCharacter.ps1 (100%) diff --git a/src/Javinizer/Public/Convert-HTMLCharacter.ps1 b/src/Javinizer/Private/Convert-HTMLCharacter.ps1 similarity index 100% rename from src/Javinizer/Public/Convert-HTMLCharacter.ps1 rename to src/Javinizer/Private/Convert-HTMLCharacter.ps1 diff --git a/src/Javinizer/Public/Get-DmmDataObject.ps1 b/src/Javinizer/Public/Get-DmmDataObject.ps1 index a77e0667..70205811 100644 --- a/src/Javinizer/Public/Get-DmmDataObject.ps1 +++ b/src/Javinizer/Public/Get-DmmDataObject.ps1 @@ -238,7 +238,7 @@ function Get-DmmActress { ) process { - $actressArray = @() + $movieActressObject = @() $actressHtml = ((($WebRequest.Content -split '出演者:<\/td>')[1] -split '<\/td>')[0] -split '')[1] $actressHtml = $actressHtml -replace '', '' $actressHtml = $actressHtml -split '<\/a>', '' @@ -247,14 +247,19 @@ function Get-DmmActress { foreach ($actress in $actressHtml) { $actress = Convert-HtmlCharacter -String $actress if ($actress -ne '') { - $actressArray += $actress -replace '<\/a>', '' + $movieActressObject += [pscustomobject]@{ + LastName = $null + FirstName = $null + JapaneseName = $actress -replace '<\/a>', '' + ThumbUrl = $null + } } } - Write-Output $actressArray } else { - $actressArray = $null - Write-Output $actressArray + $movieActressObject = $null } + Write-Output $movieActressObject + } } diff --git a/src/Javinizer/Public/Get-JavbusDataObject.ps1 b/src/Javinizer/Public/Get-JavbusDataObject.ps1 index 43c60e7a..f2539f00 100644 --- a/src/Javinizer/Public/Get-JavbusDataObject.ps1 +++ b/src/Javinizer/Public/Get-JavbusDataObject.ps1 @@ -17,23 +17,22 @@ function Get-JavbusDataObject { } $movieDataObject = [pscustomobject]@{ - Source = 'javbus' - Url = $Url - Id = Get-JavbusId -WebRequest $webRequest - Title = Get-JavbusTitle -WebRequest $webRequest - Date = Get-JavbusReleaseDate -WebRequest $webRequest - Year = Get-JavbusReleaseYear -WebRequest $webRequest - Runtime = Get-JavbusRuntime -WebRequest $webRequest - Director = Get-JavbusDirector -WebRequest $webRequest - Maker = Get-JavbusMaker -WebRequest $webRequest - Label = Get-JavbusLabel -WebRequest $webRequest - Series = Get-JavbusSeries -WebRequest $webRequest - Rating = Get-JavbusRating -WebRequest $webRequest - Actress = (Get-JavbusActress -WebRequest $webRequest).Name - ActressThumbUrl = (Get-JavbusActress -WebRequest $webRequest).ThumbUrl - Genre = Get-JavbusGenre -WebRequest $webRequest - CoverUrl = Get-JavbusCoverUrl -WebRequest $webRequest - ScreenshotUrl = Get-JavbusScreenshotUrl -WebRequest $webRequest + Source = 'javbus' + Url = $Url + Id = Get-JavbusId -WebRequest $webRequest + Title = Get-JavbusTitle -WebRequest $webRequest + Date = Get-JavbusReleaseDate -WebRequest $webRequest + Year = Get-JavbusReleaseYear -WebRequest $webRequest + Runtime = Get-JavbusRuntime -WebRequest $webRequest + Director = Get-JavbusDirector -WebRequest $webRequest + Maker = Get-JavbusMaker -WebRequest $webRequest + Label = Get-JavbusLabel -WebRequest $webRequest + Series = Get-JavbusSeries -WebRequest $webRequest + Rating = Get-JavbusRating -WebRequest $webRequest + Actress = Get-JavbusActress -WebRequest $webRequest + Genre = Get-JavbusGenre -WebRequest $webRequest + CoverUrl = Get-JavbusCoverUrl -WebRequest $webRequest + ScreenshotUrl = Get-JavbusScreenshotUrl -WebRequest $webRequest } Write-JLog -Level Debug -Message "JavBus data object: $($movieDataObject | ConvertTo-Json -Depth 32 -Compress)" @@ -248,26 +247,52 @@ function Get-JavbusActress { ) process { - $actress = @() + $actresses = @() + $movieActressObject = @() + $textInfo = (Get-Culture).TextInfo try { - $actress = ($WebRequest | ForEach-Object { $_ -split '\n' } | - Select-String '(.*)<\/a>').Matches | - ForEach-Object { $_.Groups[3].Value } | - Where-Object { $_ -ne '' } | - Select-Object -Unique - - $actressThumb = ($WebRequest | ForEach-Object { $_ -split '\n' } | - Select-String '<\/a>').Matches | - ForEach-Object { $_.Groups[3].Value } | - Where-Object { $_ -ne '' } - + try { + $actresses = ($WebRequest | Select-String -AllMatches -Pattern '<\/a>').Matches + } catch { + return + } - $movieActressObject = [pscustomobject]@{ - Name = $actress - ThumbUrl = $actressThumb + foreach ($actress in $actresses) { + $thumbUrl = $actress.Groups[1].Value + if ($thumbUrl -like '*nowprinting*' -or $thumbUrl -like '*now_printing*') { + $thumbUrl = $null + } + + # Match if the name contains Japanese characters + if ($actress.Groups[2].Value -match '[\u3040-\u309f]|[\u30a0-\u30ff]|[\uff66-\uff9f]|[\u4e00-\u9faf]') { + $movieActressObject += [pscustomobject]@{ + LastName = $null + FirstName = $null + JapaneseName = $actress.Groups[2].Value + ThumbUrl = $thumbUrl + } + } else { + $firstName = ($actress.Groups[2].Value -split ' ')[1] + if ($null -ne $firstName) { + $firstName = $textInfo.ToTitleCase($firstName.ToLower()) + } + + $lastName = ($actress.Groups[2].Value -split ' ')[0] + if ($null -ne $lastName) { + $lastName = $textInfo.ToTitleCase($lastName.ToLower()) + } + + $movieActressObject += [pscustomobject]@{ + LastName = $lastName + FirstName = $firstName + JapaneseName = $null + ThumbUrl = $thumbUrl + } + } } } catch { + Write-Error $_ return } diff --git a/src/Javinizer/Public/Get-JavbusUrl.ps1 b/src/Javinizer/Public/Get-JavbusUrl.ps1 index 298c987c..1aceb025 100644 --- a/src/Javinizer/Public/Get-JavbusUrl.ps1 +++ b/src/Javinizer/Public/Get-JavbusUrl.ps1 @@ -3,7 +3,7 @@ function Get-JavbusUrl { param ( [Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)] [string]$Id, - [Parameter(Mandatory = $true)] + [Parameter(Mandatory = $true, Position = 1)] [ValidateSet('ja', 'en', 'zh')] [string]$Language ) diff --git a/src/Javinizer/Public/Get-JavlibraryDataObject.ps1 b/src/Javinizer/Public/Get-JavlibraryDataObject.ps1 index b9dbb667..bd16c9d9 100644 --- a/src/Javinizer/Public/Get-JavlibraryDataObject.ps1 +++ b/src/Javinizer/Public/Get-JavlibraryDataObject.ps1 @@ -18,27 +18,28 @@ function Get-JavlibraryDataObject { $movieDataObject = [pscustomobject]@{ Source = 'javlibrary' Url = $Url - Id = Get-JLId -WebRequest $webRequest - AjaxId = Get-JLAjaxId -WebRequest $webRequest - Title = Get-JLTitle -WebRequest $webRequest - Date = Get-JLReleaseDate -WebRequest $webRequest - Year = Get-JLReleaseYear -WebRequest $webRequest - Runtime = Get-JLRuntime -WebRequest $webRequest - Director = Get-JLDirector -WebRequest $webRequest - Maker = Get-JLMaker -WebRequest $webRequest - Label = Get-JLLabel -WebRequest $webRequest - Rating = Get-JLRating -WebRequest $webRequest - Actress = Get-JLActress -WebRequest $webRequest - Genre = Get-JLGenre -WebRequest $webRequest - CoverUrl = Get-JLCoverUrl -WebRequest $webRequest - ScreenshotUrl = Get-JLScreenshotUrl -WebRequest $webRequest + Id = Get-JavlibraryId -WebRequest $webRequest + AjaxId = Get-JavlibraryAjaxId -WebRequest $webRequest + Title = Get-JavlibraryTitle -WebRequest $webRequest + Date = Get-JavlibraryReleaseDate -WebRequest $webRequest + Year = Get-JavlibraryReleaseYear -WebRequest $webRequest + Runtime = Get-JavlibraryRuntime -WebRequest $webRequest + Director = Get-JavlibraryDirector -WebRequest $webRequest + Maker = Get-JavlibraryMaker -WebRequest $webRequest + Label = Get-JavlibraryLabel -WebRequest $webRequest + Rating = Get-JavlibraryRating -WebRequest $webRequest + Actress = Get-JavlibraryActress -WebRequest $webRequest + Genre = Get-JavlibraryGenre -WebRequest $webRequest + CoverUrl = Get-JavlibraryCoverUrl -WebRequest $webRequest + ScreenshotUrl = Get-JavlibraryScreenshotUrl -WebRequest $webRequest } Write-JLog -Level Debug -Message "JAVLibrary data object: $($movieDataObject | ConvertTo-Json -Depth 32 -Compress)" Write-Output $movieDataObject } } -function Get-JLId { + +function Get-JavlibraryId { param ( [Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)] [object]$WebRequest @@ -50,7 +51,7 @@ function Get-JLId { } } -function Get-JLAjaxId { +function Get-JavlibraryAjaxId { param ( [Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)] [object]$WebRequest @@ -68,7 +69,7 @@ function Get-JLAjaxId { } } -function Get-JLTitle { +function Get-JavlibraryTitle { param ( [Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)] [object]$WebRequest @@ -81,7 +82,7 @@ function Get-JLTitle { } } -function Get-JLReleaseDate { +function Get-JavlibraryReleaseDate { param ( [Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)] [object]$WebRequest @@ -93,20 +94,20 @@ function Get-JLReleaseDate { } } -function Get-JLReleaseYear { +function Get-JavlibraryReleaseYear { param ( [Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)] [object]$WebRequest ) process { - $releaseYear = Get-JLReleaseDate -WebRequest $WebRequest + $releaseYear = Get-JavlibraryReleaseDate -WebRequest $WebRequest $releaseYear = ($releaseYear -split '-')[0] Write-Output $releaseYear } } -function Get-JLRuntime { +function Get-JavlibraryRuntime { param ( [Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)] [object]$WebRequest @@ -118,7 +119,7 @@ function Get-JLRuntime { } } -function Get-JLDirector { +function Get-JavlibraryDirector { param ( [Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)] [object]$WebRequest @@ -131,12 +132,13 @@ function Get-JLDirector { } else { $director = $null } + $director = Convert-HtmlCharacter -String $director Write-Output $director } } -function Get-JLMaker { +function Get-JavlibraryMaker { param ( [Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)] [object]$WebRequest @@ -149,7 +151,7 @@ function Get-JLMaker { } } -function Get-JLLabel { +function Get-JavlibraryLabel { param ( [Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)] [object]$WebRequest @@ -162,7 +164,7 @@ function Get-JLLabel { } } -function Get-JLRating { +function Get-JavlibraryRating { param ( [Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)] [object]$WebRequest @@ -175,7 +177,7 @@ function Get-JLRating { } } -function Get-JLGenre { +function Get-JavlibraryGenre { param ( [Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)] [object]$WebRequest @@ -201,35 +203,52 @@ function Get-JLGenre { } } -function Get-JLActress { +function Get-JavlibraryActress { param ( [Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)] [object]$WebRequest ) process { - $actress = @() - $actressSplitString = '' - $actressSplitHtml = $WebRequest.Content -split $actressSplitString - - foreach ($section in $actressSplitHtml) { - $fullName = (($section -split "rel=`"tag`">")[1] -split "<\/a><\/span>")[0] - if ($fullName -ne '') { - if ($fullName.Length -lt 25) { - $actress += $fullName - } - } + $movieActressObject = @() + + try { + $movieActress = ($WebRequest.Content | Select-String -Pattern '