@@ -425,11 +425,32 @@ def _build_plex_item(item: PlexPartialObject) -> PlexItem:
425425 )
426426 for a in getattr (item , "actors" , []) or getattr (item , "roles" , []) or []
427427 ]
428+ genres = [
429+ str (getattr (g , "tag" , "" ))
430+ for g in getattr (item , "genres" , []) or []
431+ if getattr (g , "tag" , None )
432+ ]
433+ collections = [
434+ str (getattr (c , "tag" , "" ))
435+ for c in getattr (item , "collections" , []) or []
436+ if getattr (c , "tag" , None )
437+ ]
438+ season_number = getattr (item , "parentIndex" , None )
439+ if isinstance (season_number , str ):
440+ season_number = int (season_number ) if season_number .isdigit () else None
441+ episode_number = getattr (item , "index" , None )
442+ if isinstance (episode_number , str ):
443+ episode_number = int (episode_number ) if episode_number .isdigit () else None
444+
428445 return PlexItem (
429446 rating_key = str (getattr (item , "ratingKey" , "" )),
430447 guid = str (getattr (item , "guid" , "" )),
431448 type = str (getattr (item , "type" , "" )),
432449 title = str (getattr (item , "title" , "" )),
450+ show_title = getattr (item , "grandparentTitle" , None ),
451+ season_title = getattr (item , "parentTitle" , None ),
452+ season_number = season_number ,
453+ episode_number = episode_number ,
433454 summary = getattr (item , "summary" , None ),
434455 year = getattr (item , "year" , None ),
435456 added_at = getattr (item , "addedAt" , None ),
@@ -441,6 +462,8 @@ def _build_plex_item(item: PlexPartialObject) -> PlexItem:
441462 directors = directors ,
442463 writers = writers ,
443464 actors = actors ,
465+ genres = genres ,
466+ collections = collections ,
444467 )
445468
446469
@@ -556,6 +579,13 @@ def _load_from_sample(sample_dir: Path) -> List[AggregatedItem]:
556579 )
557580 for a in movie_data .get ("Role" , [])
558581 ],
582+ genres = [g .get ("tag" , "" ) for g in movie_data .get ("Genre" , []) if g .get ("tag" )],
583+ collections = [
584+ c .get ("tag" , "" )
585+ for key in ("Collection" , "Collections" )
586+ for c in movie_data .get (key , []) or []
587+ if c .get ("tag" )
588+ ],
559589 )
560590 with (movie_dir / "imdb.json" ).open ("r" , encoding = "utf-8" ) as f :
561591 imdb_movie = IMDbTitle .model_validate (json .load (f ))
@@ -571,6 +601,10 @@ def _load_from_sample(sample_dir: Path) -> List[AggregatedItem]:
571601 guid = str (episode_data .get ("guid" , "" )),
572602 type = episode_data .get ("type" , "episode" ),
573603 title = episode_data .get ("title" , "" ),
604+ show_title = episode_data .get ("grandparentTitle" ),
605+ season_title = episode_data .get ("parentTitle" ),
606+ season_number = episode_data .get ("parentIndex" ),
607+ episode_number = episode_data .get ("index" ),
574608 summary = episode_data .get ("summary" ),
575609 year = episode_data .get ("year" ),
576610 added_at = episode_data .get ("addedAt" ),
@@ -596,6 +630,13 @@ def _load_from_sample(sample_dir: Path) -> List[AggregatedItem]:
596630 )
597631 for a in episode_data .get ("Role" , [])
598632 ],
633+ genres = [g .get ("tag" , "" ) for g in episode_data .get ("Genre" , []) if g .get ("tag" )],
634+ collections = [
635+ c .get ("tag" , "" )
636+ for key in ("Collection" , "Collections" )
637+ for c in episode_data .get (key , []) or []
638+ if c .get ("tag" )
639+ ],
599640 )
600641 with (episode_dir / "imdb.tv.json" ).open ("r" , encoding = "utf-8" ) as f :
601642 imdb_episode = IMDbTitle .model_validate (json .load (f ))
@@ -657,15 +698,43 @@ async def run(
657698 # Assemble points with server-side embeddings
658699 points : List [models .PointStruct ] = []
659700 for item in items :
701+ primary_title = item .plex .title
702+ if item .plex .type == "episode" :
703+ title_bits : list [str ] = []
704+ if item .plex .show_title :
705+ title_bits .append (item .plex .show_title )
706+ se_parts : list [str ] = []
707+ if item .plex .season_number is not None :
708+ se_parts .append (f"S{ item .plex .season_number :02d} " )
709+ if item .plex .episode_number is not None :
710+ se_parts .append (f"E{ item .plex .episode_number :02d} " )
711+ if se_parts :
712+ title_bits .append ("" .join (se_parts ))
713+ if item .plex .title :
714+ title_bits .append (item .plex .title )
715+ if title_bits :
716+ primary_title = " - " .join (title_bits )
660717 parts = [
661- item . plex . title ,
718+ primary_title ,
662719 item .plex .summary or "" ,
663720 item .tmdb .overview if item .tmdb and hasattr (item .tmdb , "overview" ) else "" ,
664721 item .imdb .plot if item .imdb else "" ,
665- " " .join (p .tag for p in item .plex .directors ),
666- " " .join (p .tag for p in item .plex .writers ),
667- " " .join (p .tag for p in item .plex .actors ),
668722 ]
723+ directors_text = ", " .join (p .tag for p in item .plex .directors if p .tag )
724+ writers_text = ", " .join (p .tag for p in item .plex .writers if p .tag )
725+ actors_text = ", " .join (p .tag for p in item .plex .actors if p .tag )
726+ if directors_text :
727+ parts .append (f"Directed by { directors_text } " )
728+ if writers_text :
729+ parts .append (f"Written by { writers_text } " )
730+ if actors_text :
731+ parts .append (f"Starring { actors_text } " )
732+ if item .plex .tagline :
733+ parts .append (item .plex .tagline )
734+ if item .tmdb and hasattr (item .tmdb , "tagline" ):
735+ tagline = getattr (item .tmdb , "tagline" , None )
736+ if tagline :
737+ parts .append (tagline )
669738 if item .tmdb and hasattr (item .tmdb , "reviews" ):
670739 parts .extend (r .get ("content" , "" ) for r in getattr (item .tmdb , "reviews" , []))
671740 text = "\n " .join (p for p in parts if p )
@@ -674,8 +743,45 @@ async def run(
674743 "title" : item .plex .title ,
675744 "type" : item .plex .type ,
676745 }
746+ if item .plex .type == "episode" :
747+ if item .plex .show_title :
748+ payload ["show_title" ] = item .plex .show_title
749+ if item .plex .season_title :
750+ payload ["season_title" ] = item .plex .season_title
751+ if item .plex .season_number is not None :
752+ payload ["season_number" ] = item .plex .season_number
753+ if item .plex .episode_number is not None :
754+ payload ["episode_number" ] = item .plex .episode_number
677755 if item .plex .actors :
678- payload ["actors" ] = [p .tag for p in item .plex .actors ]
756+ payload ["actors" ] = [p .tag for p in item .plex .actors if p .tag ]
757+ if item .plex .directors :
758+ payload ["directors" ] = [p .tag for p in item .plex .directors if p .tag ]
759+ if item .plex .writers :
760+ payload ["writers" ] = [p .tag for p in item .plex .writers if p .tag ]
761+ if item .plex .genres :
762+ payload ["genres" ] = item .plex .genres
763+ if item .plex .collections :
764+ payload ["collections" ] = item .plex .collections
765+ summary = item .plex .summary
766+ if summary :
767+ payload ["summary" ] = summary
768+ overview = getattr (item .tmdb , "overview" , None ) if item .tmdb else None
769+ if overview :
770+ payload ["overview" ] = overview
771+ plot = item .imdb .plot if item .imdb else None
772+ if plot :
773+ payload ["plot" ] = plot
774+ taglines = [item .plex .tagline ]
775+ if item .tmdb and hasattr (item .tmdb , "tagline" ):
776+ taglines .append (getattr (item .tmdb , "tagline" , None ))
777+ taglines = [t for t in taglines if t ]
778+ if taglines :
779+ payload ["tagline" ] = "\n " .join (dict .fromkeys (taglines ))
780+ if item .tmdb and hasattr (item .tmdb , "reviews" ):
781+ review_texts = [r .get ("content" , "" ) for r in getattr (item .tmdb , "reviews" , [])]
782+ review_texts = [r for r in review_texts if r ]
783+ if review_texts :
784+ payload ["reviews" ] = review_texts
679785 if item .plex .year is not None :
680786 payload ["year" ] = item .plex .year
681787 if item .plex .added_at is not None :
@@ -719,15 +825,16 @@ async def run(
719825 created_collection = True
720826
721827 if created_collection :
828+ text_index = models .TextIndexParams (
829+ type = models .PayloadSchemaType .TEXT ,
830+ tokenizer = models .TokenizerType .WORD ,
831+ min_token_len = 2 ,
832+ lowercase = True ,
833+ )
722834 await client .create_payload_index (
723835 collection_name = collection_name ,
724836 field_name = "title" ,
725- field_schema = models .TextIndexParams (
726- type = models .PayloadSchemaType .TEXT ,
727- tokenizer = models .TokenizerType .WORD ,
728- min_token_len = 2 ,
729- lowercase = True ,
730- ),
837+ field_schema = text_index ,
731838 )
732839 await client .create_payload_index (
733840 collection_name = collection_name ,
@@ -749,6 +856,66 @@ async def run(
749856 field_name = "actors" ,
750857 field_schema = models .PayloadSchemaType .KEYWORD ,
751858 )
859+ await client .create_payload_index (
860+ collection_name = collection_name ,
861+ field_name = "directors" ,
862+ field_schema = models .PayloadSchemaType .KEYWORD ,
863+ )
864+ await client .create_payload_index (
865+ collection_name = collection_name ,
866+ field_name = "writers" ,
867+ field_schema = models .PayloadSchemaType .KEYWORD ,
868+ )
869+ await client .create_payload_index (
870+ collection_name = collection_name ,
871+ field_name = "genres" ,
872+ field_schema = models .PayloadSchemaType .KEYWORD ,
873+ )
874+ await client .create_payload_index (
875+ collection_name = collection_name ,
876+ field_name = "show_title" ,
877+ field_schema = models .PayloadSchemaType .KEYWORD ,
878+ )
879+ await client .create_payload_index (
880+ collection_name = collection_name ,
881+ field_name = "season_number" ,
882+ field_schema = models .PayloadSchemaType .INTEGER ,
883+ )
884+ await client .create_payload_index (
885+ collection_name = collection_name ,
886+ field_name = "episode_number" ,
887+ field_schema = models .PayloadSchemaType .INTEGER ,
888+ )
889+ await client .create_payload_index (
890+ collection_name = collection_name ,
891+ field_name = "collections" ,
892+ field_schema = models .PayloadSchemaType .KEYWORD ,
893+ )
894+ await client .create_payload_index (
895+ collection_name = collection_name ,
896+ field_name = "summary" ,
897+ field_schema = text_index ,
898+ )
899+ await client .create_payload_index (
900+ collection_name = collection_name ,
901+ field_name = "overview" ,
902+ field_schema = text_index ,
903+ )
904+ await client .create_payload_index (
905+ collection_name = collection_name ,
906+ field_name = "plot" ,
907+ field_schema = text_index ,
908+ )
909+ await client .create_payload_index (
910+ collection_name = collection_name ,
911+ field_name = "tagline" ,
912+ field_schema = text_index ,
913+ )
914+ await client .create_payload_index (
915+ collection_name = collection_name ,
916+ field_name = "reviews" ,
917+ field_schema = text_index ,
918+ )
752919 await client .create_payload_index (
753920 collection_name = collection_name ,
754921 field_name = "data.plex.rating_key" ,
0 commit comments