# Initializing List of Infosets

In [1]:
from imdb        import IMDb
from collections import namedtuple
from py_util.cli import boxed_text

# Access object from the `imdb` package.
ia = IMDb()
# Infoset names reported by the library for movies; the categorized names
# defined later in this notebook are compared against this collection.
all_infosets = ia.get_movie_infoset()

# Infoset Examination Block
This block was used to show which infosets had not been categorized yet.  Since each of the infosets has now been sorted into a category, this block now simply defines the categories and confirms that every infoset is covered.

In [2]:
# An infoset name paired with a short note explaining how it was categorized.
Infoset = namedtuple( 'Infoset', ( 'name', 'comment'))

# Infosets that will not be used.  Entries are grouped by the (shared) reason
# for skipping them; every name in a group receives that group's comment.
dont_need = {
    Infoset( skipped_name, reason)
    for reason, skipped_names in (
        ( 'This is for TV shows, not for movies.',
          ( 'airing', 'episodes', 'tv schedule')),
        ( 'I am not working with sound or video data.',
          ( 'sound clips', 'video clips', 'soundtrack')),
        ( "Plot summaries, synopsises, and user reviews are too wordy.",
          ( 'synopsis', 'critic reviews', 'plot', 'reviews', 'external reviews')),
        ( "I don't care about technical, location, certification, or release info.",
          ( 'parents guide', 'release dates', 'release info', 'locations', 'technical')),
        ( 'This data is included with "external sites".',
          ( 'photo sites', 'official sites', 'misc sites')),
    )
    for skipped_name in skipped_names
}

# Infosets that are still undecided; all of them share the same note.
investigate_more = {
    Infoset( pending_name, 'Nothing for Dr. Zhivago.  Maybe other movies?')
    for pending_name in (
        'alternate versions',
        'connections',
        'crazy credits',
        'faqs',
        'list',
        'news',
    )
}

# Infosets that will be used, written as a name -> comment mapping.
need = {
    Infoset( wanted_name, note)
    for wanted_name, note in {
        'external sites':  'Includes "official sites", "misc sites", and "photo sites".',
        'akas':            'Provides "raw akas", which can be searched by country.',
        'awards':          'Awards are searchable by outcome, so I can filter out nominations easily',
        'full credits':    'Only returns imdb.Person objects: no roles.',
        'goofs':           'Can be filtered by category of goof.',
        'keywords':        "Eventually, but I won't be using it for a while.",
        'trivia':          'Returns a list in order of most-found-interesting.',
        'recommendations': 'The same 12 as online, so it includes this film .',
        'taglines':        "It's unclear whether there are parenthetical statements (in some of them).",
        'vote details':    'Returns vote count for each possible ranking.',
        'quotes':          "Eventually, but I won't be using it for a while.",
        'main':            'Gotta have main.',
    }.items()
}

# Names of every infoset that has been placed in one of the three categories.
examined_infosets = {i.name for i in need | dont_need | investigate_more}

# A categorized name that is absent from `all_infosets` is most likely a typo.
# Report every such name at once, in sorted order, so the error is complete and
# reproducible (sets have no stable iteration order).
invalid_infosets = sorted( examined_infosets - set( all_infosets))
if invalid_infosets:
    invalid_names = ', '.join( repr( name) for name in invalid_infosets)
    raise ValueError( f'There is an evaluation for invalid infoset(s): {invalid_names}.')

# Entries of `all_infosets` that are not in any category yet, in their
# original order.  Computed once and reused for both the check and the report.
uncategorized_infosets = [name for name in all_infosets if name not in examined_infosets]

if not uncategorized_infosets:
    print( boxed_text( "All infosets have been evaluated!"))
else:
    print( boxed_text( "The following infosets are not in a category:"))
    for infoset in uncategorized_infosets:
        print( infoset)

||                                           ||
||     All infosets have been evaluated!     ||
||                                           ||


# Examining the Infoset Categories in Detail

In [3]:
def print_infoset_set( infoset_set):
    """Print a collection of Infosets as an aligned two-column table.

    Each row has the form ``<name>   ||   <comment>``.  Rows are sorted by
    name, and names are left-justified to the width of the longest name so
    that the ``||`` separators line up.

    Parameters
    ----------
    infoset_set : iterable of (name, comment) pairs with a ``.name`` attribute
        The infosets to print, e.g. a set of ``Infoset`` namedtuples.

    Returns
    -------
    None
        The table is written to standard output.  Nothing is printed when
        ``infoset_set`` is empty.
    """
    rows = sorted( infoset_set, key=lambda x: x.name)
    if not rows:
        # max() raises ValueError on an empty sequence; an empty category
        # simply has nothing to show.
        return

    longest_name = max( len( str( row.name)) for row in rows)
    for name, comment in rows:
        # Pad with the same str() view of the name that was used to measure it.
        print( f"{str( name).ljust( longest_name)}   ||   {comment}")

## Infosets that I will be using something from

In [4]:
# Show the `need` category as a name/comment table, sorted by name.
print_infoset_set( need)

akas              ||   Provides "raw akas", which can be searched by country.
awards            ||   Awards are searchable by outcome, so I can filter out nominations easily
external sites    ||   Includes "official sites", "misc sites", and "photo sites".
full credits      ||   Only returns imdb.Person objects: no roles.
goofs             ||   Can be filtered by category of goof.
keywords          ||   Eventually, but I won't be using it for a while.
main              ||   Gotta have main.
quotes            ||   Eventually, but I won't be using it for a while.
recommendations   ||   The same 12 as online, so it includes this film .
taglines          ||   It's unclear whether there are parenthetical statements (in some of them).
trivia            ||   Returns a list in order of most-found-interesting.
vote details      ||   Returns vote count for each possible ranking.


## Infosets that I am undecided about
None of the infosets below had anything for Doctor Zhivago, which was the film that I used for testing.  As part of my data-ingestion engine, I will see if any other films have values for these, or if they are just broken.

In [5]:
# Show the `investigate_more` category as a name/comment table, sorted by name.
print_infoset_set( investigate_more)

alternate versions   ||   Nothing for Dr. Zhivago.  Maybe other movies?
connections          ||   Nothing for Dr. Zhivago.  Maybe other movies?
crazy credits        ||   Nothing for Dr. Zhivago.  Maybe other movies?
faqs                 ||   Nothing for Dr. Zhivago.  Maybe other movies?
list                 ||   Nothing for Dr. Zhivago.  Maybe other movies?
news                 ||   Nothing for Dr. Zhivago.  Maybe other movies?


## Infosets that I will not be using

In [6]:
# Show the `dont_need` category as a name/comment table, sorted by name.
print_infoset_set( dont_need)

airing             ||   This is for TV shows, not for movies.
critic reviews     ||   Plot summaries, synopsises, and user reviews are too wordy.
episodes           ||   This is for TV shows, not for movies.
external reviews   ||   Plot summaries, synopsises, and user reviews are too wordy.
locations          ||   I don't care about technical, location, certification, or release info.
misc sites         ||   This data is included with "external sites".
official sites     ||   This data is included with "external sites".
parents guide      ||   I don't care about technical, location, certification, or release info.
photo sites        ||   This data is included with "external sites".
plot               ||   Plot summaries, synopsises, and user reviews are too wordy.
release dates      ||   I don't care about technical, location, certification, or release info.
release info       ||   I don't care about technical, location, certification, or release info.
reviews            ||   Plot summ