[Link from the first notebook](./01_scraping_notebook_v2.ipynb)

# Explanatory Data Analysis

## A. EDA Set-up

Since this is a new notebook, there's a need to import certain scripts.

In [1]:
# Import basic libraries
import pandas as pd
import numpy as np

In [2]:
#!pip install matplotlib-venn (uncomment if the module isn't imported)

# Import visualisation libraries
import matplotlib.pyplot as plt
from matplotlib_venn import venn2
import seaborn as sns
import pandas as pd
import numpy as np
import string
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.stem.porter import PorterStemmer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import CountVectorizer
import ast
from sklearn.datasets import make_multilabel_classification
from sklearn.multioutput import MultiOutputClassifier
from sklearn.linear_model import LogisticRegression
from collections import Counter
import os
from nltk.util import ngrams
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans

Importing the extracted data into this notebook.

In [3]:
ten_tropes_df = pd.read_csv('10_tropes.csv')
wiki_df  = pd.read_csv('simpsons_episode_data2.csv')

Adjusting the output view.

In [4]:
# some display adjustments to account for the fact that we have many columns
# and some columns contain many characters

np.set_printoptions(threshold=np.inf)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 800)

In [5]:
#checking to see what each header's type is for both dataframes

print(ten_tropes_df.columns)
print(wiki_df.columns)

Index(['Trope Name', 'Trope Description', 'Text Length', 'text_lemma'], dtype='object')
Index(['Episode Title', 'Full Story', 'Tropes'], dtype='object')


In [6]:
#Seeing what makes up the ten tropes dataframe
ten_tropes_df.describe()
ten_tropes_summary_stats = ten_tropes_df.describe()
print(ten_tropes_summary_stats)

       Text Length
count     10.00000
mean    1820.70000
std     1361.34101
min      642.00000
25%      877.25000
50%     1570.00000
75%     2150.25000
max     5204.00000


In [7]:
#Doing the same for the wiki dataframe
wiki_df.describe()
wiki_summary_stats = wiki_df.describe()
print(wiki_summary_stats)

                            Episode Title  \
count                                  13   
unique                                 13   
top     Simpsons Roasting on an Open Fire   
freq                                    1   

                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      

In [8]:
# Check data types of each column for the ten tropes dataframe
ten_tropes_data_types = ten_tropes_df.dtypes
print(ten_tropes_data_types)

# Check for missing values in each column
ten_tropes_df_missing_values = ten_tropes_df.isnull().sum()
print(ten_tropes_df_missing_values)

Trope Name           object
Trope Description    object
Text Length           int64
text_lemma           object
dtype: object
Trope Name           0
Trope Description    0
Text Length          0
text_lemma           0
dtype: int64


In [9]:
# Doing the same for wiki_df
wiki_data_types = wiki_df.dtypes
print(wiki_data_types)

# Check for missing values in each column
wiki_missing_values = wiki_df.isnull().sum()
print(wiki_missing_values)

Episode Title    object
Full Story       object
Tropes           object
dtype: object
Episode Title    0
Full Story       0
Tropes           0
dtype: int64


In [10]:
# Iterate through each column in ten_tropes_df and count unique values (for list-type columns) first
for column in ten_tropes_df.columns:
    if isinstance(ten_tropes_df[column].iloc[0], list):
        unique_values = []
        for sublist in ten_tropes_df[column]:
            for val in sublist:
                if val not in unique_values:
                    unique_values.append(val)
        unique_values_count = len(unique_values)
        print(f"Number of unique values in '{column}': {unique_values_count}")
    else:
        print(f"Skipped column '{column}' as it is not a list-type column.")

Skipped column 'Trope Name' as it is not a list-type column.
Skipped column 'Trope Description' as it is not a list-type column.
Skipped column 'Text Length' as it is not a list-type column.
Skipped column 'text_lemma' as it is not a list-type column.


In [11]:
# Done the same for wiki_df
for column in wiki_df.columns:
    if isinstance(wiki_df[column].iloc[0], list):
        unique_values = []
        for sublist in wiki_df[column]:
            for val in sublist:
                if val not in unique_values:
                    unique_values.append(val)
        unique_values_count = len(unique_values)
        print(f"Number of unique values in '{column}': {unique_values_count}")
    else:
        print(f"Skipped column '{column}' as it is not a list-type column.")

Skipped column 'Episode Title' as it is not a list-type column.
Skipped column 'Full Story' as it is not a list-type column.
Skipped column 'Tropes' as it is not a list-type column.


Firstly, we need to see how many tropes does each episode have. (Note to self, from the 552 tropes, select ten who appear the most in the simpsons episode related feature)

In [13]:
ten_tropes_df

Unnamed: 0,Trope Name,Trope Description,Text Length,text_lemma
0,Imagine Spot,Okay Ralphie You win this time But well be backElliot JD be sensitive Dont act like youre at a pingpong match between a ninja and Bigfoot Dr Kelso gives Elliot an odd look Elliot I know that made no sense but hes totally there now in his head Look at his eyes JDs eyes dart back and forth Dr Kelso Look at that— ScrubsA brief cut to a Fantasy Sequence and back usually no more than a few seconds long and commonly bookended by a Fade to White Alice yells at Bob What am I your maid Cut to Alice cleaning the house in a French Maid Outfit Cut to Bobs face — hes clearly just imagined the role and Alice has just slapped him to wake him up back to realityA very popular trope with commercials so much so that it could almost be considered an advertising trope as much as it is a comedy trope with the idea being to either convey how much ones life would be better with the productservice or how easily obtainable it is,917,okay ralphie you win this time but well be backelliot jd be sensitive dont act like youre at a pingpong match between a ninja and bigfoot dr kelso give elliot an odd look elliot i know that make no sense but he totally there now in his head look at his eye jds eye dart back and forth dr kelso look at that— scrubsa brief cut to a fantasy sequence and back usually no more than a few second long and commonly bookended by a fade to white alice yell at bob what be i your maid cut to alice cleaning the house in a french maid outfit cut to bob face — he clearly just imagine the role and alice have just slap him to wake him up back to realitya very popular trope with commercial so much so that it could almost be consider an advertising trope a much a it be a comedy trope with the idea be to either convey how much one life would be well with the productservice or how easily obtainable it be
1,Couch Gag,After seven seasons weve pretty much said everything you can say in this spot— Garfield Garfield and Friends Arbuckle the InvincibleThe Monster Who Couldnt Scare AnybodyThe Ocean BlueA trope throughout a series by which a part of the opening or ending credits is interchangeably switched and the content can be different every show Some of the same shows also have couch gags at the end as a Credits Gag Alternatively this gag may occur at the end of The Teaser just before the credits startNamed for one of its bestknown incarnations the family scrambling to gather on the couch during the opening sequence of The SimpsonsNote that it need not be a gag TV dramas such as Mission Impossible and Justice League Unlimited will sometimes have clips from later in the episodeCompare to Different in Every Episode Not to be confused with the other type of couch and gag,864,after seven season weve pretty much say everything you can say in this spot— garfield garfield and friend arbuckle the invinciblethe monster who couldnt scare anybodythe ocean bluea trope throughout a series by which a part of the opening or end credit be interchangeably switch and the content can be different every show some of the same show also have couch gag at the end a a credit gag alternatively this gag may occur at the end of the teaser just before the credit startnamed for one of it bestknown incarnation the family scramble to gather on the couch during the opening sequence of the simpsonsnote that it need not be a gag tv drama such a mission impossible and justice league unlimited will sometimes have clip from later in the episodecompare to different in every episode not to be confuse with the other type of couch and gag
2,Catchphrase,Catchphrase may refer to one of the following Character Catchphrase A phrase a character repeats multiple times in an identifiable way Index of Catchphrases Tropes that depict different types of catchphrases and the ways characters use them Catchphrase A 1980s Game Show Catch Phrase 1994 A board game from Parker BrothersIf a direct wick has led you here please correct the link so that it points to the corresponding articleImportant LinksAdvertisementTVTropes is licensed under a Creative Commons AttributionNonCommercialShareAlike 30 Unported License Permissions beyond the scope of this license may be available from thestafftvtropesorg,642,catchphrase may refer to one of the follow character catchphrase a phrase a character repeat multiple time in an identifiable way index of catchphrase trope that depict different type of catchphrase and the way character use them catchphrase a 1980s game show catch phrase 1994 a board game from parker brothersif a direct wick have lead you here please correct the link so that it point to the correspond articleimportant linksadvertisementtvtropes be license under a creative common attributionnoncommercialsharealike 30 unported license permission beyond the scope of this license may be available from thestafftvtropesorg
3,Comically Missing the Point,I started to walk down the street when I heard a voice saying Good evening Mr Dowd I turned and there was this big white rabbit leaning against a lamppost Well I thought nothing of that because when youve lived in a town as long as Ive lived in this one you get used to the fact that everybody knows your name— Elwood P Dowd HarveyA character completely misses a really obvious point for comic effect The point is the sort of thing that any reasonable or informed person will spot and understand given a few seconds or enough information However the center of this trope is a person who despite having all the time in the world and all the information comes to a conclusion so wrong its hard to be even further from correct Commonly elicits a response along the lines of thats not whats wrong here Visual gags are often involvedA Sister Trope to Dramatically Missing the PointCompare Alternative Joke Interpretation Are You Pondering What Im Pondering Bad News in a Good Way BaitandSwitch Comment Cloudcuckoolander The Ditz Failed a Spot Check Insane Troll Logic logic that consists of comically missing the point I Take Offense to That Last One Mistaken for Profound Need a Hand or a Handjob Not Actually the Ultimate Question Proportional Article Importance Right for the Wrong Reasons Sidetracked by the Analogy and Skewed PrioritiesInUniverse Examples Only Well have to spend a lot of money just to make another subpage for real life Note The actual reason being that its too common and attracts natter,1507,i start to walk down the street when i heard a voice say good even mr dowd i turn and there be this big white rabbit lean against a lamppost well i thought nothing of that because when youve live in a town a long a ive live in this one you get use to the fact that everybody know your name— elwood p dowd harveya character completely miss a really obvious point for comic effect the point be the sort of thing that any reasonable or inform person will spot and understand give a few second or enough information however the center of this trope be a person who despite have all the time in the world and all the information come to a conclusion so wrong it hard to be even far from correct commonly elicits a response along the line of thats not whats wrong here visual gag be often involveda sister trope to dramatically miss the pointcompare alternative joke interpretation be you ponder what im ponder bad news in a good way baitandswitch comment cloudcuckoolander the ditz fail a spot check insane troll logic logic that consists of comically miss the point i take offense to that last one mistaken for profound need a hand or a handjob not actually the ultimate question proportional article importance right for the wrong reason sidetrack by the analogy and skewed prioritiesinuniverse example only well have to spend a lot of money just to make another subpage for real life note the actual reason be that it too common and attracts natter
4,Running Gag,Thats not a running gag Thats a pun Its a running gag nowKermit the Frog No Fozzie Do not answer that telephone Fozzie Bear But Kermit all these terrific funny things happen when I do answer it Kermit Im aware of that Im aware of that Fozzie Is there no end to this running gag Animal runs in and rips the phone off the wall before carrying it away Fozzie Well I guess that puts an end to this running gag Kermit Yeah and also to all the incoming calls— The Muppet ShowA joke whose humor derives from repetition ideally becoming funnier each time it is repeated Must be repeated at least three times otherwise its a Brick Joke or Chekhovs Gag Common variants are a joke that occurs Once an Episode and a joke that goes on for long enough that it becomes an OverlyLong Gag but not so long that it loses its humorThe ultimate fear for the use of any running gag is that doing it too often threatens to turn it into an Overused Running Gag or worse it was never funny in the first place and didnt improve through repetition Nothing is worse than starting a one hour show with a joke that bombs the first time and reappears twenty more times over the next hourA Running Gag can be limited to a particular episode or recur throughout an entire series If it recurs throughout an entire series then it will often develop variations andor be accompanied by Lampshade Hanging If a character breaks the fourth wall to mention it its Didnt We Use This Joke Already When the story goes on to kill off the gag for good either with a final payoff or just by referencing that its become too overused and its ending right then that is Running Gagged,1633,thats not a run gag thats a pun it a run gag nowkermit the frog no fozzie do not answer that telephone fozzie bear but kermit all these terrific funny thing happen when i do answer it kermit im aware of that im aware of that fozzie be there no end to this run gag animal run in and rip the phone off the wall before carry it away fozzie well i guess that put an end to this run gag kermit yeah and also to all the incoming calls— the muppet showa joke whose humor derives from repetition ideally become funnier each time it be repeat must be repeat at least three time otherwise it a brick joke or chekhov gag common variant be a joke that occurs once an episode and a joke that go on for long enough that it becomes an overlylong gag but not so long that it loses it humorthe ultimate fear for the use of any run gag be that do it too often threatens to turn it into an overuse run gag or bad it be never funny in the first place and didnt improve through repetition nothing be bad than start a one hour show with a joke that bomb the first time and reappears twenty more time over the next houra run gag can be limited to a particular episode or recur throughout an entire series if it recurs throughout an entire series then it will often develop variation andor be accompany by lampshade hang if a character break the fourth wall to mention it it didnt we use this joke already when the story go on to kill off the gag for good either with a final payoff or just by reference that it become too overuse and it end right then that be run gag
5,Disproportionate Retribution,But that was two years agoRevenge is a dish best served with an extra helping— Captain Young Troops of DoomIn some situations it makes sense to let the opponent know that if they so much as sneeze on someone you protect it will cost them a limb If you have tried an eye for an eye and it really didnt do anything except help sell eyepatches the only way to stay alive is to be drastic Pay back any offense tenfold or even a hundredfold if necessary until the survivors learn to stay away andor do everything in their power to keep you in a good mood Its a common tactic of militaries the world over with some regimes such as Nazi Germany being infamous for itThat said the Justice these rivals have in mind is more akin to a brutal beatdown well most of the time it is an actual brutal NoHoldsBarred Beatdown supposed to culminate in the receivers humiliation or death Any attempt to get them to see the all too obvious truth show mercy or realize theyre a step away from utterly ruining the receivers lifecommitting murder will never succeed It invariably takes the hero beating the rival be it in a Cooking Duel or Good Old Fisticuffs and proving Right Makes Might for the poor deluded soul to realize they were wrong all along sometimes even coming around and realizing that Defeat Means FriendshipVillains who claim that their bad pastcircumstances led them to do this will likely make you realize that Freudian Excuse Is No ExcuseAll too often these guys refuse to see reason They promise that theyll come back to kill the hero and shove his mercy and offer of friendship down his wind pipe It might take the arrival of a plot significant character to clear things up and hand out some Epiphany Therapy to all involvedThis is not limited to the antagonists side God help you if that hero youve harmed has a Psycho Supporter And all parties to a masquerade good or bad are often required to kill any poor schmuck who accidentally sees something hes not supposed toThis could be what stops something from being an act of justice instead of an act of vengeance May be used as part of Cruel Mercy A common habit for Lawful Stupid charactersIntriguingly while disproportionate can possibly mean underdoing it youll almost never see that happen,2243,but that be two year agorevenge be a dish best serve with an extra helping— captain young troop of doomin some situation it make sense to let the opponent know that if they so much a sneeze on someone you protect it will cost them a limb if you have try an eye for an eye and it really didnt do anything except help sell eyepatch the only way to stay alive be to be drastic pay back any offense tenfold or even a hundredfold if necessary until the survivor learn to stay away andor do everything in their power to keep you in a good mood it a common tactic of military the world over with some regime such a nazi germany be infamous for itthat say the justice these rival have in mind be more akin to a brutal beatdown well most of the time it be an actual brutal noholdsbarred beatdown suppose to culminate in the receiver humiliation or death any attempt to get them to see the all too obvious truth show mercy or realize theyre a step away from utterly ruin the receiver lifecommitting murder will never succeed it invariably take the hero beating the rival be it in a cooking duel or good old fisticuffs and prove right make might for the poor delude soul to realize they be wrong all along sometimes even come around and realize that defeat mean friendshipvillains who claim that their bad pastcircumstances lead them to do this will likely make you realize that freudian excuse be no excuseall too often these guy refuse to see reason they promise that theyll come back to kill the hero and shove his mercy and offer of friendship down his wind pipe it might take the arrival of a plot significant character to clear thing up and hand out some epiphany therapy to all involvedthis be not limited to the antagonist side god help you if that hero youve harm have a psycho supporter and all party to a masquerade good or bad be often require to kill any poor schmuck who accidentally see something he not suppose tothis could be what stop something from be an act of justice instead of an act of vengeance may be use a part of cruel mercy a common habit for lawful stupid charactersintriguingly while disproportionate can possibly mean underdoing it youll almost never see that happen
6,Establishing Character Moment,In just a handful of scenes weve established the full set of character archetypes to see us through until the series goes off air Zacks a huckster Slater a jock Jessie an uptight brain Kelly a cheerful object of desire and Screech an intolerable bundle of silly faces When Lisa appears on crutches having kicked a television in rage when her favorite nail polish was discontinued her core personality — likes clothes doesnt fancy Screech — is also laid out— Stuart Millard on the first episode of Saved by the BellFirst impressions count and in TV and film even more so theres no point in hiring an actor to give us a Deadpan Snarker if people think theyre seriously being a total ditz until the third act So when the character comes into the plot you give them an Establishing Character MomentThe establishing character moment does not have to be huge it doesnt have to be impressive it doesnt even have to be first Its about revealing a characters motivations and abilities in a single introductory scene They could blow the roof and rappel in from a helicopter with an automatic in each hand but if theyre The Woobie its not a good idea unless youre aiming for a Heartbroken Badass like Harry Dresden or SpiderManSometimes the first thing needed is to set up how they fit into the plot but this may not best reveal their character So the Establishing Character Moment may be one or two scenes down the line For TV shows and their episodic format the character may first do what they need to do in the episode and then near the end establish how the character will fit into the ongoing arcs and themes of the showOther times the Moment may be the small calm when the character carries out something completely unrelated to the plot to show them in their natural element before putting them in an unrelenting storm of plot lines — for instance during a Morning Routine sequenceWhen it happens it cannot be taken back A running punt to a puppy will completely color attempts to Pet the Dog later but if you start with a gentle stroke then some people may get the wrong idea about your villain Then again a Bait the Dog moment may subvert this or it might itself serve to show the complex multifaceted Hidden Depths of that characterIf this happens in a musical it can be in I Am Song formatSubtropes include BaitandSwitch Character Intro Bitch Alert Incoming Ham Newcomer Saves the Day and Reestablishing Character Moment See also Establishing Series Moment May overlap with AMFM Characterization Compare Characterisation Click Moment Contrast Establishing Team Shot when The Team is established,2597,in just a handful of scene weve establish the full set of character archetype to see u through until the series go off air zacks a huckster slater a jock jessie an uptight brain kelly a cheerful object of desire and screech an intolerable bundle of silly face when lisa appear on crutch have kick a television in rage when her favorite nail polish be discontinue her core personality — like clothes doesnt fancy screech — be also laid out— stuart millard on the first episode of save by the bellfirst impression count and in tv and film even more so there no point in hire an actor to give u a deadpan snarker if people think theyre seriously be a total ditz until the third act so when the character come into the plot you give them an establish character momentthe establish character moment do not have to be huge it doesnt have to be impressive it doesnt even have to be first it about reveal a character motivation and ability in a single introductory scene they could blow the roof and rappel in from a helicopter with an automatic in each hand but if theyre the woobie it not a good idea unless youre aim for a heartbroken badass like harry dresden or spidermansometimes the first thing need be to set up how they fit into the plot but this may not best reveal their character so the establish character moment may be one or two scene down the line for tv show and their episodic format the character may first do what they need to do in the episode and then near the end establish how the character will fit into the ongoing arc and theme of the showother time the moment may be the small calm when the character carry out something completely unrelated to the plot to show them in their natural element before put them in an unrelenting storm of plot line — for instance during a morning routine sequencewhen it happens it can not be take back a run punt to a puppy will completely color attempt to pet the dog later but if you start with a gentle stroke then some people may get the wrong idea about your villain then again a bait the dog moment may subvert this or it might itself serve to show the complex multifaceted hidden depth of that characterif this happens in a musical it can be in i be song formatsubtropes include baitandswitch character intro bitch alert incoming ham newcomer save the day and reestablish character moment see also establish series moment may overlap with amfm characterization compare characterisation click moment contrast establish team shot when the team be establish
7,Karma Houdini,So is Satan just generous when people dont have a soul to sell Maybe if you do enough deeds in his nameIts hard to be religious when certain people are never incinerated by bolts of lightning— Calvin about Moe Calvin and HobbesSometimes even LaserGuided Karma misses its targetThe character has done a number of things that deserve a karmic comeuppance most importantly things that caused harm to the innocent But when the time comes for the hammer to fall thats not what happens At least not on them They dont get what they deserve Instead they get away scotfree They are Easily Forgiven maybe even praised and even elected president And they might even have reversed the Humiliation Conga that was being planned for them Worst comes to worst The Bad Guy Wins and a stolen happy ending takes place at the expense of the hapless victim who may be punished in the guilty characters placeThis is it This is all there is to the story The show is over The book is finished The author isnt going to write any more The Word of God has been spoken Karma is out to lunch The villain has become a Karma HoudiniPredictably it is often shocking or downright upsetting to see such scoundrels dodge instant karmic punishment and come out triumphant in the end But as frustrating as it is it does not necessarily make the story worse as long as it doesnt feel forced contrived or even shortsighted Regardless this trope runs the high risk of leaving the audiences thirst for emotional catharsis unsatiated especially when theyve become so invested in seeing the villain theyve projected their hate onto get their full commupance that may or may not come by the end And when it doesnt you can expect them to complain about it and clamor for a sequel where the villain does get their comeuppance — or alternatively theyll try to satisfy their frustratingly unfulfilled desire for retribution by writing their ownThere can be a number of reasons for this trope Sometimes the story is going for a Downer Ending in which the villains escape justice for their crimes Other times the creators overlook relatively minor offenders whether due to not considering their actions worthy of punishment or not having the time to see them get their comeuppance within the story Still other times it may not be possible to exact retribution on the characters who ends up becoming Karma Houdinis It could be that theyre too powerful to be subject to retribution any Eldritch Abomination in a horror story is likely to be this as even if they dont outright win they are so beyond mortal comprehension that the best the protagonists can hope for is to simply escape them Or it could be that theres no way to legally judge them because they are above the law in some way and the protagonists are not willing to risk it all for some vigilante justiceCompare ButtMonkey for which a character ends up having many disproportionately bad things happen to them throughout the story and may also technically qualify for this trope as hard as that sounds The extreme counterpart of the former trope Cosmic Plaything can be considered this tropes polar opposite Also compare with No Good Deed Goes Unpunished where the good guy suffers as a result of doing the right thingContrast the aforementioned LaserGuided Karma when the villains comeuppance hits at an incredibly opportune moment and is directly traceable to their transgression Karma Houdini Warranty when the comeuppance only happens in a sequel and Karmic Death when the comeuppance leads to the villains death Also see Idiot Houdini when a person is forever protected from the results of their stupidityNote that a character doesnt necessarily need to be a fulltime villain to qualify for this trope They may simply end up doing unnecessarily hurtful things yet not suffer the repercussions In contrast when someone is too evil to kill off but may suffer other consequences see Joker ImmunityNote also The work usually must be completed for a character to qualify as this trope Only if there are no more opportunities for LaserGuided Karma to strike can a character be said to have successfully avoided any consequences for their actions Exceptions may be made in cases where Status Quo Is Godnote which is why you should not add this trope to the This Image Is Not an Example index or when a character can be said to have permanently escaped eg they died peacefully in their sleep at an old age And sometimes even thats not enough Unless they somehow managed to get into Heaven and are gleefully rubbing their current state in your face Otherwise resist the urge to put it on an ongoing works pageDont mistake a lack of direct karma for this trope If a character for example murders someone and never gets caught but a freak lightning strike kills them theyve still gotten their comeuppance even though the lightning had no connection to their crime Think of the writer as God with the power to punish or not punish a character who does evilNot to be confused with a literal Houdinistyle escape by the villain to avoid justice thats Villain Exit Stage Left although the two certainly can and often do overlapThis is a spoileriffic trope spoilers shall be unmarked You have been warned,5204,so be satan just generous when people dont have a soul to sell maybe if you do enough deed in his nameits hard to be religious when certain people be never incinerate by bolt of lightning— calvin about moe calvin and hobbessometimes even laserguided karma miss it targetthe character have do a number of thing that deserve a karmic comeuppance most importantly thing that cause harm to the innocent but when the time come for the hammer to fall thats not what happens at least not on them they dont get what they deserve instead they get away scotfree they be easily forgiven maybe even praise and even elect president and they might even have reverse the humiliation conga that be be plan for them bad come to bad the bad guy win and a steal happy end take place at the expense of the hapless victim who may be punish in the guilty character placethis be it this be all there be to the story the show be over the book be finish the author isnt go to write any more the word of god have be spoken karma be out to lunch the villain have become a karma houdinipredictably it be often shock or downright upset to see such scoundrel dodge instant karmic punishment and come out triumphant in the end but a frustrate a it be it do not necessarily make the story bad a long a it doesnt feel force contrive or even shortsighted regardless this trope run the high risk of leave the audience thirst for emotional catharsis unsatiated especially when theyve become so invest in see the villain theyve project their hate onto get their full commupance that may or may not come by the end and when it doesnt you can expect them to complain about it and clamor for a sequel where the villain do get their comeuppance — or alternatively theyll try to satisfy their frustratingly unfulfilled desire for retribution by write their ownthere can be a number of reason for this trope sometimes the story be go for a downer end in which the villain escape justice for their crime other time the creator overlook relatively minor offender whether due to not consider their action worthy of punishment or not have the time to see them get their comeuppance within the story still other time it may not be possible to exact retribution on the character who end up become karma houdini it could be that theyre too powerful to be subject to retribution any eldritch abomination in a horror story be likely to be this a even if they dont outright win they be so beyond mortal comprehension that the best the protagonist can hope for be to simply escape them or it could be that there no way to legally judge them because they be above the law in some way and the protagonist be not willing to risk it all for some vigilante justicecompare buttmonkey for which a character end up have many disproportionately bad thing happen to them throughout the story and may also technically qualify for this trope a hard a that sound the extreme counterpart of the former trope cosmic plaything can be consider this trope polar opposite also compare with no good deed go unpunished where the good guy suffers a a result of do the right thingcontrast the aforementioned laserguided karma when the villain comeuppance hit at an incredibly opportune moment and be directly traceable to their transgression karma houdini warranty when the comeuppance only happens in a sequel and karmic death when the comeuppance lead to the villain death also see idiot houdini when a person be forever protect from the result of their stupiditynote that a character doesnt necessarily need to be a fulltime villain to qualify for this trope they may simply end up do unnecessarily hurtful thing yet not suffer the repercussion in contrast when someone be too evil to kill off but may suffer other consequence see joker immunitynote also the work usually must be complete for a character to qualify a this trope only if there be no more opportunity for laserguided karma to strike can a character be say to have successfully avoid any consequence for their action exception may be make in case where status quo be godnote which be why you should not add this trope to the this image be not an example index or when a character can be say to have permanently escape eg they die peacefully in their sleep at an old age and sometimes even thats not enough unless they somehow manage to get into heaven and be gleefully rub their current state in your face otherwise resist the urge to put it on an ongoing work pagedont mistake a lack of direct karma for this trope if a character for example murder someone and never get caught but a freak lightning strike kill them theyve still gotten their comeuppance even though the lightning have no connection to their crime think of the writer a god with the power to punish or not punish a character who do evilnot to be confuse with a literal houdinistyle escape by the villain to avoid justice thats villain exit stage left although the two certainly can and often do overlapthis be a spoileriffic trope spoiler shall be unmarked you have be warn
8,Characterization Marches On,Nothing reconciles a past of animal abuse better than donut partiesAoi Asahina What the hecks happened to you You werent like this from the beginning you know Yasuhiro Hagakure Well back then my personality hadnt quite solidified yet— Danganronpa Trigger Happy HavocWhen a series starts out the characters usually dont have firmly established personalities or appearance since the writers are just getting a feel for them Time goes on more and more episodes are produced and the characters become better defined with their own set of personality and behavioral quirks Or maybe their early personality gradually gave way to something very different due to Character Development andor Flanderization Whatever the case may be though their early incarnations are forgotten about as people look at the firmly established characterizationIn extreme cases a characters actions in early instances of a work can actively contradict their later established behavior For examples early adventures of the noted Technical Pacifist Batman end with him openly killing criminals This is a subtrope of EarlyInstallment WeirdnessA form of Continuity Drift Compare the OutofCharacter Moment and Depending on the Writer or Flanderization where a single trait gets largely exaggerated until its all the character is known for When the characterization is already firmly established but still ends up changing to something completely different for unexplained reasons then this is Character Derailment If the character is a Long Runner who you find you can talk about in terms of eras often under the curation of a new creative team or when a new project with the character is launched it is EraSpecific PersonalitySee also Character Check when the writers abruptly remember that the character started out as different and give him a few scenes where he acts like he used to if only temporarily,1872,nothing reconciles a past of animal abuse well than donut partiesaoi asahina what the hecks happen to you you werent like this from the begin you know yasuhiro hagakure well back then my personality hadnt quite solidify yet— danganronpa trigger happy havocwhen a series start out the character usually dont have firmly establish personality or appearance since the writer be just get a feel for them time go on more and more episode be produce and the character become well define with their own set of personality and behavioral quirk or maybe their early personality gradually give way to something very different due to character development andor flanderization whatever the case may be though their early incarnation be forgotten about a people look at the firmly establish characterizationin extreme case a character action in early instance of a work can actively contradict their later establish behavior for example early adventure of the note technical pacifist batman end with him openly kill criminal this be a subtrope of earlyinstallment weirdnessa form of continuity drift compare the outofcharacter moment and depend on the writer or flanderization where a single trait get largely exaggerated until it all the character be know for when the characterization be already firmly establish but still end up change to something completely different for unexplained reason then this be character derailment if the character be a long runner who you find you can talk about in term of era often under the curation of a new creative team or when a new project with the character be launch it be eraspecific personalitysee also character check when the writer abruptly remember that the character start out a different and give him a few scene where he act like he use to if only temporarily
9,Special Guest,Special but unknown guest starnote How can you have a guest star in a movieLadies and Gentlemen Mister Burt Bacharach— Austin PowersA star brought into a series to star as himself or a character very similar to one he is famous for playing In American television this is common during Sweeps Especially likely on a Very Special Episode The technical term is Stunt Casting In a sitcom always accompanied by canned cheering and applause Pioneered on I Love LucySee also The Cameo Celebrity Star As Himself Character as Himself Celebrity Edition Guest Fighter is a special version of this that only applies to video games especially Fighting Games and Sesame Street Cred is for celebrities who guest star in shows aimed at children,728,special but unknown guest starnote how can you have a guest star in a movieladies and gentleman mister burt bacharach— austin powersa star brought into a series to star a himself or a character very similar to one he be famous for play in american television this be common during sweep especially likely on a very special episode the technical term be stunt cast in a sitcom always accompany by can cheer and applause pioneer on i love lucysee also the cameo celebrity star a himself character a himself celebrity edition guest fighter be a special version of this that only applies to video game especially fight game and sesame street cred be for celebrity who guest star in show aim at child


In [14]:
# Get unique trope names from the DataFrame
trope_names = ten_tropes_df["Trope Name"].unique()
trope_names

array(['Imagine Spot', 'Couch Gag', 'Catchphrase',
       'Comically Missing the Point', 'Running Gag',
       'Disproportionate Retribution', 'Establishing Character Moment',
       'Karma Houdini', 'Characterization Marches On', 'Special Guest'],
      dtype=object)

In [15]:
# Get the NLTK stopwords
nltk_stopwords = set(stopwords.words('english'))

# Get unique trope names from the DataFrame
trope_names = ten_tropes_df["Trope Name"].unique()

# Create a directory to save the bar chart images
output_directory = "bar_charts"
os.makedirs(output_directory, exist_ok=True)

# Loop through each trope and perform EDA
custom_stopwords = {
    'Imagine Spot': ['stopword1', 'stopword2', 'additional_stopword_for_Imagine_Spot'],
    'Couch Gag': ['stopword3', 'stopword4', 'additional_stopword_for_Couch_Gag'],
    'Catchphrase': ['stopword5', 'stopword6', 'additional_stopword_for_Catchphrase'],
    'Comically Missing the Point': ['stopword7', 'stopword8', 'additional_stopword_for_Comically_Missing_the_Point'],
    'Running Gag': ['stopword9', 'stopword10', 'additional_stopword_for_Running_Gag'],
    'Disproportionate Retribution': ['stopword11', 'stopword12', 'additional_stopword_for_Disproportionate_Retribution'],
    'Establishing Character Moment': ['stopword13', 'stopword14', 'additional_stopword_for_Establishing_Character_Moment'],
    'Karma Houdini': ['stopword15', 'stopword16', 'additional_stopword_for_Karma_Houdini'],
    'Characterization Marches On': ['stopword17', 'stopword18', 'additional_stopword_for_Characterization_Marches_On'],
    'Special Guest': ['stopword19', 'stopword20', 'additional_stopword_for_Special_Guest'],
}

# Loop through each trope and perform EDA
for trope_name in trope_names:
    # Filter the data for the current trope
    trope_data = ten_tropes_df[ten_tropes_df["Trope Name"] == trope_name]

    # Tokenize the lemmatized descriptions and remove common stopwords and custom stopwords
    all_words = " ".join(trope_data["text_lemma"])
    all_words = word_tokenize(all_words)
    
    # Remove common stopwords
    filtered_words = [word for word in all_words if word.lower() not in nltk_stopwords]
    
    # Remove custom stopwords
    filtered_words = [word for word in filtered_words if word.lower() not in custom_stopwords.get(trope_name, [])]

    # Count word frequencies
    word_freq = Counter(filtered_words)

    # Visualize the top occurring words
    top_words = word_freq.most_common(10)
    word, frequency = zip(*top_words)

    # Create a bar plot
    plt.figure(figsize=(10, 6))
    plt.bar(word, frequency)
    plt.title(f"Top Words in {trope_name}")
    plt.xlabel("Words")
    plt.ylabel("Frequency")
    plt.xticks(rotation=45)

    # Save the bar chart to a file in the output directory
    image_file_path = os.path.join(output_directory, f"{trope_name}_bar_chart.png")
    plt.savefig(image_file_path)
    plt.close()  # Close the plot to release resources

# All bar charts have been saved to the output_directory

Since apparently Establishing Character Moment and Karma Houdini both have the word Character appear the most, singular words aren't advised as the model will be unable to tell which is which. Instead, we'll use bigrams.

In [16]:
# Create a directory to save the bar chart images
output_directory = "bar_charts_bigrams"
os.makedirs(output_directory, exist_ok=True)

# Loop through each trope and perform EDA on bigrams
for trope_name in trope_names:
    # Filter the data for the current trope
    trope_data = ten_tropes_df[ten_tropes_df["Trope Name"] == trope_name]

    # Tokenize the lemmatized descriptions and remove common stopwords and custom stopwords
    all_words = " ".join(trope_data["text_lemma"])
    all_words = word_tokenize(all_words)
    
    # Remove common stopwords
    filtered_words = [word for word in all_words if word.lower() not in nltk_stopwords]
    
    # Remove custom stopwords
    filtered_words = [word for word in filtered_words if word.lower() not in custom_stopwords.get(trope_name, [])]

    # Create bigrams
    bigrams = list(ngrams(filtered_words, 2))

    # Count bigram frequencies
    bigram_freq = Counter(bigrams)

    # Visualize the top occurring bigrams
    top_bigrams = bigram_freq.most_common(10)
    bigram, frequency = zip(*top_bigrams)

    # Create a bar plot for bigrams
    plt.figure(figsize=(10, 6))
    plt.bar([" ".join(bg) for bg in bigram], frequency)
    plt.title(f"Top Bigrams in {trope_name}")
    plt.xlabel("Bigrams")
    plt.ylabel("Frequency")
    plt.xticks(rotation=45)

    # Adjust the margins to prevent text from getting cut off
    plt.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.1)

    # Save the bar chart to a file in the output directory
    image_file_path = os.path.join(output_directory, f"{trope_name}_bigram_bar_chart.png")
    plt.savefig(image_file_path)
    plt.close()  # Close the plot to release resources

# All bigram bar charts have been saved to the output_directory

Again, it's not very meaningful to have just 1 occurence of a bigram. Hence, we're setting a minimum limit of 2.

In [17]:
# Create a directory to save the bar chart images
output_directory = "bar_charts_trigrams"
os.makedirs(output_directory, exist_ok=True)

# Loop through each trope and perform EDA on bigrams
for trope_name in trope_names:
    # Filter the data for the current trope
    trope_data = ten_tropes_df[ten_tropes_df["Trope Name"] == trope_name]

    # Tokenize the lemmatized descriptions and remove common stopwords and custom stopwords
    all_words = " ".join(trope_data["text_lemma"])
    all_words = word_tokenize(all_words)
    
    # Remove common stopwords
    filtered_words = [word for word in all_words if word.lower() not in nltk_stopwords]
    
    # Remove custom stopwords
    filtered_words = [word for word in filtered_words if word.lower() not in custom_stopwords.get(trope_name, [])]

    # Create bigrams
    bigrams = list(ngrams(filtered_words, 2))

    # Count bigram frequencies
    bigram_freq = Counter(bigrams)

    # Set a threshold for bigram frequency (e.g., 2 or more occurrences)
    filtered_bigrams = {bg: freq for bg, freq in bigram_freq.items() if freq > 2}

    if filtered_bigrams:
        # Visualize the top occurring bigrams
        top_bigrams = Counter(filtered_bigrams).most_common(10)
        bigram, frequency = zip(*top_bigrams)

        # Create a bar plot for bigrams
        plt.figure(figsize=(10, 6))
        plt.bar([" ".join(bg) for bg in bigram], frequency)
        plt.title(f"Top Bigrams in {trope_name}")
        plt.xlabel("Bigrams")
        plt.ylabel("Frequency")
        plt.xticks(rotation=45)

        # Adjust the margins to prevent text from getting cut off
        plt.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.1)

    # Save the bar chart to a file in the output directory
    image_file_path = os.path.join(output_directory, f"{trope_name}_2_minimum_bigram_bar_chart.png")
    plt.savefig(image_file_path)
    plt.close()  # Close the plot to release resources

We might have to do a trigram.

In [18]:
# Loop through each trope and perform EDA on trigrams
for trope_name in trope_names:
    # Filter the data for the current trope
    trope_data = ten_tropes_df[ten_tropes_df["Trope Name"] == trope_name]

    # Tokenize the lemmatized descriptions and remove common stopwords and custom stopwords
    all_words = " ".join(trope_data["text_lemma"])
    all_words = word_tokenize(all_words)
    
    # Remove common stopwords
    filtered_words = [word for word in all_words if word.lower() not in nltk_stopwords]
    
    # Remove custom stopwords
    filtered_words = [word for word in filtered_words if word.lower() not in custom_stopwords.get(trope_name, [])]

    # Create trigrams
    trigrams = list(ngrams(filtered_words, 3))

    # Count trigram frequencies
    trigram_freq = Counter(trigrams)

    # Set a threshold for trigram frequency (e.g., 2 or more occurrences)
    filtered_trigrams = {tg: freq for tg, freq in trigram_freq.items() if freq > 2}

    if filtered_trigrams:
        # Visualize the top occurring trigrams
        top_trigrams = Counter(filtered_trigrams).most_common(10)
        trigram, frequency = zip(*top_trigrams)

        # Create a bar plot for trigrams
        plt.figure(figsize=(10, 6))
        plt.bar([" ".join(tg) for tg in trigram], frequency)
        plt.title(f"Top Trigrams in {trope_name}")
        plt.xlabel("Trigrams")
        plt.ylabel("Frequency")
        plt.xticks(rotation=45)

        # Adjust the margins to prevent text from getting cut off
        plt.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.1)

        # Create a subdirectory within the main output directory for trigrams
        trigram_output_directory = os.path.join(output_directory, "trigrams")
        os.makedirs(trigram_output_directory, exist_ok=True)
        
        # Save the bar chart to a file in the output directory
        image_file_path = os.path.join(output_directory, f"{trope_name}_2_minimum_trigram_bar_chart.png")
        plt.savefig(image_file_path)
        plt.close()  # Close the plot to release resources

In [19]:
X, y = make_multilabel_classification(n_classes=3, random_state=0)
clf = MultiOutputClassifier(LogisticRegression()).fit(X, y)
clf.predict(X[-2:])

array([[1, 1, 1],
       [1, 0, 1]])

In [21]:
# Define a TF-IDF vectorizer
tfidf_vectorizer = TfidfVectorizer(max_features=1000)  # Adjust the number of features as needed

# Create a list to store features
X = []

# Loop through each trope and perform EDA
for trope_name in trope_names:
    # Filter the data for the current trope
    trope_data = ten_tropes_df[ten_tropes_df["Trope Name"] == trope_name]

    # Extract the tokenized, lemmatized, and cleaned text data (features)
    all_words = " ".join(trope_data["text_lemma"])
    all_words = word_tokenize(all_words)
    filtered_words = [word for word in all_words if word.lower() not in nltk_stopwords]
    filtered_words = [word for word in filtered_words if word.lower() not in custom_stopwords.get(trope_name, [])]

    # Add features to X
    X.append(" ".join(filtered_words))  # Convert the list of words back to a single string

# Vectorize the features using TF-IDF
X_tfidf = tfidf_vectorizer.fit_transform(X)

# Perform clustering (e.g., K-means clustering) and specify n_init
n_clusters = 5  # Adjust the number of clusters as needed
n_init_value = 10  # Set the value explicitly to suppress the warning
kmeans = KMeans(n_clusters=n_clusters, n_init=n_init_value, random_state=0)
kmeans.fit(X_tfidf)

# Assign cluster labels to each trope
cluster_labels = kmeans.labels_

# Print out the cluster labels for each trope
for trope_name, label in zip(trope_names, cluster_labels):
    print(f"Trope: {trope_name}, Cluster Label: {label}")

# Now you have cluster labels for your tropes

Trope: Imagine Spot, Cluster Label: 3
Trope: Couch Gag, Cluster Label: 0
Trope: Catchphrase, Cluster Label: 4
Trope: Comically Missing the Point, Cluster Label: 1
Trope: Running Gag, Cluster Label: 0
Trope: Disproportionate Retribution, Cluster Label: 1
Trope: Establishing Character Moment, Cluster Label: 2
Trope: Karma Houdini, Cluster Label: 1
Trope: Characterization Marches On, Cluster Label: 2
Trope: Special Guest, Cluster Label: 4


In [28]:
# Assuming cluster_labels is a list of cluster labels for each trope
# For example, cluster_labels = [3, 0, 4, 1, 0, 1, 2, 1, 2, 4]

# Create binary labels based on cluster labels
from sklearn.preprocessing import MultiLabelBinarizer
mlb = MultiLabelBinarizer()
y_combined = mlb.fit_transform([f"Cluster{cluster_label}" for cluster_label in cluster_labels])

# Now y_combined is a binary label matrix that can be used with make_multilabel_classification

# Print the transformed binary labels
y_combined

array([[0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])

In [29]:
# Print the inverse transformation to check the mapping
original_labels = mlb.inverse_transform(y_combined)
original_labels

[('3', 'C', '[', ']', 'e', 'l', 'r', 's', 't', 'u'),
 ('0', 'C', '[', ']', 'e', 'l', 'r', 's', 't', 'u'),
 ('4', 'C', '[', ']', 'e', 'l', 'r', 's', 't', 'u'),
 ('1', 'C', '[', ']', 'e', 'l', 'r', 's', 't', 'u'),
 ('0', 'C', '[', ']', 'e', 'l', 'r', 's', 't', 'u'),
 ('1', 'C', '[', ']', 'e', 'l', 'r', 's', 't', 'u'),
 ('2', 'C', '[', ']', 'e', 'l', 'r', 's', 't', 'u'),
 ('1', 'C', '[', ']', 'e', 'l', 'r', 's', 't', 'u'),
 ('2', 'C', '[', ']', 'e', 'l', 'r', 's', 't', 'u'),
 ('4', 'C', '[', ']', 'e', 'l', 'r', 's', 't', 'u')]