In [1]:
import pandas as pd
import numpy as np
# Load the raw catalogue of streaming titles; the genre analysis below starts from this frame.
titles_csv = "data/streaming_titles.csv"
df = pd.read_csv(titles_csv)

In [2]:
# Collect every distinct genre label that occurs in the "listed_in" column.

genre_list_raw = df["listed_in"].to_list()

# Each usable entry is a string of labels separated by ", "; entries that are
# not strings are skipped. A set keeps one copy of each label.
genres = {
    label
    for entry in genre_list_raw
    if type(entry) is str
    for label in entry.split(', ')
}

# Show the labels alphabetically, followed by how many distinct ones exist.
print(sorted(genres))
print(len(genres))

['Action', 'Action & Adventure', 'Action-Adventure', 'Adult Animation', 'Adventure', 'Animals & Nature', 'Animation', 'Anime', 'Anime Features', 'Anime Series', 'Anthology', 'Arthouse', 'Arts', 'Biographical', 'Black Stories', 'British TV Shows', 'Buddy', 'Cartoons', 'Children & Family Movies', 'Classic & Cult TV', 'Classic Movies', 'Classics', 'Comedies', 'Comedy', 'Coming of Age', 'Concert Film', 'Cooking & Food', 'Crime', 'Crime TV Shows', 'Cult Movies', 'Dance', 'Disaster', 'Documentaries', 'Documentary', 'Docuseries', 'Drama', 'Dramas', 'Entertainment', 'Faith & Spirituality', 'Faith and Spirituality', 'Family', 'Fantasy', 'Fitness', 'Game Show / Competition', 'Game Shows', 'Health & Wellness', 'Historical', 'History', 'Horror', 'Horror Movies', 'Independent Movies', 'International', 'International Movies', 'International TV Shows', 'Kids', "Kids' TV", 'Korean TV Shows', 'LGBTQ', 'LGBTQ Movies', 'LGBTQ+', 'Late Night', 'Latino', 'Lifestyle', 'Lifestyle & Culture', 'Medical', 'Mili

A quick look:
There appear to be multiple labels for the same genre, for example:
- Action-Adventure, Action & Adventure 
- Documentaries, Documentary, Docuseries

There is also an issue because one genre is called "Arts, Entertainment, and Culture", which gets parsed as three different genres (`Arts`, `Entertainment`, and `and Culture`) when we split on commas.

We should check to make sure no other genre names have commas in them like this.

For now, let's start by grouping each genre:

In [3]:
# Maps a raw label from the "listed_in" column to the list of reduced genres it
# stands for. Labels that do not appear as a key are kept unchanged.
#
# The mapping is applied in a single pass, so every value must already be a
# final genre name: a value that is itself a key would NOT be reduced again.
genre_reduction = {"Action & Adventure" : ["Action","Adventure"],
                   "Action-Adventure" : ["Action","Adventure"],
                   "Animals & Nature" : ["Nature"],
                   "Anime Features" : ["Anime"],
                   "Anime Series" : ["Anime"],
                   "Children & Family Movies" : ["Kids","Family"],
                   "Classic & Cult TV" : ["Classics","Cult"],
                   "Comedies" : ["Comedy"],
                   "Concert Film" : ["Music"],
                   "Crime TV Shows" : ["Crime"],
                   "Cult Movies" : ["Cult"],
                   "Documentaries" : ["Documentary"],
                   "Docuseries" : ["Documentary"],
                   "Dramas" : ["Drama"],
                   "Faith & Spirituality" : ["Faith and Spirituality"],
                   "Game Show / Competition" : ["Game Shows"],
                   "Historical" : ["History"],
                   "Horror Movies" : ["Horror"],
                   "International Movies" : ["International"],
                   "International TV Shows" : ["International"],
                   # The data spells this label "Kids' TV" (apostrophe after the s).
                   "Kids' TV" : ["Kids"],
                   "Late Night" : ["Talk Show"],
                   "LGBTQ Movies" : ["LGBTQ"],
                   "LGBTQ+" : ["LGBTQ"],
                   "Lifestyle & Culture": ["Lifestyle","Culture"],
                   "Music & Musicals" : ["Music", "Musical"],
                   "Music Videos and Concerts" : ["Music"],
                   "Reality TV" : ["Reality"],
                   "Romantic Comedy" : ["Romance", "Comedy"],
                   "Romantic TV Shows" : ["Romance"],
                   "Sci-Fi & Fantasy" : ["Science Fiction", "Fantasy"],
                   "Science & Nature TV" : ["Science & Technology","Nature"],
                   "Spanish-Language TV Shows" : ["Latino"],
                   "Sports Movies" : ["Sports"],
                   "Stand-Up Comedy" : ["Stand Up"],
                   "Stand-Up Comedy & Talk Shows" : ["Stand Up","Talk Show"],
                   "TV Action & Adventure" : ["Action", "Adventure"],
                   # Reduced straight to "Comedy" (not "Comedies") so TV and
                   # movie comedies end up under the same genre.
                   "TV Comedies" : ["Comedy"],
                   "TV Dramas" : ["Drama"],
                   "TV Horror" : ["Horror"],
                   "TV Mysteries" : ["Mystery"],
                   "TV Sci-Fi & Fantasy" : ["Science Fiction", "Fantasy"],
                   "TV Thrillers" : ["Thriller"],
                   "Talk Show and Variety" : ["Talk Show","Variety"],
                   "Teen TV Shows" : ["Teen"],
                   "Thrillers" : ["Thriller"],

                   # "Arts, Entertainment, and Culture" is a single genre whose
                   # name contains commas; splitting on ", " breaks it into the
                   # three fragments below, so each one is mapped back to the
                   # full name.
                   "Arts" : ["Arts, Entertainment, and Culture"],
                   "Entertainment" : ["Arts, Entertainment, and Culture"],
                   "and Culture" : ["Arts, Entertainment, and Culture"]
                   }

Here I made some subjective decisions about what qualifies as its own genre. There are obvious simplifications like the ones above, but there are also other choices that are not so obvious. For example, should `Sitcom` be simplified to `Comedy`? For this example I said no, but in other cases I did make the simplification, like `Animals & Nature` to `Nature`.

It was not apparent to me that any other categories were formatted like `Arts, Entertainment, and Culture`.

Let's now go through and add these genres as separate columns in our data.

In [7]:
# Rebuild the frame from the raw CSV so this cell is idempotent: re-running it
# never stacks genre columns on top of a previous run's result.
df = pd.read_csv("data/streaming_titles.csv")

# Resolve every title's raw "listed_in" labels to reduced genre column names.
# Labels without an entry in genre_reduction are kept unchanged.
genre_columns_per_title = []
for listed_in in df["listed_in"]:
    title_columns = []
    # Non-string entries (e.g. NaN for a missing value) contribute no genres.
    if isinstance(listed_in, str):
        for raw_label in listed_in.split(', '):
            for genre in genre_reduction.get(raw_label, [raw_label]):
                title_columns.append("genre." + genre.replace(" ", "_"))
    genre_columns_per_title.append(title_columns)

# dict.fromkeys de-duplicates while keeping first-appearance order, so the
# genre columns come out in the order the genres are first encountered.
genre_columns = list(dict.fromkeys(
    column for title_columns in genre_columns_per_title for column in title_columns
))
column_position = {column: position for position, column in enumerate(genre_columns)}

# Fill one dense boolean matrix and attach it in a single concat. This avoids
# inserting columns one cell at a time and gives every genre column a real
# bool dtype without a fillna pass.
genre_flags = np.zeros((len(df), len(genre_columns)), dtype=bool)
for row, title_columns in enumerate(genre_columns_per_title):
    for column in title_columns:
        genre_flags[row, column_position[column]] = True

df = pd.concat(
    [df, pd.DataFrame(genre_flags, index=df.index, columns=genre_columns)],
    axis=1,
)

df


Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,...,genre.Medical,genre.Spy/Espionage,genre.Buddy,genre.Parody,genre.Police/Cop,genre.Dance,genre.Series,genre.Soap_Opera_/_Melodrama,genre.Disaster,genre.Travel
0,s1,Movie,Ricky Velez: Here's Everything,,,,"October 24, 2021",2021,TV-MA,,...,False,False,False,False,False,False,False,False,False,False
1,s2,Movie,Silent Night,,,,"October 23, 2021",2020,,94 min,...,False,False,False,False,False,False,False,False,False,False
2,s3,Movie,The Marksman,,,,"October 23, 2021",2021,PG-13,108 min,...,False,False,False,False,False,False,False,False,False,False
3,s4,Movie,Gaia,,,,"October 22, 2021",2021,R,97 min,...,False,False,False,False,False,False,False,False,False,False
4,s5,Movie,Settlers,,,,"October 22, 2021",2021,,104 min,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22993,s1446,Movie,X-Men Origins: Wolverine,Gavin Hood,"Hugh Jackman, Liev Schreiber, Danny Huston, wi...","United States, United Kingdom","June 4, 2021",2009,PG-13,108 min,...,False,False,False,False,False,False,False,False,False,False
22994,s1447,Movie,Night at the Museum: Battle of the Smithsonian,Shawn Levy,"Ben Stiller, Amy Adams, Owen Wilson, Hank Azar...","United States, Canada","April 2, 2021",2009,PG,106 min,...,False,False,False,False,False,False,False,False,False,False
22995,s1448,Movie,Eddie the Eagle,Dexter Fletcher,"Tom Costello, Jo Hartley, Keith Allen, Dickon ...","United Kingdom, Germany, United States","December 18, 2020",2016,PG-13,107 min,...,False,False,False,False,False,False,False,False,False,False
22996,s1449,Movie,Bend It Like Beckham,Gurinder Chadha,"Parminder Nagra, Keira Knightley, Jonathan Rhy...","United Kingdom, Germany, United States","September 18, 2020",2003,PG-13,112 min,...,False,False,True,False,False,False,False,False,False,False


In [5]:
# Spot check: list the titles flagged with the "Buddy" genre.
df.loc[df["genre.Buddy"].eq(True)]

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,...,genre.Medical,genre.Spy/Espionage,genre.Buddy,genre.Parody,genre.Police/Cop,genre.Dance,genre.Series,genre.Soap_Opera_/_Melodrama,genre.Disaster,genre.Travel
21627,s80,TV Show,The Ghost and Molly McGee,,"Ashly Burch, Dana Snyder",United States,"October 6, 2021",2021,TV-Y7,1 Season,...,,,True,,,,,,,
21693,s146,TV Show,Chip 'n' Dale: Park Life,,"Matthew Géczy, Kaycie Chase, Bill Farmer, Sylv...",,"July 28, 2021",2021,TV-Y7,1 Season,...,,,True,,,,,,,
21695,s148,TV Show,Disney Junior T.O.T.S.,,"Vanessa Williams, Megan Hilty, Jet Jurgensmeye...",United States,"July 28, 2021",2018,TV-Y,2 Seasons,...,,,True,,,,,,,
21710,s163,TV Show,Turner & Hooch,,"Josh Peck, Carra Patterson, Lyndsy Fonseca, Va...",,"July 21, 2021",2021,TV-PG,1 Season,...,,,True,,,,,,,
21742,s195,Movie,Luca,Enrico Casarosa,"Jacob Tremblay, Jack Dylan Grazer, Emma Berman...",United States,"June 18, 2021",2021,PG,101 min,...,,,True,,,,,,,
21826,s279,TV Show,The Falcon and The Winter Soldier,,"Anthony Mackie, Sebastian Stan, Daniel Brühl, ...",,"March 19, 2021",2021,TV-14,1 Season,...,,,True,,,,,,,
21833,s286,TV Show,Car SOS,,,United Kingdom,"February 26, 2021",2012,TV-PG,8 Seasons,...,,,True,,,,,,,
21920,s373,TV Show,Goldie & Bear,,"Georgie Kidder, Justine Huxley, Natalie Lander...",United States,"November 6, 2020",2015,TV-Y,2 Seasons,...,,,True,,,,,,,
22011,s464,TV Show,Rogue Trip,,"Bob Woodruff, Mack Woodruff",,"July 24, 2020",2020,TV-PG,1 Season,...,,,True,,,,,,,
22106,s559,TV Show,Disney Mickey and the Roadster Racers - Chip '...,,"Tress MacNeille, Corey Burton, Bret Iwan, Russ...",,"April 17, 2020",2017,TV-Y,2 Seasons,...,,,True,,,,,,,
