# Informationen für dieses Notebook

Dieses Notebook (.ipynb) dient dazu, den Datensatz "df_movie_for_streamlit.csv" (aus dem Ordner "Datasets") zu importieren und so zu bearbeiten, dass er für die Film-Filter-Funktionalität genutzt werden kann. Der bereinigte Datensatz wird als "df_movie_filter.csv" im Ordner "Datasets" abgespeichert. Die Datei "filter_functions.py" greift dann auf diesen Datensatz zu und verwendet ihn in der Streamlit-App.

In [1]:
import ast

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns


In [2]:
%matplotlib inline
sns.set_theme(style="darkgrid", palette="bright", context="paper")

In [3]:
# Configure how pandas renders DataFrames in cell output
pd.options.display.max_columns = None  # no column limit: show every column
pd.options.display.width = 1000  # output width in characters

In [4]:
# Load the source CSV from the Datasets folder (path is relative to this notebook)
df_movie_filter = pd.read_csv(filepath_or_buffer='../../Datasets/df_movie_for_streamlit.csv')

# Preview the first rows (head() returns 5 rows by default)
df_movie_filter.head()

Unnamed: 0,id,Title,Movie Link,Year,Duration,MPA,Rating,Votes,budget,grossWorldWide,gross_US_Canada,opening_weekend_Gross,directors,writers,stars,genres,countries_origin,filming_locations,production_companies,Languages,wins,nominations,oscars
0,tt0073195,Jaws,https://www.imdb.com/title/tt0073195,1975,124.0,PG,8.1,683000.0,7000000.0,477220580.0,266567580.0,7061513.0,['Steven Spielberg'],"['Peter Benchley', 'Carl Gottlieb']","['Roy Scheider', 'Robert Shaw', 'Richard Dreyf...","['Monster Horror', 'Sea Adventure', 'Survival'...",['United States'],"[""Water Street, Edgartown, Martha's Vineyard, ...","['Zanuck/Brown Productions', 'Universal Pictur...",['English'],0,20,0
1,tt0073629,The Rocky Horror Picture Show,https://www.imdb.com/title/tt0073629,1975,100.0,R,7.4,173000.0,1200000.0,115798478.0,112892319.0,,['Jim Sharman'],"[""Richard O'Brien"", 'Jim Sharman']","['Tim Curry', 'Susan Sarandon', 'Barry Bostwick']","['Dark Comedy', 'Raunchy Comedy', 'Rock Musica...","['United Kingdom', 'United States']","['Oakley Court, Windsor Road, Oakley Green, Wi...","['Twentieth Century Fox', 'Michael White Produ...",['English'],0,4,0
2,tt0073486,One Flew Over the Cuckoo's Nest,https://www.imdb.com/title/tt0073486,1975,133.0,R,8.7,1.0,3000000.0,109115366.0,108981275.0,,['Milos Forman'],"['Lawrence Hauben', 'Bo Goldman', 'Ken Kesey']","['Jack Nicholson', 'Louise Fletcher', 'Michael...","['Medical Drama', 'Psychological Drama', 'Drama']",['United States'],['Oregon State Mental Hospital - 2600 Center S...,"['Fantasy Films', 'N.V. Zvaluw']",['English'],0,15,0
3,tt0072890,Dog Day Afternoon,https://www.imdb.com/title/tt0072890,1975,125.0,R,8.0,279000.0,1800000.0,50002721.0,50000000.0,,['Sidney Lumet'],"['Frank Pierson', 'P.F. Kluge', 'Thomas Moore']","['Al Pacino', 'John Cazale', 'Penelope Allen']","['Heist', 'True Crime', 'Biography', 'Crime', ...",['United States'],"['285 Prospect Park West, Brooklyn, New York C...","['Warner Bros.', 'Artists Entertainment Complex']",['English'],0,20,0
4,tt0073692,Shampoo,https://www.imdb.com/title/tt0073692,1975,110.0,R,6.4,15000.0,4000000.0,49407734.0,49407734.0,,['Hal Ashby'],"['Robert Towne', 'Warren Beatty']","['Warren Beatty', 'Julie Christie', 'Goldie Ha...","['Satire', 'Comedy', 'Drama']",['United States'],"['2270 Bowmont Drive, Beverly Hills, Californi...","['Persky-Bright / Vista', 'Columbia Pictures',...",['English'],0,11,0


In [5]:
# List the column labels of the loaded DataFrame
df_movie_filter.columns

Index(['id', 'Title', 'Movie Link', 'Year', 'Duration', 'MPA', 'Rating', 'Votes', 'budget', 'grossWorldWide', 'gross_US_Canada', 'opening_weekend_Gross', 'directors', 'writers', 'stars', 'genres', 'countries_origin', 'filming_locations', 'production_companies', 'Languages', 'wins', 'nominations', 'oscars'], dtype='object')

In [6]:
# Keep only the columns listed below, in exactly this order;
# a missing column raises KeyError rather than being skipped silently.
df_movie_filter = df_movie_filter.loc[
    :,
    [
        'Title',
        'Year',
        'Duration',
        'Rating',
        'genres',
        'oscars',
        'stars',
        'directors',
        'MPA',
        'Movie Link',
    ],
]

# Preview the reduced frame (head() returns 5 rows by default)
df_movie_filter.head()


Unnamed: 0,Title,Year,Duration,Rating,genres,oscars,stars,directors,MPA,Movie Link
0,Jaws,1975,124.0,8.1,"['Monster Horror', 'Sea Adventure', 'Survival'...",0,"['Roy Scheider', 'Robert Shaw', 'Richard Dreyf...",['Steven Spielberg'],PG,https://www.imdb.com/title/tt0073195
1,The Rocky Horror Picture Show,1975,100.0,7.4,"['Dark Comedy', 'Raunchy Comedy', 'Rock Musica...",0,"['Tim Curry', 'Susan Sarandon', 'Barry Bostwick']",['Jim Sharman'],R,https://www.imdb.com/title/tt0073629
2,One Flew Over the Cuckoo's Nest,1975,133.0,8.7,"['Medical Drama', 'Psychological Drama', 'Drama']",0,"['Jack Nicholson', 'Louise Fletcher', 'Michael...",['Milos Forman'],R,https://www.imdb.com/title/tt0073486
3,Dog Day Afternoon,1975,125.0,8.0,"['Heist', 'True Crime', 'Biography', 'Crime', ...",0,"['Al Pacino', 'John Cazale', 'Penelope Allen']",['Sidney Lumet'],R,https://www.imdb.com/title/tt0072890
4,Shampoo,1975,110.0,6.4,"['Satire', 'Comedy', 'Drama']",0,"['Warren Beatty', 'Julie Christie', 'Goldie Ha...",['Hal Ashby'],R,https://www.imdb.com/title/tt0073692


In [7]:
# Collect the distinct genre labels across all rows.
# The CSV stores each genre list as the text of a Python list literal, so
# string cells are parsed with ast.literal_eval. Unlike eval(), it only
# accepts literals and cannot execute code embedded in the data file.
unique_genres = set()
for genres in df_movie_filter['genres'].dropna():
    unique_genres.update(ast.literal_eval(genres) if isinstance(genres, str) else genres)

print("Unique Genres:")
print(sorted(unique_genres))

# Collect the distinct MPA rating values (missing values excluded)
unique_mpa = df_movie_filter['MPA'].dropna().unique()
print("\nUnique MPA Ratings:")
print(sorted(unique_mpa))

Unique Genres:
['Action', 'Action Epic', 'Adult Animation', 'Adventure', 'Adventure Epic', 'Alien Invasion', 'Animal Adventure', 'Animation', 'Anime', 'Artificial Intelligence', 'B-Action', 'B-Horror', 'Baseball', 'Basketball', 'Biography', 'Body Horror', 'Body Swap Comedy', 'Boxing', 'Buddy Comedy', 'Buddy Cop', 'Bumbling Detective', 'Caper', 'Car Action', 'Classic Musical', 'Classical Western', 'Comedy', 'Coming-of-Age', 'Computer Animation', 'Concert', 'Conspiracy Thriller', 'Contemporary Western', 'Cop Drama', 'Costume Drama', 'Cozy Mystery', 'Crime', 'Crime Documentary', 'Cyber Thriller', 'Cyberpunk', 'Dark Comedy', 'Dark Fantasy', 'Dark Romance', 'Desert Adventure', 'Dinosaur Adventure', 'Disaster', 'Docudrama', 'Documentary', 'Drama', 'Drug Crime', 'Dystopian Sci-Fi', 'Epic', 'Erotic Thriller', 'Extreme Sport', 'Fairy Tale', 'Faith & Spirituality Documentary', 'Family', 'Fantasy', 'Fantasy Epic', 'Farce', 'Feel-Good Romance', 'Financial Drama', 'Folk Horror', 'Food Documentary',

In [8]:
# Inspect rows whose Duration exceeds 600 (presumably minutes; confirm the unit)
df_movie_filter.loc[df_movie_filter['Duration'].gt(600)]


Unnamed: 0,Title,Year,Duration,Rating,genres,oscars,stars,directors,MPA,Movie Link
12347,****,1967,1500.0,6.2,['Drama'],0,"['Brigid Berlin', 'Tally Brown', 'David Croland']",['Andy Warhol'],TV-MA,https://www.imdb.com/title/tt0179184
12774,The Cure for Insomnia,1987,5220.0,4.6,"['Documentary', 'Music']",0,"['Cosmic Lightning', 'L.D. Groban', 'J.T.4']",['John Henry Timmis IV'],Unrated,https://www.imdb.com/title/tt0284020
16052,The Longest Most Meaningless Movie in the World,1970,2880.0,5.6,"['Documentary', 'Drama', 'History']",0,['Bob Seely'],['Vincent Patouillard'],TV-14,https://www.imdb.com/title/tt0342707
32567,Out 1,1971,776.0,7.5,"['Drama', 'Mystery']",0,"['Michèle Moretti', 'Hermine Karagheuz', 'Kare...","['Jacques Rivette', 'Suzanne Schiffman']",Not Rated,https://www.imdb.com/title/tt0246135


In [9]:
# Write the reduced frame to the Datasets folder.
# index=True is the pandas default, spelled out here: the row index is
# written as the first, unnamed column of the CSV.
# NOTE(review): if the consumer (filter_functions.py) does not expect that
# extra column, index=False may be intended. Confirm before changing.
df_movie_filter.to_csv('../../Datasets/df_movie_filter.csv', index=True)

print("Datei wurde erfolgreich gespeichert!")

Datei wurde erfolgreich gespeichert!
