In [92]:
#Install all needed dependencies
import pandas as pd
import numpy as np 
import plotly.express as px
import seaborn as sns
from math import floor
from dash import Dash, dcc, html, Input, Output

In [93]:
#Read in csv into a pandas data frame
df = pd.read_csv("data.csv")
# load the CSS stylesheet
stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css'] 
#Initialize the Dash app
app = Dash(__name__, external_stylesheets=stylesheets)

In [94]:
#View the first five rows of the data frame
print(df.head())
#Get number of rows and cols
print(df.shape)
#Get the data types of the cols
print(df.dtypes)

               Series_Title  Released_Year Certificate  Runtime  IMDB_Rating  \
0  The Shawshank Redemption           1994           A    142.0          9.3   
1             The Godfather           1972           A    175.0          9.2   
2           The Dark Knight           2008          UA    152.0          9.0   
3    The Godfather: Part II           1974           A    202.0          9.0   
4              12 Angry Men           1957           U     96.0          9.0   

   Meta_score              Director           Star1           Star2  \
0        80.0        Frank Darabont     Tim Robbins  Morgan Freeman   
1       100.0  Francis Ford Coppola   Marlon Brando       Al Pacino   
2        84.0     Christopher Nolan  Christian Bale    Heath Ledger   
3        90.0  Francis Ford Coppola       Al Pacino  Robert De Niro   
4        96.0          Sidney Lumet     Henry Fonda     Lee J. Cobb   

           Star3  ... Horror  Music  Musical  Mystery  Romance  Sci-Fi  Sport  \
0     Bob G

In [95]:
#sorting years to get the min and max years
years = sorted(int(year) for year in df['Released_Year'].unique())
# Extract genre names from the column headers
genre_columns = df.columns[df.columns.get_loc('Gross')+1:]

@app.callback(
    Output('line-graph', 'figure'),
    [Input('genre-dropdown', 'value'),
     Input('year-slider', 'value')]
)
def update_graph(selected_genres, selected_years):
    if not selected_genres:
        selected_genres = genre_columns.tolist()  # Default to all genres if none are selected
    if not selected_years:
        selected_years = [min(years), max(years)]
    
    # Filter the dataframe for the selected years
    filtered_df = df[df['Released_Year'].between(selected_years[0], selected_years[1])]
    
    # Define a function to assign each year to a three-year bin
    def year_to_bin(year):
        base_year = min(years)
        return f"{int(floor((year - base_year) / 3) * 3 + base_year)}-{int(floor((year - base_year) / 3) * 3 + base_year + 2)}"
    
    # Apply the function to create a new 'Year_Bin' column
    filtered_df['Year_Bin'] = filtered_df['Released_Year'].apply(year_to_bin)
    
    # Aggregate the genre data over the three-year bins
    genre_data = filtered_df.loc[:, selected_genres + ['Year_Bin']]
    genre_popularity = genre_data.groupby('Year_Bin')[selected_genres].sum()
    
    # Reset index to make 'Year_Bin' a column again for plotting
    genre_popularity = genre_popularity.reset_index()
    
    # Convert dataframe to long format for Plotly
    df_plottable = pd.melt(genre_popularity, id_vars=['Year_Bin'], var_name='Genre', value_name='Popularity')
    
    # Create the stacked area chart
    fig = px.area(df_plottable,
                  x='Year_Bin',
                  y='Popularity',
                  color='Genre',
                  line_group='Genre',
                  title='Genre Popularity Over Three-Year Bins',
                  labels={'Popularity': 'Number of Movies', 'Year_Bin': 'Year Bin'})

    return fig

@app.callback(
    Output('bubble-chart', 'figure'),
    [Input('genre-dropdown', 'value'),
     Input('year-slider', 'value')]
)
def update_bubble_chart(selected_genres, selected_years):
    # Filter dataframe based on inputs
    filtered_df = df[df['Released_Year'].between(selected_years[0], selected_years[1])]
    if selected_genres:
        filtered_df = filtered_df[filtered_df['Genre'].isin(selected_genres)]
    
    # Create the bubble chart using Plotly Express
    fig = px.scatter(
        filtered_df,
        x='IMDB_Rating',
        y='Gross',
        size='No_of_Votes',  # Assuming 'No_of_Votes' is the column name for vote count
        color='Series_Title',  # This can be changed to any column as per your needs
        hover_name='Series_Title',  # Shows movie title on hover
        title='IMDB Rating vs Gross Revenue by Votes',
        labels={
            'IMDB_Rating': 'IMDB Rating',
            'Gross_Revenue': 'Gross Revenue ($)',
            'No_of_Votes': 'Number of Votes'
        },
        size_max=60
    )

    return fig

In [96]:
#App layout to define the structure of the web page
app.layout = html.Div([
    #Contaier div to store all the components
    html.Div(children=[
        #Title describing the purpose of the web app\
        html.H1('Movie Data Visualization Dashboard', style={'textAlign': 'center', 'width': '100%'}), 
        html.Div(children=[
            dcc.Graph(id='line-graph'),  # Graph component where the line graph will be displayed
            dcc.Graph(id='bubble-chart'),
            ], className='twelve columns', style={'padding': '0 20px'}),  # Additional padding if needed
        html.Div(children=[
            
            html.Div(children=[
                html.Div(
                    # Movie dropdown component
                    dcc.Dropdown(
                        # Identifier for the movie dropdown component
                        id='movie-dropdown',  
                        #Using the dataframe to populate the options by creating key value pairs with unique movie titles
                        options=[{'label': title, 'value': title} for title in df['Series_Title'].unique()],  
                        #Custom placeholder text to describe what actions to take as a user
                        placeholder = 'Select One or More Movie Title',
                        # Allows user to select multiple movies at once
                        multi=True,  
                    # Styling to fit half the width
                    ), className = 'twelve columns', style={'margin-bottom': '50px', 'margin-top': '100px'}
                ),
                html.Div(
                    # Director dropdown component
                    dcc.Dropdown(
                        # Identifier for the director dropdown component
                        id='director-dropdown',  
                        #Using the dataframe to populate the options by creating key value pairs with unique director names
                        options=[{'label': director, 'value': director} for director in df['Director'].unique()],  
                        #Custom placeholder text to describe what actions to take as a user
                        placeholder = 'Select One or More Director',
                        # Allows user to select multiple directors at once
                        multi=True,  
                    # Styling to fit half the width
                    ), className = 'twelve columns', style={'margin-bottom': '50px'}
                ),
                html.Div(
                    # Genre dropdown component
                    dcc.Dropdown(
                        # Identifier for the genre dropdown component
                        id='genre-dropdown',  
                        #Using the dataframe to populate the options by creating key value pairs with unique genres
                        options=[{'label': genre, 'value': genre} for genre in genre_columns.unique()],  
                        #Custom placeholder text to describe what actions to take as a user
                        placeholder = 'Select One or More Genre',
                        # Allows user to select multiple genres at once
                        multi=True,  
                    # Styling to fit half the width
                    ), className = 'twelve columns', style={'margin-bottom': '50px'}
                ),
                html.Div(
                    # Year range slider
                    dcc.RangeSlider(
                        # Identifier for the year range slider component
                        id='year-slider',
                        # Lower bound as determined from our sorted year array 
                        min=years[0],
                        # Upper bound as determined from our sorted year array 
                        max=years[-1],
                        # Default to having full range selected
                        value=[years[0], years[-1]],
                        # Put a mark every 10 years for readability
                        marks={str(year): str(year) for year in years[::8]},
                    # Styling to fit half the width
                    ), className = 'twelve columns', style={'margin-bottom': '50px'}
                ),     
            ], className = 'six columns', style={'alignItems': 'flex-end', 'display': 'flex', 'flexDirection': 'column'}),
        ], className='row', style={'display': 'flex', 'alignItems': 'flex-start', 'width': '100%'}),  # Ensure this is flex-start to align the items at the top
    
    #Styling for the container div that has all components stored in column direction and centers them
    ], style={'display': 'flex', 'height': '100vh', 'flex-direction': 'column'}),  
])

#Run the app
if __name__ == '__main__':
    app.run(jupyter_mode='tab', debug=True)

Dash app running on http://127.0.0.1:8050/


<IPython.core.display.Javascript object>



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pandas/core/indexes/base.py:3802, in Index.get_loc(
    self=Index(['Series_Title', 'Released_Year', 'Certifi...riller', 'War', 'Western'],
      dtype='object'),
    key='Genre'
)
   3801 try:
-> 3802     return self._engine.get_loc(casted_key)
        casted_key = 'Genre'
        self = Index(['Series_Title', 'Released_Year', 'Certificate', 'Runtime',
       'IMDB_Rating', 'Meta_score', 'Director', 'Star1', 'Star2', 'Star3',
       'Star4', 'No_of_Votes', 'Gross', 'Action', 'Adventure', 'Animation',
       'Biography', 'Comedy', 'Crime', 'Drama', 'Family', 'Fantasy',
       'Film-Noir', 'History', 'Horror', 'Music', 'Musical', 'Mystery',
       'Romance', 'Sci-Fi', 'Sport', 'Thriller', 'War', 'Western'],
      dtype='object')
   3803 except KeyError a



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

