<a href="https://colab.research.google.com/github/devencentyk/datavisualization/blob/main/HarryPotterSpells_DashApp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Required Imports

In [None]:
!pip install jupyter-dash
!pip install dash-bootstrap-components
!pip install plotly jupyterlab-d3
!pip install -U dash
!pip install session_info 

In [23]:
import pandas as pd
import jupyter_dash
from pandas.core.arrays.sparse import dtype
import plotly.graph_objects as go
import plotly.express as px
from jupyter_dash import JupyterDash
from dash import dcc
from dash import html
from dash import dash_table
import numpy as np
from IPython.display import Javascript
from IPython.display import display, HTML
import session_info 
import matplotlib.pyplot as plt
import regex as re

<IPython.core.display.Javascript object>

# Importing Data

In [27]:
# Load the eight dialog CSVs (hp1.csv .. hp8.csv) from the GitHub repository.
# Each frame gets a 'book' column ("Book 1" .. "Book 8") recording which file
# its rows came from.
data = [
    pd.read_csv(
        'https://raw.githubusercontent.com/devencentyk/datavisualization/main/{}'.format(f"hp{i}.csv")
    ).assign(book=f"Book {i}")
    for i in range(1, 9)
]

# Concatenate the DataFrames along the rows
df = pd.concat(data, ignore_index=True)

# Make every dialog entry a string so the lines can be joined below.
# Missing entries are replaced with '' first; a bare astype(str) would turn
# them into the literal text "nan" and inject it into the joined dialog.
df['dialog'] = df['dialog'].fillna('').astype(str)
df2 = df.copy()

# Join all dialog lines into one text per value of the 'movie' column
df_dialog = df.groupby('movie')['dialog'].apply(' '.join).reset_index()

<IPython.core.display.Javascript object>

In [5]:
# Spell reference table; the counting cells below read its 'Incantation' column.
spell_df = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/devencentyk/datavisualization/main/Spells.csv'
)

# Processing

In [26]:
# Replace the 'movie' value of rows 0-7 of df_dialog with a short title.
# The titles are assigned purely by row position.
# NOTE(review): this presumably relies on the groupby that built df_dialog
# returning the movies in alphabetical order -- confirm against the raw
# 'movie' values before changing the data source.
short_titles = (
    'Chamber of Secrets',
    'Deathly Hallows Part 1',
    'Deathly Hallows Part 2',
    'Goblet of Fire',
    'Half-Blood Prince',
    'Order of the Phoenix',
    'Philosopher\'s Stone',
    'Prisoner of Azkaban',
)

for row_label, short_title in enumerate(short_titles):
    df_dialog.loc[row_label, 'movie'] = short_title

<IPython.core.display.Javascript object>

In [6]:
# Count, for every movie, how often each incantation appears in its dialog.
# Matches are whole-word and case-insensitive.

movies = [
    'Philosopher\'s Stone',
    'Chamber of Secrets',
    'Prisoner of Azkaban',
    'Goblet of Fire',
    'Order of the Phoenix',
    'Half-Blood Prince',
    'Deathly Hallows Part 1',
    'Deathly Hallows Part 2'
]

# One compiled pattern per incantation, built once and reused for every movie.
# - re.escape makes any regex metacharacter in an incantation match literally.
# - Case-insensitivity comes from the IGNORECASE flag alone; no inline (?i)
#   is needed inside the pattern.
# - Missing incantations (NaN) are skipped: they cannot be concatenated into
#   a pattern and there is nothing to search for.
spell_patterns = {
    spell: re.compile(r'\b' + re.escape(spell) + r'\b', re.IGNORECASE)
    for spell in spell_df['Incantation'].dropna()
}

# spell_counts[movie][spell] -> number of matches in that movie's dialog text
spell_counts = {}
for movie in movies:
    movie_dialog = df_dialog.loc[df_dialog['movie'] == movie, 'dialog']
    if movie_dialog.empty:
        # Fail with a readable message instead of an opaque "KeyError: 0".
        raise KeyError(
            f"No row in df_dialog has movie == {movie!r}; "
            "check that df_dialog['movie'] holds the short movie titles."
        )
    dialog_text = movie_dialog.iloc[0]
    spell_counts[movie] = {
        spell: len(pattern.findall(dialog_text))
        for spell, pattern in spell_patterns.items()
    }

# Rows are incantations, columns are movies
counts_df = pd.DataFrame.from_dict(spell_counts)

# Order the rows from the most-cast to the least-cast spell across all movies
counts_df = counts_df.loc[counts_df.sum(axis=1).sort_values(ascending=False).index]

In [38]:
# Count how many incantations appear in each character's dialog, then rank
# the characters by that total.

# Join all dialog lines into one text per character
df_chars = df2.groupby('character')['dialog'].apply(' '.join).reset_index()

chars = df_chars['character'].unique()

# One compiled whole-word, case-insensitive pattern per incantation.
# re.escape keeps regex metacharacters in an incantation literal; missing
# incantations (NaN) are skipped because there is nothing to search for.
spell_patterns = {
    spell: re.compile(r'\b' + re.escape(spell) + r'\b', re.IGNORECASE)
    for spell in spell_df['Incantation'].dropna()
}

# spell_counts[character][spell] -> number of matches in that character's text.
# df_chars has one row per character (it comes from the groupby above), so the
# rows can be walked directly instead of re-filtering the frame per character.
spell_counts = {
    char: {
        spell: len(pattern.findall(dialog_text))
        for spell, pattern in spell_patterns.items()
    }
    for char, dialog_text in zip(df_chars['character'], df_chars['dialog'])
}

# Total matches per character as a two-column frame: Character, Spells Cast
char_counts_df = (
    pd.DataFrame.from_dict(spell_counts)
    .sum(axis=0)
    .rename('Spells Cast')
    .rename_axis('Character')
    .reset_index()
)

# Characters with at least one match. Computed before the aggregate labels
# below are removed, as in the original cell. A boolean mask replaces the
# per-character Series.bool() call, which newer pandas no longer provides.
spellcasters = char_counts_df.loc[
    char_counts_df['Spells Cast'] != 0, 'Character'
].tolist()

# 'Professors' and 'All' are excluded from the ranking
is_excluded = char_counts_df['Character'].isin(['Professors', 'All'])
char_counts_df = char_counts_df[~is_excluded].sort_values(by='Spells Cast', ascending=False)

top_spellcasters = char_counts_df.head(10)

<IPython.core.display.Javascript object>

# Making graphs

In [39]:
# Stacked bar chart: one bar per spell, one coloured segment per movie.
df = counts_df  # alias kept from the original cell; the figure code does not use it

# Bar colour for each movie (keys must match the column names of counts_df)
m_colors = {'Philosopher\'s Stone': '#946B2D',
            'Chamber of Secrets': '#740001',
            'Prisoner of Azkaban': '#d3a625',
            'Goblet of Fire': '#222f5b',
            'Order of the Phoenix': '#ae0001',
            'Half-Blood Prince': '#aaaaaa',
            'Deathly Hallows Part 1': '#2E2E2E',
            'Deathly Hallows Part 2': '#2a623d'}

# The last four rows of counts_df are left off the chart. counts_df is ordered
# by total casts (descending) where it is built, so these are the least-cast
# spells. NOTE(review): the reason for dropping exactly four is not recorded.
N_SPELLS_HIDDEN = 4

# Slice the rows once so x and y always have the same length; previously only
# x was sliced and y carried four extra values.
plotted_counts = counts_df.iloc[:-N_SPELLS_HIDDEN]

fig = go.Figure(data=[
    go.Bar(
        x=plotted_counts.index,
        y=plotted_counts[movie],
        name=movie,
        hovertemplate=(
            "<b>%{x}</b><br>" +
            "Cast %{y} time(s) in The " + movie + '<extra></extra>'
        ),
        marker_color=m_colors[movie],
    )
    for movie in counts_df.columns
])

# Titles, stacking, fonts and background colours
fig.update_layout(
    title='Most Commonly Cast Spells Throughout Movie Series',
    xaxis_title='Spell',
    yaxis_title='Times Cast',
    barmode='stack',
    showlegend=True,
    legend=dict(font=dict(size=18)),
    font=dict(family='Cochin', size=18, color='black'),
    hoverlabel=dict(font_size=19, font_family="Cochin"),
    plot_bgcolor='#e9e7e2',
    paper_bgcolor='#d4d1c9',
)

# Slanted spell names and a visible baseline on the x axis
fig.update_xaxes(tickangle=45, showline=True, linewidth=1.5, linecolor='black')

# Ticks in the paper colour are invisible; they only push the labels outward
fig.update_yaxes(ticks="outside", tickcolor='#d4d1c9', ticklen=10)

# Black outline around every bar segment
fig.update_traces(marker=dict(line=dict(width=2, color='black')))







###################Next graph#############################

# Horizontal bar chart of the ten characters in top_spellcasters,
# one colour per character, with the count printed on each bar.
fig2 = px.bar(
    top_spellcasters,
    x='Spells Cast',
    y='Character',
    orientation='h',
    text_auto=True,
    color='Character',
    color_discrete_sequence=['#740001', '#d3a625', '#222f5b', '#946B2D', '#ae0001',
                             '#aaaaaa', '#2E2E2E', '#2a623d', '#000000', '#ffd800'],
    # The original dict listed 'Spells Cast' twice (':.0f' and True); Python
    # keeps only the last value, so True is what was actually in effect.
    hover_data={'Character': False, 'Spells Cast': True},
)

# Titles, fonts, spacing and background colours. Hover is switched off
# entirely, so the counts are read from the bar labels.
fig2.update_layout(
    title='Top 10 Spell Casters',
    yaxis_title='Character',
    xaxis_title='Number of Spells Cast',
    font=dict(family='Cochin', size=18, color='black'),
    showlegend=False,
    bargap=0.15,
    bargroupgap=0.1,
    plot_bgcolor='#e9e7e2',
    paper_bgcolor='#d4d1c9',
    hovermode=False,
)

# Black outline around every bar
fig2.update_traces(marker=dict(line=dict(width=1.5, color='black')))

# Ticks in the paper colour are invisible; they only push the labels outward.
# The axis line itself is drawn in black.
fig2.update_yaxes(ticks="outside", tickcolor='#d4d1c9', ticklen=10,
                  showline=True, linewidth=1.5, linecolor='black')

<IPython.core.display.Javascript object>

# Dash app

In [40]:
# Widen the notebook container, and ask Colab to size the output iframe
# (max height 5000) so the inline app is not clipped.
# NOTE(review): the second call references google.colab, so it presumably
# only has an effect when the notebook runs in Colab.
display(HTML("<style>.container { width:100% !important; }</style>"))
display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 5000})'''))

# Markdown shown next to the top-spell-casters chart
OVERVIEW_MD = '''
## Overview
Have you ever wondered which spells were used the most throughout the HP movies?
Or which wizards/witches cast the most spells?
Take a look at the graphs to find out! Please note, this is a visualization
of **_voiced incantations_**, meaning only spells that were vocally cast
are included in this data. Any spells referenced in the movies, but not cast,
have been omitted.




### Things to know to use the graph below:
* **Double click** on a movie in the **legend** to see the spells cast in that movie alone.
* You can **single click** the other movies to **add** them back to the graph one at a time.
'''

# Markdown shown at the bottom of the page.
# NOTE(review): both links point to the same Kaggle URL -- confirm whether the
# spell list should link somewhere else.
ABOUT_MD = '''
### About the Data
____________________________________________________________________________________________________________
The data was obtained by using Regex to count the number of times each spell appeared in each movie dialog.
Click [here](https://www.kaggle.com/datasets/kornflex/harry-potter-movies-dataset?select=datasets) for the corpus of dialog, and [here](https://www.kaggle.com/datasets/kornflex/harry-potter-movies-dataset?select=datasets) for the spell names and their effects.

*Note: This data is for entertainment purposes only and may contain errors or omissions.*
'''

#Building a jupyter dash app
app = JupyterDash(__name__)

# Page structure, top to bottom:
#   banner (logo + subtitle) / top casters chart beside the overview text /
#   stacked spell chart / data notes.
# Style keys are camelCase, as Dash expects (marginLeft, not margin-left).
app.layout = html.Div(
    [
        html.Div(
            [
                html.Div(
                    html.Img(src='https://github.com/devencentyk/datavisualization/blob/main/logo_new.png?raw=true', height='75px'),
                    style={'textAlign': 'center'},
                ),
                html.Div(
                    html.Img(src='https://github.com/devencentyk/datavisualization/blob/main/subtitle.png?raw=true', height='50px', width='150px'),
                    style={'textAlign': 'center', 'marginLeft': '200px'},
                ),
            ],
            style={'height': '130px', 'backgroundColor': '#d4d1c9', 'padding': '20px 0px 0px 0px'},
        ),
        html.Div(
            [
                html.Div(
                    dcc.Graph(figure=fig2, style={'height': '550px'}),
                    style={'display': 'inline-block', 'width': '60%'},
                ),
                html.Div(
                    dcc.Markdown(OVERVIEW_MD, style={'fontSize': '20px'}),
                    style={'color': 'black',
                           'display': 'inline-block',
                           'width': '30%',
                           'padding': '0px 0px 20px 20px'},
                ),
            ],
            style={'display': 'flex', 'height': '40%', 'width': '100%'},
        ),
        html.Div(dcc.Graph(figure=fig, style={'height': '700px'}), style={'width': '100%'}),
        html.Div(dcc.Markdown(ABOUT_MD)),
        html.Br(),
    ],
    style={'backgroundColor': '#d4d1c9', 'padding': '25px 20px 20px 20px', "border": "2px black solid"},
)


if __name__ == '__main__':
    # Render the app inline in the notebook output
    app.run_server(mode='inline')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Dash is running on http://127.0.0.1:8050/



INFO:dash.dash:Dash is running on http://127.0.0.1:8050/



<IPython.core.display.Javascript object>

# Making requirements.txt file

In [36]:
# Show the session_info report for the current kernel session; used here as a
# reference when recording the notebook's dependencies.
session_info.show() 


<IPython.core.display.Javascript object>

In [37]:
# Write the output of `pip freeze` to requirements.txt in the current working directory.
!pip freeze > requirements.txt

<IPython.core.display.Javascript object>