# DBLP PUBLICATION DATA EXTRACTION
In this notebook, I extract publication data from the DBLP API. One search request is sent for each vowel (a, e, i, o, u), and the XML response is parsed to capture the relevant details of every publication returned. A sample of the resulting dataset is then displayed in a structured tabular format.

In [None]:
import requests
import xml.etree.ElementTree as ET
import pandas as pd
from IPython.display import display, HTML

# Initialize the base URL and endpoint for the DBLP API
BASE_URL = "http://dblp.org/search/publ/api"

# Seconds to wait for DBLP before abandoning a request; without a timeout a
# stalled connection would hang the notebook indefinitely.
REQUEST_TIMEOUT_SECONDS = 30

# Output column -> child tag of <info> it is read from. The title and the author
# list are handled separately below so the original column order is preserved.
INFO_FIELDS = {
    'Publication Year': 'year',
    'Publication Venue': 'venue',
    'Publication Volume': 'volume',
    'Issue Number': 'number',
    'Page Numbers': 'pages',
    'Publication URL': 'url',
    'Digital Object Identifier (DOI)': 'ee',
    'Publishing Entity': 'publisher',
    'Cross-Referenced Source': 'crossref',
    'Book Title': 'booktitle',
    'Publication Series': 'series',
    'Additional Notes': 'note',
    'Publication Type': 'type',
}

vowels = ['a', 'e', 'i', 'o', 'u']
all_records = []


def _child_text(parent, tag):
    """Return the text of the first direct child ``tag`` of ``parent``, or None if there is no such child."""
    child = parent.find(tag)
    return child.text if child is not None else None


for vowel in vowels:
    params = {
        'q': vowel,
        'format': 'xml',
        'h': 1000  # retrieve 1000 hits per request. Adjust as needed.
    }

    # A network failure for one vowel must not abort the remaining queries.
    try:
        response = requests.get(
            BASE_URL, params=params, timeout=REQUEST_TIMEOUT_SECONDS)
    except requests.RequestException as exc:
        print(f"API request for vowel '{vowel}' failed:", exc)
        continue

    if response.status_code != 200:
        print(
            f"API request for vowel '{vowel}' failed with status code:", response.status_code)
        continue

    # A truncated or non-XML body is reported and skipped rather than raising.
    try:
        root = ET.fromstring(response.content)
    except ET.ParseError as exc:
        print(f"API response for vowel '{vowel}' could not be parsed:", exc)
        continue

    for hit in root.findall('.//hit'):
        info = hit.find('./info')
        if info is None:
            # Nothing to extract from a hit that carries no <info> element.
            continue

        authors = [author.text for author in hit.findall('.//author')]
        record = {
            'Publication Title': _child_text(info, 'title'),
            'List of Authors': authors,
            **{column: _child_text(info, tag)
               for column, tag in INFO_FIELDS.items()},
        }
        all_records.append(record)

# Build the working DataFrame: one row per collected record
df = pd.DataFrame(all_records)


def _as_html_link(value):
    """Wrap a non-missing value in an anchor that opens in a new tab; missing values become ''."""
    if pd.notna(value):
        return f'<a href="{value}" target="_blank">{value}</a>'
    return ""


# Turn both URL-bearing columns into clickable links for the HTML preview
for link_column in ('Digital Object Identifier (DOI)', 'Publication URL'):
    df[link_column] = df[link_column].apply(_as_html_link)

# Function to center-align the content of a DataFrame for display


def center_align_dataframe(df):
    """Return the Styler of ``df`` with centre-aligned cells and header cells.

    Args:
        df: object exposing a pandas-style ``.style`` accessor.

    Returns:
        The Styler produced after applying the cell properties and the
        header (``th``) table style.
    """
    cell_properties = {'text-align': 'center'}
    header_rule = dict(selector='th', props=[('text-align', 'center')])

    styler = df.style.set_properties(**cell_properties)
    return styler.set_table_styles([header_rule])


# Render a random sample of five rows as a centre-aligned HTML table
sample_styler = center_align_dataframe(df.sample(5))
display(HTML(sample_styler.to_html()))

# EXPORTING DATA TO EXCEL
In this section, the previously extracted and processed data from the DBLP API is exported to an Excel file for offline analysis and sharing. 

In [None]:
# DEFINE THE FILE PATH
# Name of the workbook the DataFrame is written to.
excel_file_path = "dblp.xlsx"

# CONVERT DATAFRAME TO EXCEL
# Write the DataFrame to the workbook, leaving out the index column.
df.to_excel(excel_writer=excel_file_path, index=False)

# CONFIRMATION MESSAGE
# Tell the reader where the workbook was written.
confirmation_message = f"The Excel file has been saved to {excel_file_path}"
print(confirmation_message)

# DOWNLOADING AND PROCESSING THE KAGGLE DATASET
In this section, I authenticate with the Kaggle API and download a dataset that maps first names to genders. After downloading, the archive is extracted, the CSV file is loaded, and a small sample is displayed for preliminary inspection.

In [None]:
# IMPORT REQUIRED LIBRARIES
from IPython.display import display
import os
import pandas as pd
from kaggle.api.kaggle_api_extended import KaggleApi
import zipfile

# SET KAGGLE CREDENTIALS
# Credentials are deliberately NOT stored in the notebook. They must be supplied
# from outside, through the KAGGLE_USERNAME / KAGGLE_KEY environment variables
# (per the Kaggle API documentation a kaggle.json token file is also accepted).
# NOTE(review): an API key used to be committed here in plain text; that key
# should be treated as leaked and regenerated in the Kaggle account settings.
missing_kaggle_vars = [
    variable for variable in ('KAGGLE_USERNAME', 'KAGGLE_KEY')
    if not os.environ.get(variable)
]
if missing_kaggle_vars:
    print("Kaggle environment variables not set: "
          + ", ".join(missing_kaggle_vars)
          + ". Authentication will rely on a kaggle.json token file if one is configured.")

# INITIALIZE KAGGLE API
# authenticate() raises if no usable credentials can be found.
api = KaggleApi()
api.authenticate()

# DOWNLOAD THE DATASET
# The dataset is fetched as a .zip archive into the current directory (not unzipped by the API).
dataset_slug = 'monukhan/gender-prediction-by-using-name'
zip_file = 'gender-prediction-by-using-name.zip'
api.dataset_download_files(dataset_slug, path='.', unzip=False)

# UNZIP THE DOWNLOADED FILE
# Every member of the archive is extracted into the current directory.
with zipfile.ZipFile(zip_file, 'r') as archive:
    archive.extractall()

# DEFINE COLUMN NAMES FOR THE DATASET
column_names = ['Name', 'Gender', 'Accuracy']

# LOAD AND FORMAT THE DATASET
# The first row of the CSV is skipped (it is assumed to be a header row) and
# the column names defined above are used instead.
data = pd.read_csv('name_gender.csv', names=column_names, skiprows=1)

# DISPLAY SAMPLE DATA WITHOUT THE ACCURACY COLUMN
# Five random rows are shown with centre-aligned header and body cells.
centered_cells = [
    {'selector': cell_tag, 'props': 'text-align: center;'}
    for cell_tag in ('th', 'td')
]
name_gender_preview = data[['Name', 'Gender']].sample(5)
display(name_gender_preview.style.set_table_styles(centered_cells))

# EXTRACTING FIRST NAMES AND GENDER CLASSIFICATION
In this section, I utilize the Kaggle dataset to classify authors by gender based on their first names. I first define utility functions to determine the gender of a given first name, and then apply this to the entire dataset. The updated dataset will have new columns indicating the first names of the authors and their predicted genders.

In [None]:
# FUNCTION TO DETERMINE GENDER BASED ON FIRST NAME
# This function queries the Kaggle dataset to get the gender classification of a given name.
def get_gender(name):
    """Look up the gender recorded for ``name`` in the module-level ``data`` frame.

    The comparison against the 'Name' column is case-insensitive. When several
    rows match, the 'Gender' value of the first matching row is returned.

    Args:
        name: first name to look up, or None.

    Returns:
        The matching 'Gender' value, or "Not classified" when ``name`` is None
        or no row matches.
    """
    unclassified = "Not classified"
    if name is None:
        return unclassified

    name_matches = data['Name'].str.lower() == name.lower()
    matching_genders = data.loc[name_matches, 'Gender']

    return unclassified if matching_genders.empty else matching_genders.iloc[0]

# FUNCTION TO EXTRACT FIRST NAMES AND GENDERS OF AUTHORS IN A RECORD
# For each record, this function extracts the first names of the authors, determines their genders, and adds this information to the record.


def extract_first_names_and_genders(row):
    """Add 'First_Name' and 'Gender' entries to ``row`` from its 'List of Authors'.

    For each author, the first whitespace-separated token is taken as the first
    name and passed to ``get_gender``. Both results are stored as ', '-joined
    strings with one entry per author, in author order.

    An author that is missing, not a string, or blank has no usable first name:
    it contributes an empty first name and "Not classified", so the two joined
    strings keep one entry per author instead of the whole row failing.

    Args:
        row: mapping-like record (e.g. a DataFrame row) with a
            'List of Authors' iterable.

    Returns:
        The same ``row`` object, with 'First_Name' and 'Gender' set.
    """
    unclassified = "Not classified"
    first_names = []
    genders = []

    for author in row['List of Authors']:
        name_parts = author.split() if isinstance(author, str) else []

        if name_parts:
            first_name = name_parts[0]
            gender = get_gender(first_name)
            # A non-string lookup result (e.g. a missing value in the names
            # dataset) cannot be joined below, so it counts as unclassified.
            if not isinstance(gender, str):
                gender = unclassified
        else:
            first_name = ''
            gender = unclassified

        first_names.append(first_name)
        genders.append(gender)

    row['First_Name'] = ', '.join(first_names)
    row['Gender'] = ', '.join(genders)
    return row


# APPLY THE EXTRACTION FUNCTION TO THE DATASET
# Each row is passed to the extraction function, which adds the two new columns.
df = df.apply(extract_first_names_and_genders, axis='columns')

# DISPLAY SAMPLE DATA WITH UPDATED COLUMNS
# Five random rows of the updated dataset, rendered as a centre-aligned HTML table.
gender_sample_styler = center_align_dataframe(df.sample(5))
display(HTML(gender_sample_styler.to_html(escape=False)))

# SAVING THE UPDATED DATA TO EXCEL
This section covers the saving of the DataFrame, which now includes the gender predictions, to an Excel file. This can be useful for offline analysis and record-keeping purposes.

In [None]:
# SAVE DATAFRAME WITH GENDER PREDICTIONS TO EXCEL
# The updated DataFrame is written to 'dblp_with_gender.xlsx' without the index column.
excel_file_path_with_gender = "dblp_with_gender.xlsx"
df.to_excel(excel_writer=excel_file_path_with_gender, index=False)

# NOTIFICATION MESSAGE
# Confirm where the workbook was written.
gender_export_message = f"The Excel file with gender predictions has been saved to {excel_file_path_with_gender}"
print(gender_export_message)

# CONVERSION OF STRING REPRESENTATION OF LISTS TO ACTUAL LISTS
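In the DataFrame, the column "List of Authors" may contain strings that represent lists of authors (for example, if the data has been reloaded from the exported Excel file). To handle these lists programmatically, this section converts such string representations into actual Python lists; values that are not strings are left unchanged.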

In [None]:
# FUNCTION TO CONVERT STRING REPRESENTATION OF LISTS TO ACTUAL LISTS
# A non-string value is returned unchanged. A string is parsed as a Python
# literal: a list (or tuple) literal becomes a list, anything else becomes [].
def to_list(val):
    """Convert the string form of a list, e.g. "['A', 'B']", into a real list.

    The string is parsed with ``ast.literal_eval``, which only accepts Python
    literals. Unlike ``eval`` it never executes code, so cell text such as
    "__import__('os').system(...)" is rejected instead of being run.

    Args:
        val: the cell value; any type.

    Returns:
        ``val`` itself when it is not a string; a list when the string is a
        list or tuple literal; an empty list when the string cannot be parsed
        or parses to some other kind of value.
    """
    import ast  # function-scope import keeps this cell self-contained

    if not isinstance(val, str):
        return val  # Already a list (or a missing value): nothing to convert

    try:
        parsed = ast.literal_eval(val)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Not a valid Python literal
        return []

    if isinstance(parsed, (list, tuple)):
        return list(parsed)
    return []  # A literal, but not a sequence of authors


# APPLY THE CONVERSION FUNCTION TO THE 'List of Authors' COLUMN
# Every entry of the column is passed through to_list and the column is replaced with the result.
authors_column = df['List of Authors']
df['List of Authors'] = authors_column.apply(to_list)

# INITIALIZING DASH APPLICATION
This section sets up the necessary foundation for creating an interactive web application using Dash. Dash is a productive Python framework for building web applications. Here, we'll also use `dash_bootstrap_components` for styling with the Bootstrap framework.

In [None]:
# IMPORT NECESSARY LIBRARIES
# The following libraries will be used for creating the Dash application, visualizations, and data processing.
import dash
from dash import dcc, html
from dash import dash_table
import dash_bootstrap_components as dbc
from dash.dependencies import Input, Output
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import networkx as nx


import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output

# INITIALIZE THE DASH APPLICATION
# The Bootstrap theme is loaded as an external stylesheet.
bootstrap_stylesheets = [dbc.themes.BOOTSTRAP]
app = dash.Dash(__name__, external_stylesheets=bootstrap_stylesheets)

# Expose the app's underlying server object; kept as a variable for use when deploying the app.
server = app.server

# CONVERTING 'LIST OF AUTHORS' TO STRING FORMAT

In [None]:
# The following code takes the 'List of Authors' column in the DataFrame 'df',
# which is currently in list format, and converts it to a comma-separated string.
# This is useful for displaying the authors in a readable format in tables or for saving to text-based files.
def _authors_to_text(authors):
    """Join a list/tuple of author names with ', '; return any other value unchanged.

    Leaving non-sequence values alone makes the cell safe to re-run: joining an
    already-joined string would otherwise insert ', ' between its characters.
    Missing (None) names inside the list are skipped because they cannot be joined.
    """
    if isinstance(authors, (list, tuple)):
        return ', '.join(str(name) for name in authors if name is not None)
    return authors


df['List of Authors'] = df['List of Authors'].apply(_authors_to_text)

# 1. GENDER DISTRIBUTION ANALYSIS

In [None]:
# Split each ', '-separated 'Gender' string into a list, then explode to one value per row
gender_lists = df['Gender'].str.split(', ')
gender_dist = gender_lists.explode()

# Classify gender data
def classify_gender(gender):
    """Translate a gender code into its display label.

    Args:
        gender: 'M', 'F', or any other value.

    Returns:
        'Male' for 'M', 'Female' for 'F', and 'Not classified' for anything else.
    """
    for code, label in (('M', 'Male'), ('F', 'Female')):
        if gender == code:
            return label
    return 'Not classified'

# Map genders and count the occurrences
gender_dist = gender_dist.map(classify_gender).value_counts().reset_index()
gender_dist.columns = ['Gender', 'count']  # Rename columns for easier reference

# Custom colors
colors = ['#1f77b4', '#ff7f0e', '#d62728']  # Modify as per preference

# Pin every category to a fixed colour. value_counts() orders rows by frequency,
# so colouring by position alone would give 'Male' a different colour whenever it
# is not the largest group; the later charts pair Male/Female/Not classified
# with colors[0]/[1]/[2], and this map keeps the donut consistent with them.
gender_color_map = dict(zip(['Male', 'Female', 'Not classified'], colors))

# Create an interactive donut chart
gender_donut = px.pie(
    data_frame=gender_dist,
    names='Gender',  # Use the renamed column 'Gender'
    values='count',  # Use the renamed column 'count'
    color='Gender',
    color_discrete_map=gender_color_map,
    hole=0.3,
    color_discrete_sequence=colors,
    labels={'Gender': 'Gender Type', 'count': 'Count'},
    height=500,
    width=800
)

# Customize hover info
hovertemplate = "<b>%{label}</b><br>%{value} individuals<br>%{percent:.1%}"
gender_donut.update_traces(
    textinfo='percent+label',
    hoverinfo='label+value+percent',
    hovertemplate=hovertemplate,
    pull=[0.1, 0.1, 0.05],  # Enhance pull for better emphasis on slices
    opacity=0.9,            # Setting a slightly lower opacity for a softer appearance
    marker=dict(line=dict(color='#000000', width=1)),  # Adding a border to each pie slice
    selector=dict(type='pie')
)

# Enhance overall layout
gender_donut.update_layout(
    margin=dict(t=60, b=0, l=(gender_donut.layout.width - 800) / 2, r=0),
    legend_title_text='Gender',
    legend=dict(font=dict(size=14), y=0.95, x=0.01),
    uniformtext_minsize=15,
    uniformtext_mode='hide',
    title={
        'text': "GENDER DISTRIBUTION ANALYSIS",
        'y': 0.98,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
        'font': {
            'size': 24,
            'color': 'black'
        }
    }
)

# Display the chart
gender_donut.show()

# 2. YEARLY PUBLICATION COUNT WITH GENDER TREND LINES

In [None]:
# Split the ', '-separated 'Gender' string of each publication into a list of gender codes
df['Exploded Gender'] = df['Gender'].str.split(', ')

# One row per (publication, author gender), labelled 'Male', 'Female', or 'Not classified'
expanded_df = df.explode('Exploded Gender')
expanded_df['Classified Gender'] = expanded_df['Exploded Gender'].apply(
    classify_gender)

# The categories plotted as trend lines, in the same order as `colors`
gender_categories = ['Male', 'Female', 'Not classified']

# Count entries per 'Publication Year' and 'Classified Gender'. Year/gender
# combinations with no entries are filled with 0 (not NaN) so the lines have no gaps.
gender_yearly_publication = expanded_df.groupby(
    ['Publication Year', 'Classified Gender'])['Publication Title'].count().unstack(fill_value=0)

# A category that never occurs in the data has no column after unstack();
# add it as all zeros so the plotting loop below cannot hit a KeyError.
for category in gender_categories:
    if category not in gender_yearly_publication.columns:
        gender_yearly_publication[category] = 0

gender_yearly_publication = gender_yearly_publication.reset_index()

# Initialize a Figure object for plotting
yearly_gender_trend_fig = go.Figure()

# Add a bar trace to the figure for total publications per year
total_publications = df.groupby('Publication Year')[
    'Publication Title'].count()
yearly_gender_trend_fig.add_trace(go.Bar(x=total_publications.index,
                                         y=total_publications.values,
                                         name='Total Publications',
                                         opacity=0.6, yaxis='y1'))

# Loop over gender categories to add a trend line for each on the secondary axis
for gender, color in zip(gender_categories, colors):
    yearly_gender_trend_fig.add_trace(go.Scatter(x=gender_yearly_publication['Publication Year'],
                                                 y=gender_yearly_publication[gender],
                                                 mode='lines+markers',
                                                 name=gender,
                                                 line=dict(color=color, width=2), yaxis='y2'))

# Configure the layout and appearance of the figure
yearly_gender_trend_fig.update_layout(
    width=1200,
    height=600,
    title={
        'text': "YEARLY PUBLICATION COUNT WITH GENDER TREND LINES",
        'y': 0.98,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
        'font': {
            'size': 24,
            'color': 'black'
        }
    },
    xaxis_title="Publication Year",
    xaxis=dict(showgrid=True, showline=True,
               linecolor='rgb(204, 204, 204)', linewidth=2),
    yaxis_title="Number of Publications",
    yaxis=dict(showgrid=True, showline=True,
               linecolor='rgb(204, 204, 204)', linewidth=2),
    legend_title_text='Gender',
    legend=dict(font=dict(size=14), y=0.95, x=0.01),
    margin=dict(l=50, r=50),
    yaxis2=dict(
        title="Number of Individuals",
        overlaying='y',
        side='right',
        showgrid=False,
        showline=True,
        linecolor='rgb(204, 204, 204)',
        linewidth=2,
    )
)

yearly_gender_trend_fig.update_xaxes(
    ticks="outside", tickwidth=2, tickcolor='rgb(204, 204, 204)', ticklen=10)
yearly_gender_trend_fig.update_yaxes(
    ticks="outside", tickwidth=2, tickcolor='rgb(204, 204, 204)', ticklen=10)

# 3. PUBLICATION VENUE DISTRIBUTION OVER YEARS

In [None]:

# Filter out missing values in the 'Publication Venue' column
filtered_df = df[df['Publication Venue'].notna()]

# Get the list of unique venues
venue_dropdown = sorted(filtered_df['Publication Venue'].unique())

# Initialize a subplot figure
venue_publication_fig = make_subplots(
    rows=1, cols=1, specs=[[{'type': 'scatter'}]])

# Create the empty placeholder traces: exactly len(colors) per venue, laid out
# venue-major, so trace `i * len(colors) + j` is gender j of venue i. (The extra
# venue-named placeholder that used to precede each group broke that layout.)
initial_traces = []
for venue in venue_dropdown:
    for label, color in zip(['Male', 'Female', 'Not Classified'], colors):
        initial_traces.append(go.Scatter(
            x=[], y=[], mode='lines+markers', name=label, line=dict(color=color)))

# Add the initial traces to the subplot figure
for trace in initial_traces:
    venue_publication_fig.add_trace(trace)

# Configure the layout and appearance of the figure
venue_publication_fig.update_layout(
    width=1200,
    height=600,
    title={
        'text': "PUBLICATION VENUE DISTRIBUTION OVER YEARS",
        'y': 0.98,
        'x': 0.25,  # Adjusted x value for left alignment
        'xanchor': 'left',  # Set anchor to left
        'yanchor': 'top',
        'font': {
                'size': 24,
                'color': 'black'
        }
    },
    xaxis_title="Publication Year",
    yaxis_title="Number of Individuals",
    legend_title_text="Publication Venue",
    # Adjusted x value for right alignment
    legend=dict(font=dict(size=14), y=0.95, x=1),
    margin=dict(l=50, r=50),
    autosize=False,  # Disable automatic resizing
)

# Define the callback function for the dropdown menu


@app.callback(
    Output('venue-publication-graph', 'figure'),
    [Input('venue-dropdown', 'value')]
)
def update_venue(selected_venue):
    """Build the per-year gender line chart for the venue chosen in the dropdown.

    A new figure is created on every call; only the layout of the module-level
    ``venue_publication_fig`` is reused. Mutating that shared figure in place
    would leak state between requests and send every placeholder trace to the
    browser on each update.

    Rows are selected from ``df`` directly rather than from ``filtered_df``,
    because that name is rebound later in the notebook to a frame filtered on
    'Publication Type'. Rows with a missing venue never equal the selected
    venue, so no explicit not-null filter is needed.

    Args:
        selected_venue: value of the 'venue-dropdown' component, or None when
            nothing is selected (the first venue is then used).

    Returns:
        A figure with one 'Male', 'Female' and 'Not Classified' line showing
        the number of authors per publication year; an empty figure with the
        same layout when there are no venues at all.
    """
    gender_labels = ['Male', 'Female', 'Not Classified']
    # Token stored in the ', '-separated 'Gender' column for each display label
    gender_tokens = {'Male': 'M', 'Female': 'F',
                     'Not Classified': 'Not classified'}

    figure = go.Figure(layout=venue_publication_fig.layout)

    if selected_venue is None:
        if not venue_dropdown:
            return figure  # nothing to plot
        selected_venue = venue_dropdown[0]

    # Filter data for the selected venue
    selected_data = df[df['Publication Venue'] == selected_venue].copy()

    # Count the authors of each gender per publication by comparing whole
    # tokens of the 'Gender' string
    for label in gender_labels:
        token = gender_tokens[label]
        selected_data[label] = selected_data['Gender'].apply(
            lambda genders, token=token:
                genders.split(', ').count(token) if isinstance(genders, str) else 0)

    # Group the data by 'Publication Year'
    grouped_data = selected_data.groupby(['Publication Year'])[
        gender_labels].sum().reset_index()

    for label, color in zip(gender_labels, colors):
        figure.add_trace(go.Scatter(
            x=grouped_data['Publication Year'],
            y=grouped_data[label],
            mode='lines+markers',
            name=label,
            line=dict(color=color)))

    return figure

# 4. PUBLICATION TYPE DISTRIBUTION OVER YEARS

In [None]:


# Filter out missing values in the 'Publication Type' column
filtered_df = df[df['Publication Type'].notna()]

# Get the list of unique publication types
type_dropdown = sorted(filtered_df['Publication Type'].unique())

# Initialize a subplot figure
type_publication_fig = make_subplots(
    rows=1, cols=1, specs=[[{'type': 'scatter'}]])

# Create the empty placeholder traces: exactly len(colors) per publication type,
# laid out type-major, so trace `i * len(colors) + j` is gender j of type i.
# (The extra type-named placeholder that used to precede each group broke that layout.)
initial_traces = []
for pub_type in type_dropdown:
    for label, color in zip(['Male', 'Female', 'Not Classified'], colors):
        initial_traces.append(go.Scatter(
            x=[], y=[], mode='lines+markers', name=label, line=dict(color=color)))

# Add the initial traces to the subplot figure
for trace in initial_traces:
    type_publication_fig.add_trace(trace)

# Configure the layout and appearance of the figure
type_publication_fig.update_layout(
    width=1200,
    height=600,
    title={
        'text': "PUBLICATION TYPE DISTRIBUTION OVER YEARS",
        'y': 0.98,
        'x': 0.25,  # Adjusted x value for left alignment
        'xanchor': 'left',  # Set anchor to left
        'yanchor': 'top',
        'font': {
                'size': 24,
                'color': 'black'
        }
    },
    xaxis_title="Publication Year",
    yaxis_title="Number of Individuals",
    legend_title_text="Publication Type",
    # Adjusted x value for right alignment
    legend=dict(font=dict(size=14), y=0.95, x=1),
    margin=dict(l=50, r=50),
    autosize=False,  # Disable automatic resizing
)

# Define the callback function for the dropdown menu


@app.callback(
    Output('type-publication-graph', 'figure'),
    [Input('type-dropdown', 'value')]
)
def update_pub_type(selected_type):
    """Build the per-year gender line chart for the publication type chosen in the dropdown.

    A new figure is created on every call; only the layout of the module-level
    ``type_publication_fig`` is reused. Mutating that shared figure in place
    would leak state between requests and send every placeholder trace to the
    browser on each update.

    Rows are selected from ``df`` directly; rows with a missing publication
    type never equal the selected type, so no explicit not-null filter is needed.

    Args:
        selected_type: value of the 'type-dropdown' component, or None when
            nothing is selected (the first publication type is then used).

    Returns:
        A figure with one 'Male', 'Female' and 'Not Classified' line showing
        the number of authors per publication year; an empty figure with the
        same layout when there are no publication types at all.
    """
    gender_labels = ['Male', 'Female', 'Not Classified']
    # Token stored in the ', '-separated 'Gender' column for each display label
    gender_tokens = {'Male': 'M', 'Female': 'F',
                     'Not Classified': 'Not classified'}

    figure = go.Figure(layout=type_publication_fig.layout)

    if selected_type is None:
        if not type_dropdown:
            return figure  # nothing to plot
        selected_type = type_dropdown[0]

    # Filter data for the selected publication type
    selected_data = df[df['Publication Type'] == selected_type].copy()

    # Count the authors of each gender per publication by comparing whole
    # tokens of the 'Gender' string
    for label in gender_labels:
        token = gender_tokens[label]
        selected_data[label] = selected_data['Gender'].apply(
            lambda genders, token=token:
                genders.split(', ').count(token) if isinstance(genders, str) else 0)

    # Group the data by 'Publication Year'
    grouped_data = selected_data.groupby(['Publication Year'])[
        gender_labels].sum().reset_index()

    for label, color in zip(gender_labels, colors):
        figure.add_trace(go.Scatter(
            x=grouped_data['Publication Year'],
            y=grouped_data[label],
            mode='lines+markers',
            name=label,
            line=dict(color=color)))

    return figure

# Project Web Application Code

This section contains the code for creating the web application using Dash and Plotly libraries.

In [None]:
from IPython.display import display, HTML

# Define color palette for visualizations
colors_palette = ['#1f77b4', '#ff7f0e', '#d62728']


def _centered():
    """Return a fresh style dict that centres text."""
    return {'text-align': 'center'}


def _banded_section(children, background):
    """Wrap ``children`` in a Section with the standard padding, a background colour and centred text."""
    return html.Section(children, style={
        'padding': '30px 0', 'background-color': background, 'text-align': 'center'})


def _dropdown_panel(graph_id, figure, dropdown_id, choices, placeholder):
    """Return the Div holding a callback-driven graph and the dropdown that selects what it shows."""
    return html.Div([
        dcc.Graph(id=graph_id, figure=figure,
                  config={'displayModeBar': False}),
        dcc.Dropdown(id=dropdown_id,
                     options=[{'label': choice, 'value': choice}
                              for choice in sorted(choices)],
                     value=choices[0],
                     multi=False,
                     placeholder=placeholder,
                     style={'width': '100%'}),
    ], style={'display': 'inline-block', 'padding': '10px 20px 20px 0'})


# Navigation Bar: (link text, in-page anchor) pairs, in display order
navigation_targets = [
    ('Home', '#home'),
    ('Gender Distribution', '#gender-distribution'),
    ('Yearly Gender Trend', '#yearly-gender-trend'),
    ('Publication Venue Distribution', '#venue-publication-distribution'),
    ('Publication Type Distribution', '#type-publication-distribution'),
    ('About', '#about'),
    ('Contact', '#contact'),
]
navigation_bar = html.Nav(
    [html.A(link_text, href=anchor, style={'margin-right': '20px'})
     for link_text, anchor in navigation_targets],
    style={'text-align': 'center', 'background-color': '#f7f7f7', 'padding': '10px'})

# Home Page
home_section = html.Section([
    html.H1("Welcome to My Data Visualization Project - Visualising historical gender diversity in CS publications",
            id='home', style=_centered()),
    html.P("Exploring various visualizations showcasing gender distribution, publication trends, and more.",
           style=_centered()),
], style={'padding': '30px 0', 'background-color': '#87CEEB'})

# Gender Distribution (the empty H2 only provides the anchor target for the nav link)
gender_section = _banded_section([
    html.H2(id='gender-distribution', style=_centered()),
    html.Div([
        dcc.Graph(figure=gender_donut, config={'displayModeBar': False}),
    ], style={'display': 'inline-block', 'padding': '20px'}),
], '#ffffff')

# Yearly Gender Trend
yearly_trend_section = _banded_section([
    html.H2(id='yearly-gender-trend', style=_centered()),
    html.Div([
        dcc.Graph(id='yearly-gender-trend-graph',
                  figure=yearly_gender_trend_fig, config={'displayModeBar': False}),
    ], style={'display': 'inline-block', 'padding': '20px'}),
], '#f7f7f7')

# Publication Venue Distribution
venue_section = _banded_section([
    html.H2(id='venue-publication-distribution', style=_centered()),
    _dropdown_panel('venue-publication-graph', venue_publication_fig,
                    'venue-dropdown', venue_dropdown, "Select a Venue"),
], '#ffffff')

# Publication Type Distribution
type_section = _banded_section([
    html.H2(id='type-publication-distribution', style=_centered()),
    _dropdown_panel('type-publication-graph', type_publication_fig,
                    'type-dropdown', type_dropdown, "Select a Type"),
], '#f7f7f7')

# About Page
about_section = _banded_section([
    html.H2("About", id='about', style=_centered()),
    html.P("This web application was created by Vamsi Mudila."),
    html.P(
        "Explore the data visualizations and gain insights into the provided data."),
    html.A("LinkedIn Profile",
           href="https://www.linkedin.com/in/vamsi-mudila/", target="_blank"),
], '#87CEEB')

# Contact Page
contact_section = _banded_section([
    html.H2("Contact", id='contact', style=_centered()),
    html.P("Feel free to contact me with any questions or feedback."),
    html.P("Email: vamsi.mudila@gmail.com"),
], '#87CEEB')

# Footer
page_footer = html.Footer([
    html.P("© 2023 Vamsi Mudila. All rights reserved.", style=_centered()),
], style={'text-align': 'center', 'background-color': '#f7f7f7', 'color': 'black', 'padding': '10px'})

# Define the app layout: the page is the sections above, stacked top to bottom
app.layout = html.Div([
    navigation_bar,
    home_section,
    gender_section,
    yearly_trend_section,
    venue_section,
    type_section,
    about_section,
    contact_section,
    page_footer,
], style={'font-family': 'Arial, sans-serif', 'background-color': '#f0f0f0'})

# Show a clickable link to the address the web application is expected at

link = "http://127.0.0.1:8050/"
link_markup = f'<p style="text-align: center;">Click the link below to open the web application:<br><b><a href="{link}" target="_blank">{link}</a></b></p>'
display(HTML(link_markup))


# Start the Dash server (in debug mode) when this code runs as the main module

if __name__ == '__main__':
    app.run_server(debug=True)