# Temporal Analysis
Temporal analysis can reveal important trends and patterns in the corpus over time.

## Install dependencies 

In [None]:
!pip install pandas

In [None]:
!pip install matplotlib

In [None]:
!pip install openpyxl

## Keyword frequency over time
This will show how certain themes or subjects have gained or lost prominence. We'll focus on the 'Date' and 'Keywords' columns. This code will read the Excel spreadsheet, extract the most frequently occurring keywords, and then plot their annual frequency.

This code allows you to use the dropdown to select a corpus (Benin or Burkina Faso) and the slider to choose the number of keywords. After making your selections, click the "Process Data" button to generate the visualization based on your choices. The data processing and plotting will only occur after the button click, giving you control over when the visualization is generated.

In [None]:
import ipywidgets as widgets
from IPython.display import display
from collections import Counter
import pandas as pd
import matplotlib.pyplot as plt

# Function to process data and generate the plot
def process_data(b=None):
    """Plot the annual frequency of the most common keywords in the selected corpus.

    Reads the module-level ``selected_file_url`` and ``selected_num_keywords``.
    The spreadsheet is expected to provide a 'Date' column and a 'Keywords'
    column whose entries separate individual keywords with ' | '.

    Args:
        b: The clicked button, passed by ``Button.on_click``; unused.
    """
    if not (selected_file_url and selected_num_keywords > 0):
        print("Please select a file and number of keywords from the dropdowns above.")
        return

    # Read the Excel file and derive the year of every row from its date
    df = pd.read_excel(selected_file_url, engine='openpyxl')
    df['Date'] = pd.to_datetime(df['Date'])
    df['Year'] = df['Date'].dt.year

    # Aggregate keywords over the whole corpus to find the most frequent ones
    keyword_counter = Counter()
    for keywords in df['Keywords'].dropna():
        keyword_counter.update(keywords.split(' | '))
    most_common_keywords = [
        keyword for keyword, _ in keyword_counter.most_common(selected_num_keywords)
    ]

    # Count the top keywords year by year. The years are recorded in the
    # order groupby yields them (ascending, rows without a year skipped), so
    # every row of counts is labelled with the year it was computed for.
    # df['Year'].unique() is in order of appearance and may include NaN,
    # which would mislabel the rows or make the lengths disagree.
    years = []
    annual_keyword_freq = {keyword: [] for keyword in most_common_keywords}
    for year, group in df.groupby('Year'):
        years.append(year)
        yearly_counter = Counter()
        for keywords in group['Keywords'].dropna():
            yearly_counter.update(keywords.split(' | '))
        for keyword in most_common_keywords:
            annual_keyword_freq[keyword].append(yearly_counter.get(keyword, 0))

    annual_keyword_df = pd.DataFrame(annual_keyword_freq, index=years)

    # Plot one line per keyword
    plt.figure(figsize=(14, 8))
    for keyword in most_common_keywords:
        plt.plot(annual_keyword_df.index, annual_keyword_df[keyword], label=keyword)

    # The corpus file name tells which country was selected
    selected_country = 'Benin' if 'Benin' in selected_file_url else 'Burkina Faso'
    plt.title(f'Annual Frequency of Top {selected_num_keywords} Keywords in {selected_country}')
    plt.xlabel('Year')
    plt.ylabel('Frequency')
    plt.legend()

    # Show the figure
    plt.show()

# URLs of the Excel files on GitHub
excel_urls = {
    'Benin': 'https://raw.githubusercontent.com/fmadore/Islam-West-Africa-Collection/main/Jupyter%20notebooks/Textual%20analysis/corpus_Benin.xlsx',
    'Burkina Faso': 'https://raw.githubusercontent.com/fmadore/Islam-West-Africa-Collection/main/Jupyter%20notebooks/Textual%20analysis/corpus_Burkina.xlsx'
}

# Dropdown widget for selecting the Excel file
excel_file_dropdown = widgets.Dropdown(
    options=excel_urls,
    description='Select File:',
    disabled=False,
)

# Slider widget for selecting the number of keywords
num_keywords_slider = widgets.IntSlider(
    value=10,
    min=1,
    max=20,
    step=1,
    description='Keywords:',
    disabled=False,
    continuous_update=False
)

# Button to trigger the processing
process_button = widgets.Button(description="Process Data")

# Global variables to store the selected file URL and number of keywords.
# They are seeded from the widgets' initial values: observers only fire when
# a value *changes*, so the entry that is preselected when the widgets first
# appear would otherwise never be recorded and the button would report that
# no file has been selected.
selected_file_url = excel_file_dropdown.value
selected_num_keywords = num_keywords_slider.value

# Handlers for widget events
def on_file_change(change):
    """Store the file URL newly chosen in the dropdown in ``selected_file_url``.

    Args:
        change: Change notification delivered by ``observe``; its 'new'
            entry holds the value that was just selected.
    """
    global selected_file_url
    new_url = change['new']
    selected_file_url = new_url

def on_num_keywords_change(change):
    """Store the slider's new position in ``selected_num_keywords``.

    Args:
        change: Change notification delivered by ``observe``; its 'new'
            entry holds the number of keywords that was just chosen.
    """
    global selected_num_keywords
    new_count = change['new']
    selected_num_keywords = new_count

# Keep the stored selections in step with the two input widgets
num_keywords_slider.observe(on_num_keywords_change, names='value')
excel_file_dropdown.observe(on_file_change, names='value')

# Clicking the button runs the processing and plotting
process_button.on_click(process_data)

# Show the controls in order: file dropdown, keyword slider, button
display(excel_file_dropdown, num_keywords_slider, process_button)


The plot illustrates the annual frequency of the top X most common keywords in the selected corpus. This visualization allows us to observe several interesting trends:
+ **Variability**: Some keywords exhibit a more stable frequency over the years, while others show considerable fluctuations.
+ **Dominance**: Certain keywords consistently appear more frequently, suggesting their importance or prominence in the dataset.
+ **Temporal Shifts**: There may be years where specific keywords spike or dip, which could be indicative of broader socio-political or cultural changes.

For a more nuanced understanding, this analysis can be complemented with qualitative assessments or further quantitative measures.

## Animated visualization 
An animated visualization shows dynamically how keyword frequencies change over time. To create one, we can use matplotlib's animation capabilities: `FuncAnimation` from `matplotlib.animation` builds a basic animation by repeatedly calling an update function.

The cells below animate the change in keyword frequencies over time, building on the plot from the previous section. The general approach is:

1. Define a function that updates the plot for each frame of the animation.
2. Use FuncAnimation to create the animation by calling the update function for each frame.

### Step 1
First, ensure that matplotlib.animation and necessary utilities are imported. You may also need to install ffmpeg or another supported animation writer if it's not already available in your environment.

In [None]:
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from IPython.display import HTML

In [None]:
import ipywidgets as widgets
from IPython.display import display, HTML
from collections import Counter
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation

# Function to process data and generate the animated plot
def process_data():
    """Build an animation of the annual frequency of the top keywords.

    Reads the module-level ``selected_file_url`` and ``selected_num_keywords``.
    The spreadsheet is expected to provide a 'Date' column and a 'Keywords'
    column whose entries separate individual keywords with ' | '.

    Returns:
        A ``FuncAnimation`` that reveals the lines one year per frame, or
        ``None`` (after printing a hint) when no file or keyword count is
        selected.
    """
    if not (selected_file_url and selected_num_keywords > 0):
        print("Please select a file and number of keywords from the dropdowns above.")
        return None

    df = pd.read_excel(selected_file_url, engine='openpyxl')
    df['Date'] = pd.to_datetime(df['Date'])
    df['Year'] = df['Date'].dt.year

    # Most frequent keywords over the whole corpus
    keyword_counter = Counter()
    for keywords in df['Keywords'].dropna():
        keyword_counter.update(keywords.split(' | '))
    most_common_keywords = [
        keyword for keyword, _ in keyword_counter.most_common(selected_num_keywords)
    ]

    # Yearly counts, labelled with the years in the order groupby yields
    # them (ascending, rows without a year skipped). df['Year'].unique() is
    # in order of appearance and may include NaN, so using it as the index
    # would attach counts to the wrong years or fail on a length mismatch.
    years = []
    annual_keyword_freq = {keyword: [] for keyword in most_common_keywords}
    for year, group in df.groupby('Year'):
        years.append(year)
        yearly_counter = Counter()
        for keywords in group['Keywords'].dropna():
            yearly_counter.update(keywords.split(' | '))
        for keyword in most_common_keywords:
            annual_keyword_freq[keyword].append(yearly_counter.get(keyword, 0))

    annual_keyword_df = pd.DataFrame(annual_keyword_freq, index=years)

    # Prepare the plot for animation: one initially empty line per keyword
    fig, ax = plt.subplots(figsize=(14, 8))
    lines = {keyword: ax.plot([], [], label=keyword)[0] for keyword in most_common_keywords}
    ax.set_xlim(df['Year'].min(), df['Year'].max())
    # Keep a visible y-range even when every count is zero (or there is no data)
    peak = annual_keyword_df.max().max()
    ax.set_ylim(0, peak if peak > 0 else 1)
    selected_country = 'Benin' if 'Benin' in selected_file_url else 'Burkina Faso'
    ax.set_title(f'Annual Frequency of Top {selected_num_keywords} Keywords in {selected_country}')
    ax.set_xlabel('Year')
    ax.set_ylabel('Frequency')
    ax.legend()

    def animate(year):
        """Extend every line up to and including *year*."""
        shown = annual_keyword_df.index <= year
        for keyword, line in lines.items():
            line.set_data(annual_keyword_df.index[shown], annual_keyword_df[keyword][shown])
        # With blit=True the callback must hand back the artists it changed
        return list(lines.values())

    # Create the animation: one frame per year
    ani = FuncAnimation(fig, animate, frames=annual_keyword_df.index, blit=True, interval=200)
    plt.close(fig)  # Prevent static plot from displaying
    return ani

# URLs of the Excel files on GitHub
excel_urls = {
    'Benin': 'https://raw.githubusercontent.com/fmadore/Islam-West-Africa-Collection/main/Jupyter%20notebooks/Textual%20analysis/corpus_Benin.xlsx',
    'Burkina Faso': 'https://raw.githubusercontent.com/fmadore/Islam-West-Africa-Collection/main/Jupyter%20notebooks/Textual%20analysis/corpus_Burkina.xlsx'
}

# Dropdown widget for selecting the Excel file
excel_file_dropdown = widgets.Dropdown(
    options=excel_urls,
    description='Select File:',
    disabled=False,
)

# Slider widget for selecting the number of keywords
num_keywords_slider = widgets.IntSlider(
    value=10,
    min=1,
    max=20,
    step=1,
    description='Keywords:',
    disabled=False,
    continuous_update=False
)

# Button to trigger the processing
process_button = widgets.Button(description="Process Data")

# Global variables to store the selected file URL and number of keywords.
# Start from what the widgets currently show; change observers are only
# called on later edits, so an empty initial URL would make the button
# refuse to run until the user picks a different file first.
selected_file_url = excel_file_dropdown.value
selected_num_keywords = num_keywords_slider.value

# Handlers for widget events
def on_file_change(change):
    """Record the corpus URL that the file dropdown switched to.

    Args:
        change: Change notification from ``observe``; ``change['new']`` is
            the freshly selected URL.
    """
    global selected_file_url
    chosen_url = change['new']
    selected_file_url = chosen_url

def on_num_keywords_change(change):
    """Record the keyword count that the slider was moved to.

    Args:
        change: Change notification from ``observe``; ``change['new']`` is
            the freshly selected number of keywords.
    """
    global selected_num_keywords
    chosen_count = change['new']
    selected_num_keywords = chosen_count

# Attach the event handler to the button
def on_button_clicked(b):
    """Build the animation for the current selection and embed it in the notebook.

    Args:
        b: The clicked button, passed by ``Button.on_click``; unused.
    """
    ani = process_data()
    if ani is None:
        # process_data has already printed why nothing could be generated
        return

    # to_html5_video() encodes with the configured movie writer (ffmpeg by
    # default) and raises when that external tool is not installed. Fall
    # back to matplotlib's JavaScript player, which needs no encoder.
    from matplotlib.animation import writers
    if writers.is_available(plt.rcParams['animation.writer']):
        display(HTML(ani.to_html5_video()))
    else:
        display(HTML(ani.to_jshtml()))

# Clicking the button builds and embeds the animation
process_button.on_click(on_button_clicked)

# Keep the stored selections in step with the two input widgets
num_keywords_slider.observe(on_num_keywords_change, names='value')
excel_file_dropdown.observe(on_file_change, names='value')

# Show the controls in order: file dropdown, keyword slider, button
display(excel_file_dropdown, num_keywords_slider, process_button)


In [None]:
import ipywidgets as widgets
from IPython.display import display, HTML
from collections import Counter
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import numpy as np
from scipy.interpolate import interp1d

# Function to interpolate data for smoother animation
def interpolate_dataframe(df, num_points=100):
    """Linearly resample every column of *df* onto an evenly spaced index.

    Args:
        df: DataFrame with a numeric index and numeric columns.
        num_points: Number of evenly spaced index values to generate between
            the smallest and the largest index value of *df*.

    Returns:
        A new DataFrame with the same columns in the same order, indexed by
        the ``num_points`` generated values. When *df* has fewer than two
        rows there is nothing to interpolate between, so an unchanged copy
        is returned instead.
    """
    # interp1d needs at least two support points
    if len(df) < 2:
        return df.copy()

    new_index = np.linspace(df.index.min(), df.index.max(), num_points)
    resampled = {
        column: interp1d(df.index, df[column], kind='linear')(new_index)
        for column in df.columns
    }
    # Passing the index explicitly also covers a frame without columns
    return pd.DataFrame(resampled, index=new_index)

# Function to process data and generate the animated plot
def process_data():
    """Build a smoothed animation of the annual frequency of the top keywords.

    Reads the module-level ``selected_file_url`` and ``selected_num_keywords``.
    The spreadsheet is expected to provide a 'Date' column and a 'Keywords'
    column whose entries separate individual keywords with ' | '. The yearly
    counts are linearly interpolated so the lines grow gradually between years.

    Returns:
        A ``FuncAnimation`` with 200 frames, or ``None`` (after printing a
        hint) when no file or keyword count is selected.
    """
    if not (selected_file_url and selected_num_keywords > 0):
        print("Please select a file and number of keywords from the dropdowns above.")
        return None

    df = pd.read_excel(selected_file_url, engine='openpyxl')
    df['Date'] = pd.to_datetime(df['Date'])
    df['Year'] = df['Date'].dt.year

    # Most frequent keywords over the whole corpus
    keyword_counter = Counter()
    for keywords in df['Keywords'].dropna():
        keyword_counter.update(keywords.split(' | '))
    most_common_keywords = [
        keyword for keyword, _ in keyword_counter.most_common(selected_num_keywords)
    ]

    # Yearly counts, labelled with the years in the order groupby yields
    # them (ascending, rows without a year skipped). df['Year'].unique() is
    # in order of appearance and may include NaN, so using it as the index
    # would attach counts to the wrong years or fail on a length mismatch.
    years = []
    annual_keyword_freq = {keyword: [] for keyword in most_common_keywords}
    for year, group in df.groupby('Year'):
        years.append(year)
        yearly_counter = Counter()
        for keywords in group['Keywords'].dropna():
            yearly_counter.update(keywords.split(' | '))
        for keyword in most_common_keywords:
            annual_keyword_freq[keyword].append(yearly_counter.get(keyword, 0))

    annual_keyword_df = pd.DataFrame(annual_keyword_freq, index=years)

    # Interpolate data for smoother transitions; a single year offers
    # nothing to interpolate between, so it is animated as is
    if len(annual_keyword_df) > 1:
        annual_keyword_df = interpolate_dataframe(annual_keyword_df)

    # Prepare the plot for animation: one initially empty line per keyword
    fig, ax = plt.subplots(figsize=(14, 8))
    lines = {keyword: ax.plot([], [], label=keyword)[0] for keyword in most_common_keywords}
    ax.set_xlim(df['Year'].min(), df['Year'].max())
    ax.set_ylim(0, max(annual_keyword_df.max().max(), 1))  # Never collapse the y-axis to zero height
    # The dropdown label may be abbreviated; the title names the country in full
    selected_country = 'Benin' if 'Benin' in selected_file_url else 'Burkina Faso'
    ax.set_title(f'Annual Frequency of Top {selected_num_keywords} Keywords in {selected_country}')
    ax.set_xlabel('Year')
    ax.set_ylabel('Frequency')
    ax.legend()

    def animate(year):
        """Extend every line up to and including the (fractional) *year*."""
        shown = annual_keyword_df.index <= year
        for keyword, line in lines.items():
            line.set_data(annual_keyword_df.index[shown], annual_keyword_df[keyword][shown])
        # With blit=True the callback must hand back the artists it changed
        return list(lines.values())

    # Create the animation with more frames and a shorter interval
    frame_years = np.linspace(annual_keyword_df.index.min(), annual_keyword_df.index.max(), 200)
    ani = FuncAnimation(fig, animate, frames=frame_years, blit=True, interval=50)
    plt.close(fig)  # Prevent static plot from displaying
    return ani

# URLs of the Excel files on GitHub, keyed by the label shown in the dropdown
excel_urls = {
    'Benin': 'https://raw.githubusercontent.com/fmadore/Islam-West-Africa-Collection/main/Jupyter%20notebooks/Textual%20analysis/corpus_Benin.xlsx',
    'Burkina Faso': 'https://raw.githubusercontent.com/fmadore/Islam-West-Africa-Collection/main/Jupyter%20notebooks/Textual%20analysis/corpus_Burkina.xlsx'
}

# Dropdown widget for selecting the Excel file
excel_file_dropdown = widgets.Dropdown(
    options=excel_urls,
    description='Select File:',
    disabled=False,
)

# Slider widget for selecting the number of keywords
num_keywords_slider = widgets.IntSlider(
    value=10,
    min=1,
    max=20,
    step=1,
    description='Keywords:',
    disabled=False,
    continuous_update=False
)

# Button to trigger the processing
process_button = widgets.Button(description="Process Data")

# Global variables to store the selected file URL and number of keywords.
# Take the initial values from the widgets themselves: change observers are
# not called for the entry that is preselected on display, so starting from
# an empty URL would make the first button click report a missing file.
selected_file_url = excel_file_dropdown.value
selected_num_keywords = num_keywords_slider.value

# Handlers for widget events
def on_file_change(change):
    """Update ``selected_file_url`` after the file dropdown changed.

    Args:
        change: Change notification from ``observe``; the entry under 'new'
            is the URL now selected.
    """
    global selected_file_url
    latest_url = change['new']
    selected_file_url = latest_url

def on_num_keywords_change(change):
    """Update ``selected_num_keywords`` after the slider changed.

    Args:
        change: Change notification from ``observe``; the entry under 'new'
            is the number of keywords now selected.
    """
    global selected_num_keywords
    latest_count = change['new']
    selected_num_keywords = latest_count

# Attach the event handler to the button
def on_button_clicked(b):
    """Generate the smoothed animation and embed it in the notebook output.

    Args:
        b: The clicked button, passed by ``Button.on_click``; unused.
    """
    ani = process_data()
    if ani is None:
        # Nothing to show; process_data has already printed the reason
        return

    # An HTML5 video requires the configured movie writer (ffmpeg by
    # default) to be installed. Without it to_html5_video() raises, so use
    # matplotlib's JavaScript player instead, which needs no encoder.
    from matplotlib.animation import writers
    if writers.is_available(plt.rcParams['animation.writer']):
        display(HTML(ani.to_html5_video()))
    else:
        display(HTML(ani.to_jshtml()))

# Clicking the button builds and embeds the smoothed animation
process_button.on_click(on_button_clicked)

# Keep the stored selections in step with the two input widgets
num_keywords_slider.observe(on_num_keywords_change, names='value')
excel_file_dropdown.observe(on_file_change, names='value')

# Show the controls in order: file dropdown, keyword slider, button
display(excel_file_dropdown, num_keywords_slider, process_button)


### Notes
- The animation's speed and smoothness can be adjusted through the parameters of `FuncAnimation`, such as `interval` (the delay between frames in milliseconds) and `frames`.
- Ensure your Jupyter environment has the necessary backend support for animations. Additional setup is sometimes required for animations to display correctly in Jupyter notebooks.
- The animation is created as an HTML5 video, which is a common and convenient format for Jupyter notebooks.

## Compare multiple keywords
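To compare multiple keywords over time, we can modify the existing code to allow for the selection of specific keywords of interest. The first cell below is interactive: choose a corpus, pick a category and a keyword, click "Add Keyword" for each keyword you want to compare, then click "Process Data". The second cell is a non-interactive version: replace the list **keywords_to_compare** with the specific keywords you wish to analyze. In both cases the plot shows the annual frequencies of the selected keywords, allowing for a comparative analysis.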

In [None]:
import ipywidgets as widgets
from IPython.display import display
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter

# Load the selectable keyword labels of one index category
def fetch_keywords(csv_url):
    """Return the sorted, de-duplicated labels listed in an index CSV.

    Args:
        csv_url: Location of the CSV file, handed straight to ``pd.read_csv``.

    Returns:
        A sorted list of the unique labels found in the 'dcterms:title'
        column, keeping only the text before the first '|' of each entry
        with surrounding whitespace removed. An empty list when the CSV has
        no 'dcterms:title' column.
    """
    table = pd.read_csv(csv_url)
    if 'dcterms:title' not in table.columns:
        return []

    labels = set()
    for title in table['dcterms:title'].dropna().unique():
        first_part, _, _ = title.partition('|')
        labels.add(first_part.strip())
    return sorted(labels)

# Index CSV on GitHub for each keyword category
csv_files = {
    'Events': 'https://raw.githubusercontent.com/fmadore/Islam-West-Africa-Collection/main/Metadata/index_events.csv',
    'Organizations': 'https://raw.githubusercontent.com/fmadore/Islam-West-Africa-Collection/main/Metadata/index_organizations.csv',
    'Persons': 'https://raw.githubusercontent.com/fmadore/Islam-West-Africa-Collection/main/Metadata/index_persons.csv',
    'Topics': 'https://raw.githubusercontent.com/fmadore/Islam-West-Africa-Collection/main/Metadata/index_topics.csv'
}

# Category selector, offering the category names defined above
category_dropdown = widgets.Dropdown(
    description='Category:',
    options=list(csv_files),
    disabled=False,
)

# Keyword selector; created empty, its options are set per category
keywords_dropdown = widgets.Dropdown(
    description='Keywords:',
    options=[],
    disabled=False,
)

# Buttons: one adds the chosen keyword to the list, one triggers the plot
add_keyword_button = widgets.Button(description="Add Keyword")
process_button = widgets.Button(description="Process Data")

# Keywords added so far (module-level state shared by the handlers)
selected_keywords = []

# Function to update keyword dropdown based on selected category
def update_keywords_dropdown(change):
    """Refill the keyword dropdown for the category that was just selected.

    Args:
        change: Change notification from ``observe``; ``change['new']`` is
            the name of the selected category. A name without an entry in
            ``csv_files`` empties the keyword dropdown.
    """
    csv_url = csv_files.get(change['new'])
    keywords_dropdown.options = [] if csv_url is None else fetch_keywords(csv_url)

# Function to add selected keyword to list
def add_keyword(b):
    """Append the keyword shown in the keyword dropdown to ``selected_keywords``.

    Prints a confirmation, or explains why nothing was added: either no
    keyword is selected, or the keyword is already in the list.

    Args:
        b: The clicked button, passed by ``Button.on_click``; unused.
    """
    keyword = keywords_dropdown.value
    if not keyword:
        print("Please select a valid keyword.")
    elif keyword in selected_keywords:
        # A duplicate is a valid keyword, so say what actually happened
        print(f"Keyword already added: {keyword}")
    else:
        selected_keywords.append(keyword)
        print(f"Added keyword: {keyword}")

# Function to process data and generate the plot
def process_data(b=None):
    """Plot the annual frequency of every keyword in ``selected_keywords``.

    The corpus is the file currently chosen in ``excel_file_dropdown``. The
    spreadsheet is expected to provide a 'Date' column and a 'Keywords'
    column whose entries separate individual keywords with ' | '.

    Args:
        b: The clicked button, passed by ``Button.on_click``; unused.
    """
    selected_file_url = excel_file_dropdown.value
    if not (selected_file_url and selected_keywords):
        print("Please select a file and add keywords.")
        return

    df = pd.read_excel(selected_file_url, engine='openpyxl')
    df['Date'] = pd.to_datetime(df['Date'])
    df['Year'] = df['Date'].dt.year

    # Split and count each year's keywords once, instead of once per
    # selected keyword. groupby yields the years in ascending order and
    # skips rows without a year, so no NaN ends up on the x-axis.
    years = []
    yearly_counters = []
    for year, group in df.groupby('Year'):
        years.append(year)
        yearly_counters.append(Counter(
            kw.strip()
            for kws in group['Keywords'].dropna()
            for kw in kws.split(' | ')
        ))

    plt.figure(figsize=(14, 8))
    for keyword in selected_keywords:
        # A Counter reports 0 for keywords that do not occur in a year
        annual_keyword_freq = [counter[keyword] for counter in yearly_counters]
        plt.plot(years, annual_keyword_freq, label=keyword)

    plt.title('Annual Frequency of Selected Keywords')
    plt.xlabel('Year')
    plt.ylabel('Frequency')
    plt.legend()
    plt.show()

# Dropdown widget for selecting the Excel file
excel_urls = {
    'Benin': 'https://raw.githubusercontent.com/fmadore/Islam-West-Africa-Collection/main/Jupyter%20notebooks/Textual%20analysis/corpus_Benin.xlsx',
    'Burkina Faso': 'https://raw.githubusercontent.com/fmadore/Islam-West-Africa-Collection/main/Jupyter%20notebooks/Textual%20analysis/corpus_Burkina.xlsx'
}
excel_file_dropdown = widgets.Dropdown(
    options=excel_urls,
    description='Select File:',
    disabled=False,
)

# Attach event handlers
category_dropdown.observe(update_keywords_dropdown, names='value')
add_keyword_button.on_click(add_keyword)
process_button.on_click(process_data)

# Display the widgets
display(excel_file_dropdown)
display(category_dropdown)
display(keywords_dropdown)
display(add_keyword_button)
display(process_button)

# The observer above only reports later changes, so the keyword dropdown
# would stay empty for the category that is preselected on display. Load its
# keywords once now; this runs after the widgets are shown so they remain
# usable even if the download fails.
update_keywords_dropdown({'new': category_dropdown.value})


In [None]:
from collections import Counter
import pandas as pd
import matplotlib.pyplot as plt

# URL of the Excel file on GitHub
excel_url = 'https://raw.githubusercontent.com/fmadore/Islam-West-Africa-Collection/main/Jupyter%20notebooks/Textual%20analysis/corpus_Benin.xlsx'

# Read the Excel file into a DataFrame
df = pd.read_excel(excel_url, engine='openpyxl')

# Convert the 'Date' column to datetime format
df['Date'] = pd.to_datetime(df['Date'])

# Extract the year from the 'Date' column and create a new column 'Year'
df['Year'] = df['Date'].dt.year

# Keywords to compare (replace these with the keywords you're interested in)
keywords_to_compare = ['Hadj', 'Women in Islam', 'Terrorism and radicalization', 'Cooperation with Arab states', 'Secularism', 'Islamic faith-based education', 'Religious pluralism']

# Initialize a dictionary to hold the annual keyword frequencies
annual_keyword_freq = {keyword: [] for keyword in keywords_to_compare}

# Loop through each year and count the occurrences of the specified keywords.
# The years are recorded in the order groupby yields them (ascending, rows
# without a year skipped) so that each row of counts is labelled with the
# year it was computed for. df['Year'].unique() is in order of appearance and
# may include NaN, which would mislabel the rows or make the lengths disagree.
years = []
for year, group in df.groupby('Year'):
    years.append(year)
    yearly_counter = Counter()
    for keywords in group['Keywords'].dropna():
        yearly_counter.update(keywords.split(' | '))
    for keyword in keywords_to_compare:
        annual_keyword_freq[keyword].append(yearly_counter.get(keyword, 0))

# Create a DataFrame for the annual keyword frequencies
annual_keyword_df = pd.DataFrame(annual_keyword_freq, index=years)

# Plot the annual keyword frequencies
plt.figure(figsize=(14, 8))
for keyword in keywords_to_compare:
    plt.plot(annual_keyword_df.index, annual_keyword_df[keyword], label=keyword)
plt.title('Annual Frequency of Selected Keywords')
plt.xlabel('Year')
plt.ylabel('Frequency')
plt.legend()

# Save the figure (before show(), while the figure is still current)
plt.savefig('Annual_frequency_selected_keywords.png')
# Show the figure
plt.show()