In [3]:
import plotly.graph_objects as go
import plotly.io as pio

# Define the base template
# Typeface shared by every text element in the template.
_FONT_FAMILY = 'Open Sans, sans-serif'


def _axis_style(anchor):
    """
    Build the layout settings shared by the x- and y-axis.

    Parameters:
    - anchor (str): Identifier of the opposite axis this axis is anchored to
      ('y' for the x-axis, 'x' for the y-axis).

    Returns:
    - dict: Axis properties (grid, tick font, title font, axis line).
    """
    return dict(
        anchor=anchor,
        showgrid=True,
        gridcolor='#888888',  # Darker grid lines
        tickfont=dict(size=24, family=_FONT_FAMILY),
        # The axis title font is set through `title.font`; the older flat
        # `titlefont` property is deprecated and rejected by newer Plotly releases.
        title=dict(font=dict(size=26, family=_FONT_FAMILY)),
        linecolor='#333333',
        linewidth=2,  # Thickness of the axis line
    )


base = go.layout.Template(
    layout=go.Layout(
        paper_bgcolor='#FFF5CC',
        plot_bgcolor='#FFF5CC',
        height=800,
        width=800 * 1.618,  # Width derived from the height (factor 1.618)
        xaxis=_axis_style('y'),
        yaxis=_axis_style('x'),
        font=dict(
            color='#333333',
            size=28,
            family=_FONT_FAMILY
        ),
        colorway=["#348273", "#280F3C", "#CC5500", "#333333", "#2E86C1", "#9B59B6", "#28B463", "#F39C12", "#E74C3C", "#3498DB"],
        title=go.layout.Title(
            text='',
            font=dict(
                size=34,
                color='#333333',
                family=_FONT_FAMILY
            ),
            x=0.05,
        )
    ),
    data=dict(
        scatter=[
            go.Scatter(
                line=dict(width=6)  # Set the line width for scatter plots
            )
        ]
    )
)

# Register the base template and make it the default for all new figures
pio.templates['base'] = base
pio.templates.default = 'base'

In [34]:
import pandas as pd
from IPython.display import display, HTML

# Load the CSV file into a DataFrame named f_casualties_daily
f_casualties_daily = pd.read_csv('casualties_daily.csv')

# Optional filter (disabled): keep only the rows reported by the Gaza MOH
# f_casualties_daily = f_casualties_daily[f_casualties_daily['report_source'] == 'mohtel']


def _daily_from_cumulative(cumulative):
    """
    Convert a cumulative series into per-row increments.

    The first row has no predecessor, so its increment is the cumulative value itself.
    Only that first row is seeded: any other NaN difference (caused by a missing
    cumulative value) is left as NaN instead of being replaced by the running total.

    Parameters:
    - cumulative (Series): Running totals, one value per row.

    Returns:
    - Series: Row-to-row differences, with the first row set to the first cumulative value.
    """
    daily = cumulative.diff()
    if not daily.empty:
        daily.iloc[0] = cumulative.iloc[0]
    return daily


# Calculate ext_killed_men_cum as the remainder of the total after children and women
f_casualties_daily['ext_killed_men_cum'] = (
    f_casualties_daily['ext_killed_cum'] -
    f_casualties_daily['ext_killed_children_cum'] -
    f_casualties_daily['ext_killed_women_cum']
)

# Calculate killed_children, killed_women, killed_men from their cumulative columns.
# Because the men series is a remainder, inconsistent source totals can yield
# negative daily values for it.
for group in ('children', 'women', 'men'):
    f_casualties_daily[f'killed_{group}'] = _daily_from_cumulative(
        f_casualties_daily[f'ext_killed_{group}_cum']
    )

# Reorder the columns to place all _cum columns together
columns_order = [
    'report_date', 'report_source', 'report_period', 'killed', 'killed_children', 'killed_women', 'killed_men',
    'ext_killed', 'ext_massacres_cum', 'killed_cum', 'ext_killed_cum', 'ext_killed_children_cum',
    'ext_killed_women_cum', 'ext_killed_men_cum', 'injured_cum', 'ext_injured', 'ext_injured_cum',
    'ext_civdef_killed_cum', 'med_killed_cum', 'ext_med_killed_cum', 'press_killed_cum', 'ext_press_killed_cum'
]

# Apply the new column order (raises KeyError if any listed column is absent)
f_casualties_daily = f_casualties_daily[columns_order]

# Display in a horizontally scrollable HTML table
scrollable_table = f"""
<div style="overflow-x:auto;">
    {f_casualties_daily.to_html(classes='table table-striped table-bordered', index=False)}
</div>
"""

display(HTML(scrollable_table))


report_date,report_source,report_period,killed,killed_children,killed_women,killed_men,ext_killed,ext_massacres_cum,killed_cum,ext_killed_cum,ext_killed_children_cum,ext_killed_women_cum,ext_killed_men_cum,injured_cum,ext_injured,ext_injured_cum,ext_civdef_killed_cum,med_killed_cum,ext_med_killed_cum,press_killed_cum,ext_press_killed_cum
2023-10-07,mohtel,24,232.0,0.0,0.0,232.0,232,0,232.0,232,0,0,232,1610.0,1610,1610,0,6.0,6,1.0,1
2023-10-08,mohtel,24,138.0,78.0,41.0,19.0,138,0,370.0,370,78,41,251,1788.0,178,1788,0,,6,1.0,1
2023-10-09,mohtel,24,190.0,13.0,20.0,157.0,190,8,560.0,560,91,61,408,2271.0,483,2271,0,6.0,6,3.0,3
2023-10-10,mohtel,24,340.0,169.0,169.0,2.0,340,8,900.0,900,260,230,410,4000.0,1729,4000,0,,6,7.0,7
2023-10-11,gmotel,24,200.0,138.0,0.0,62.0,200,23,1100.0,1100,398,230,472,5184.0,1184,5184,0,10.0,10,,7
2023-10-12,mohtel,24,317.0,102.0,46.0,169.0,317,23,1417.0,1417,500,276,641,5763.0,579,5763,0,11.0,11,,7
2023-10-13,missing,24,483.0,0.0,116.0,367.0,483,23,1900.0,1900,500,392,1008,,1475,7238,0,11.0,11,,7
2023-10-14,gmotel,24,328.0,100.0,208.0,20.0,328,23,2228.0,2228,600,600,1028,8714.0,1476,8714,0,11.0,11,,7
2023-10-15,gmotel,24,442.0,100.0,135.0,207.0,442,23,2670.0,2670,700,735,1235,9200.0,486,9200,0,,11,,7
2023-10-16,gmotel,24,138.0,153.0,201.0,-216.0,138,371,2808.0,2808,853,936,1019,10850.0,1650,10850,7,37.0,37,,7


In [24]:
import plotly.express as px

# Create the stacked bar chart of the cumulative death counts per category.
# The plotted columns are the running totals (*_cum), so the title says
# "Cumulative" rather than "Daily".
fig = px.bar(
    f_casualties_daily,
    x='report_date',
    y=['ext_killed_children_cum', 'ext_killed_women_cum', 'ext_killed_men_cum'],
    labels={'value': 'Number of Deaths', 'variable': 'Category'},
    title='Cumulative Deaths by Category: Children, Women, Men',
    template='base'  # Reference to the predefined Plotly template
)

# Update the layout for better visualization
fig.update_layout(
    barmode='stack',  # Stacked bar chart
    xaxis_title='Report Date',
    yaxis_title='Number of Deaths',
    legend_title_text='Category'
)

# Show the plot
fig.show()


In [32]:
import plotly.express as px

def plot_metric(f_casualties_daily, metric_column):
    """
    Render a line chart of one metric column against the report date.

    Parameters:
    - f_casualties_daily (DataFrame): Source data; must contain 'report_date'
      and the column named by metric_column.
    - metric_column (str): Name of the column drawn on the y-axis.

    Returns:
    - None. The figure is displayed via fig.show() and is not returned.
    """
    chart_title = f'Trend of {metric_column} Over Time'
    axis_labels = {metric_column: metric_column, 'report_date': 'Report Date'}

    # Build the line chart with the predefined template
    line_chart = px.line(
        f_casualties_daily,
        x='report_date',
        y=metric_column,
        title=chart_title,
        labels=axis_labels,
        template='base',
    )

    # Set the axis titles and hide the legend (only one series is drawn),
    # then display the figure
    line_chart.update_layout(
        xaxis_title='Report Date',
        yaxis_title=metric_column,
        showlegend=False,
    ).show()

# Example usage: the cumulative series first, then the derived daily series.
for metric_name in (
    'ext_killed_children_cum',
    'ext_killed_women_cum',
    'killed_children',
    'killed_women',
):
    plot_metric(f_casualties_daily, metric_name)


In [30]:
import pandas as pd

def calculate_correlation(df, column1, column2):
    """
    Print and return the correlation between two columns of a DataFrame.

    Parameters:
    - df (DataFrame): The DataFrame holding both columns.
    - column1 (str): Name of the first column.
    - column2 (str): Name of the second column.

    Returns:
    - correlation (float): The Pearson correlation coefficient (the default
      method of DataFrame.corr) between the two columns.
    """
    # Restrict to the two columns so the correlation matrix is 2x2
    correlation_matrix = df[[column1, column2]].corr()

    # The off-diagonal entry (row 0, column 1) is the coefficient between the pair
    correlation = correlation_matrix.iat[0, 1]

    print(f"The correlation between {column1} and {column2} is: {correlation}")
    return correlation

# Example usage: compare the cumulative pair, then the daily pair.
# `correlation` ends up holding the result for the last pair.
for first_column, second_column in (
    ('ext_killed_children_cum', 'ext_killed_women_cum'),
    ('killed_children', 'killed_women'),
):
    correlation = calculate_correlation(f_casualties_daily, first_column, second_column)

The correlation between ext_killed_children_cum and ext_killed_women_cum is: 0.9970268702625384
The correlation between killed_children and killed_women is: 0.6380023602738988


In [33]:
import plotly.express as px

# Calculate the total number of killed individuals (cumulative) for each day
# as the sum of the three category columns
f_casualties_daily['total_ext_killed_cum'] = (
    f_casualties_daily['ext_killed_children_cum'] +
    f_casualties_daily['ext_killed_women_cum'] +
    f_casualties_daily['ext_killed_men_cum']
)

# Calculate the percentage of men killed as a share of the total.
# The result is already scaled to 0-100, so the axis must not rescale it.
f_casualties_daily['pct_men_killed'] = (
    f_casualties_daily['ext_killed_men_cum'] / f_casualties_daily['total_ext_killed_cum'] * 100
)

# Plot the percentage of men killed as a share of total using Plotly
fig = px.line(
    f_casualties_daily,
    x='report_date',
    y='pct_men_killed',
    title='Percentage of Men Killed as a Share of Total Over Time',
    labels={'pct_men_killed': 'Percentage of Men Killed', 'report_date': 'Report Date'},
    template='base'  # Use the predefined template
)

# Update layout for better visualization
fig.update_layout(
    xaxis_title='Report Date',
    yaxis_title='Percentage of Men Killed',
    # '.2f%' is not a valid d3-format specifier, and the '%' format type would
    # multiply the values by 100 again. Format as a fixed two-decimal number
    # and append the percent sign as a tick suffix instead.
    yaxis=dict(tickformat='.2f', ticksuffix='%')
)

# Show the plot
fig.show()