In [2]:
import pandas as pd
import plotly.graph_objects as go
import re

In [3]:
# Data source: https://www.singstat.gov.sg/publications/population/marriages-and-divorces
# Sheet used: "T1" (T1 Cumulative Proportion of Dissolved Resident Marriages)

# Inspired by the Straits Times article titled:
# "Largest share of divorces and annulments in S’pore is among those wed for between 5 and 10 years"
# https://www.straitstimes.com/singapore/largest-share-of-divorces-and-annulments-in-s-pore-is-among-those-wed-for-between-5-and-10-years

file_path = "data_files/Dissolution of Marriages Among Marriage Cohorts.xlsx"

# Read the sheet, name the first (unnamed) column "marriage_cohort" and wrap
# every other header as "before_<header>_anniversary".
df = (
    pd.read_excel(file_path, sheet_name="T1", skiprows=6)
    .rename(columns={"Unnamed: 0": "marriage_cohort"})
    .rename(columns=lambda col: col if col == "marriage_cohort" else f"before_{col}_anniversary")
)

# Synthetic 0th-anniversary column (all zeros) so the first increment can be computed.
df["before_0_anniversary"] = 0

# Move the 0th-anniversary column right after the cohort column.
leading_columns = ["marriage_cohort", "before_0_anniversary"]
df = df[leading_columns + list(df.columns[1:-1])]

# Keep only the "before_<num>_anniversary" columns whose <num> is a multiple of 5
# (the synthetic 0th-anniversary column qualifies as well).
columns_to_plot = []
for name in df.columns:
    if not name.startswith("before_"):
        continue
    if int(name.split("_")[1]) % 5 == 0:
        columns_to_plot.append(name)

# Column-to-column differences of the selected cumulative values, per cohort, rounded to 2 decimals.
cumulative_by_cohort = df[columns_to_plot].set_index(df["marriage_cohort"])
df_incremental = cumulative_by_cohort.diff(axis=1).reset_index().round(2)

In [4]:
def snake_case_to_readable(text):
    """
    Turn a snake_case string into space-separated words, each passed through str.capitalize().
    Example: "before_1_anniversary" becomes "Before 1 Anniversary".
    """
    return ' '.join(part.capitalize() for part in text.split('_'))

In [5]:
# One line per column after the cohort column, plotted against the marriage cohort
cumulative_lines = [
    go.Scatter(x=df.marriage_cohort, y=df[column], mode='lines', name=column)
    for column in df.columns[1:]
]
fig = go.Figure(data=cumulative_lines)

# Horizontal legend anchored above the plotting area
legend_above = dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)

fig.update_layout(
    title="Cumulative Graph of before_{num}_anniversary",
    xaxis_title="Marriage Cohort",
    yaxis_title="Cumulative Value",
    showlegend=True,
    legend=legend_above,
    hovermode="x"
)

# Render the figure
fig.show()


In [6]:
# Readable legend label for every plotted column except the first entry of columns_to_plot
column_names_mapping = {}
for col in columns_to_plot[1:]:
    column_names_mapping[col] = snake_case_to_readable(col)

# One bar trace per plotted column; stacking is configured in the layout below
bar_columns = [column for column in columns_to_plot[1:] if column != "marriage_cohort"]
stacked_bars = [
    go.Bar(
        x=df_incremental["marriage_cohort"],
        y=df_incremental[column],
        name=column_names_mapping[column],
    )
    for column in bar_columns
]
fig = go.Figure(data=stacked_bars)

# Vertical legend pinned to the top-right corner of the plotting area
legend_top_right = dict(orientation="v", yanchor="top", y=0.99, xanchor="right", x=0.99)

fig.update_layout(
    title="Divorces and Annulments (% of yearly marriage cohort) before Xth Anniversary",
    xaxis_title="Marriage Cohort",
    yaxis_title="Divorces & Annulments (%)",
    showlegend=True,
    legend=legend_top_right,
    barmode="stack",
)

# Render the figure
fig.show()

In [7]:
# Readable legend label for every plotted column except the first entry of columns_to_plot
column_names_mapping = {}
for col in columns_to_plot[1:]:
    column_names_mapping[col] = snake_case_to_readable(col)

# Shared text styling: value written inside each bar segment, centred and rotated
inside_label_style = dict(textposition='inside', insidetextanchor='middle', textangle=-90)

# One bar trace per plotted column, each segment labelled with its own value
bar_columns = [column for column in columns_to_plot[1:] if column != "marriage_cohort"]
labelled_bars = [
    go.Bar(
        x=df_incremental["marriage_cohort"],
        y=df_incremental[column],
        text=df_incremental[column],
        name=column_names_mapping[column],
        **inside_label_style,
    )
    for column in bar_columns
]
fig = go.Figure(data=labelled_bars)

# Vertical legend pinned to the top-right corner of the plotting area
legend_top_right = dict(orientation="v", yanchor="top", y=0.99, xanchor="right", x=0.99)

fig.update_layout(
    title="Divorces and Annulments (% of yearly marriage cohort) before Xth Anniversary",
    xaxis_title="Marriage Cohort",
    yaxis_title="Divorces & Annulments (%)",
    showlegend=True,
    legend=legend_top_right,
    barmode="stack",
)

# Render the figure
fig.show()


In [8]:
# Would need divorces and annulments across all marriages, broken down by "before x-th anniversary", but that data could not be obtained.

In [9]:
file_path = "data_files/marriages.xlsx"
df_t2_raw = pd.read_excel(file_path, sheet_name="T2", skiprows=8)

# Transpose the first 26 sheet rows and discard the old column labels
df_t2 = df_t2_raw.iloc[:26].T.reset_index().drop(columns=["index"])

# Promote the first transposed row to column headers ...
header_row = df_t2.iloc[0]
df_t2.columns = header_row

# ... then remove that row from the data
df_t2 = df_t2.reset_index(drop=True).drop(index=0)

df_t2["Data Series"] = df_t2["Data Series"].astype(int)

In [10]:
# Summary-rate columns that are not age-specific and are left out of the 2022 table
summary_rate_columns = [
    'Male General Marriage Rate (Per 1,000 Unmarried Resident Males/Females Aged 15-49 Years)',
    'Female General Marriage Rate (Per 1,000 Unmarried Resident Males/Females Aged 15-49 Years)',
    'Crude Marriage Rate (Per 1,000 Residents)',
]

# Keep only the row(s) whose "Data Series" value is 2022
is_2022 = df_t2["Data Series"] == 2022
df_2022 = df_t2.drop(columns=summary_rate_columns).loc[is_2022]

In [11]:
def transform_column_name(column_name):
    """
    Shorten a verbose rate header into a compact label.
    Example: "  20 - 24 Years (Per 1,000 Unmarried Resident Females)" becomes "20 - 24 y/o Female".
    Headers for the "65 Years & Over" group become ">=65 Female" or ">=65 Male".
    Any other header is returned with leading/trailing whitespace removed.
    """
    cleaned = column_name.strip()

    # The top age group has no "N - M" range to extract, so it is matched literally
    over_65_labels = (
        ("65 Years & Over (Per 1,000 Unmarried Resident Females)", ">=65 Female"),
        ("65 Years & Over (Per 1,000 Unmarried Resident Males)", ">=65 Male"),
    )
    for marker, label in over_65_labels:
        if marker in cleaned:
            return label

    # Headers without "(Per" are passed through unchanged (apart from the strip)
    if "(Per" not in cleaned:
        return cleaned

    # Pull out the "N - M" age range and the gender word
    found = re.search(r"(\d+ - \d+).*(Male|Female)", cleaned)
    if found is None:
        return cleaned

    age_range, gender = found.groups()
    return f"{age_range} y/o {gender}"

In [12]:
# Apply the compact labels to every column header
df_2022 = df_2022.rename(columns=transform_column_name)

# x-axis labels: the first 7 characters (e.g. "20 - 24") of the first ten "y/o" columns
# after the first column, followed by the ">=65" group
range_labels = [name[:7] for name in df_2022.columns[1:] if "y/o" in name]
age_ranges = range_labels[:10] + [">=65"]

In [13]:
# Split the columns by gender; the match is case-sensitive, so "Male" does not hit "Female"
is_female_column = df_2022.columns.str.contains("Female")
is_male_column = df_2022.columns.str.contains("Male")

female_data = df_2022[df_2022.columns[is_female_column]]
male_data = df_2022[df_2022.columns[is_male_column]]

In [14]:
def _rate_bar(age_label, rates, trace_name, gender, colour):
    """Build one bar for a single age group, grouped and coloured by gender, with its value shown outside."""
    return go.Bar(
        x=[age_label],
        y=rates.astype(float),
        text=rates,
        textposition='outside',
        insidetextanchor='middle',
        textangle=0,
        name=trace_name,
        offsetgroup=gender,
        marker_color=colour,
    )


fig = go.Figure()

# Every column except the last one of each gender frame: all female bars first, then all male bars
for gender_frame, gender, colour in ((female_data, "Female", "pink"), (male_data, "Male", "blue")):
    for i, column in enumerate(gender_frame.columns[:-1]):
        fig.add_trace(_rate_bar(age_ranges[i], gender_frame[column], column, gender, colour))

# The ">=65" group is added separately, selected by column position.
# NOTE(review): assumes the last column of df_2022 is ">=65 Female" and column 11 is ">=65 Male" - confirm against the sheet layout.
fig.add_trace(_rate_bar(">=65", df_2022[df_2022.columns[-1]], ">=65 Female", "Female", "pink"))
fig.add_trace(_rate_bar(">=65", df_2022[df_2022.columns[11]], ">=65 Male", "Male", "blue"))

# The built-in legend is hidden; a hand-written colour key is drawn with annotations instead
colour_key = [
    dict(x=1, y=y_pos, xref='paper', yref='paper', text=note,
         showarrow=False, font=dict(size=14, color=note_colour))
    for y_pos, note, note_colour in (
        (1.25, "LEGEND:", 'black'),
        (1.15, "Pink: Female", 'pink'),
        (1.10, "Blue: Male", 'blue'),
    )
]

fig.update_layout(
    title="Marriages Per 1,000 Unmarried Residents of given Gender (2022)",
    xaxis_title="Age Range",
    yaxis_title="Marriages Per 1,000 Unmarried <br> Residents of given Gender",
    showlegend=False,
    barmode='group',
    annotations=colour_key,
)

# Render the figure
fig.show()

In [15]:
# other findings: marriages.t2/3: Proportion Of Brides Aged Below 21 Years (Per Cent) about 3x of Proportion Of Grooms Aged Below 21 Years (Per Cent)

In [16]:
# Pair the first-row female and male values under each age-range label
ar_data = {
    ar: [f, m]
    for f, m, ar in zip(female_data.iloc[0], male_data.iloc[0], age_ranges)
}

# Rows = age ranges, columns = "Female" / "Male"
df_ar_data = pd.DataFrame(ar_data).T.rename(columns={0: "Female", 1: "Male"})

In [17]:
# Smoothed line chart with one series per gender (female first, then male)
fig3 = go.Figure()

for gender, colour in (("Female", 'pink'), ("Male", 'blue')):
    fig3.add_trace(go.Scatter(
        x=df_ar_data.index,
        y=df_ar_data[gender],
        mode='lines+markers',
        name=gender,
        line=dict(color=colour, smoothing=1.1, shape='spline'),
        marker=dict(color=colour),
    ))

fig3.update_layout(
    title="Marriages Per 1,000 Unmarried Residents of Given Gender",
    xaxis_title="Age Range",
    yaxis_title="Marriages Per 1,000 Unmarried <br> Residents of Given Gender",
    showlegend=True,
)

# Render the figure
fig3.show()


In [18]:
df_t4_raw = pd.read_excel(file_path, sheet_name="T4", skiprows=10)

# Transpose the first 25 sheet rows, keeping the old column labels as an ordinary column
df_t4 = df_t4_raw.iloc[:25].T.reset_index(drop=False)

# Promote the first row (whitespace-stripped) to column headers, then remove it from the data
df_t4.columns = [label.strip() for label in df_t4.iloc[0]]
df_t4 = df_t4.drop(index=0)

df_t4["Data Series"] = df_t4["Data Series"].astype(int)

In [19]:
# Count of marriages where the age gap is at most one year, in either direction
same_age = df_t4["Same Age"]
groom_older_1 = df_t4["Groom Older By 1 Year"]
groom_younger_1 = df_t4["Groom Younger By 1 Year"]
total = df_t4["Total"]

df_t4["Age Diff <= 1"] = same_age + groom_older_1 + groom_younger_1
# Share of the total, rounded to 4 decimals
df_t4["Age Diff <= 1 Ratio"] = (df_t4["Age Diff <= 1"] / total).apply(lambda x: round(x, 4))

# Everything else: remaining count and its share of the total
df_t4["Age Diff > 1"] = total - df_t4["Age Diff <= 1"]
df_t4["Age Diff > 1 Ratio"] = (df_t4["Age Diff > 1"] / total).apply(lambda x: round(x, 4))

In [20]:
# Shared text styling: ratio written inside each bar, centred and rotated
inside_label_style = dict(textposition='inside', insidetextanchor='middle', textangle=-90)

# Lower segment: marriages with an age gap of at most one year
within_one_bar = go.Bar(
    x=df_t4['Data Series'],
    y=df_t4['Age Diff <= 1'],
    text=df_t4['Age Diff <= 1 Ratio'],
    name="Age Diff <= 1",
    marker_color='blue',
    **inside_label_style,
)

# Upper segment: all other marriages, explicitly based on top of the lower segment
beyond_one_bar = go.Bar(
    x=df_t4['Data Series'],
    y=df_t4['Age Diff > 1'],
    text=df_t4['Age Diff > 1 Ratio'],
    name="Age Diff > 1",
    marker_color='red',
    base=df_t4['Age Diff <= 1'],
    **inside_label_style,
)

fig4 = go.Figure(data=[within_one_bar, beyond_one_bar])

fig4.update_layout(
    title="Yearly Counts for 'Age Diff <= 1' and 'Age Diff > 1'",
    xaxis_title="Data Series (Year)",
    yaxis_title="Count",
    barmode='stack',
    showlegend=True,
)

# Render the figure
fig4.show()

In [21]:
### USING THIS
# Line chart: yearly percentage of marriages where the couple's age gap is at most 1 year
pct_within_1 = df_t4['Age Diff <= 1 Ratio'] * 100

fig4 = go.Figure()

fig4.add_trace(go.Scatter(
    x=df_t4['Data Series'],  # x-axis: year
    y=pct_within_1,  # y-axis: 'Age Diff <= 1 Ratio' as a percentage
    mode='lines+markers',  # Connect dots with lines and add markers
    name='Age Diff <= 1 Ratio',
    line=dict(color='blue'),  # Line color
    marker=dict(color='blue'),  # Marker color
    # Per-point text, rounded to 2 decimal places so float artefacts
    # (e.g. 23.400000000000002) from the multiplication are not displayed
    text=pct_within_1.apply(lambda pct: round(pct, 2)),
    textposition='top center',  # Position of text relative to markers
))

# Rows for the two years that get an explicit annotation
data_2021 = df_t4[df_t4['Data Series'] == 2021]
data_2005 = df_t4[df_t4['Data Series'] == 2005]

# Percentage labels for the annotated points
label_2021 = f"{data_2021['Age Diff <= 1 Ratio'].iloc[0] * 100:.2f}%"
label_2005 = f"{data_2005['Age Diff <= 1 Ratio'].iloc[0] * 100:.2f}%"

# Arrow annotation at each highlighted year (2021 first, then 2005)
for year_rows, year_label in ((data_2021, label_2021), (data_2005, label_2005)):
    fig4.add_annotation(
        x=year_rows['Data Series'].iloc[0],
        y=year_rows['Age Diff <= 1 Ratio'].iloc[0] * 100,
        text=year_label,
        showarrow=True,
        arrowhead=2,
        ax=0,
        ay=-40,
    )

# Update layout
fig4.update_layout(
    title="% of Marriages with Couples of Age Gap <= 1 (1984-2022)",
    xaxis_title="Year",  # x-axis label
    yaxis_title="% of Marriages with Couples <br> of Age Gap <= 1",  # y-axis label
)

# Show the plot
fig4.show()

In [22]:
## https://www.mof.gov.sg/news-publications/press-releases/New-Package-Of-Measures-To-Support-Parenthood
# Interestingly, HDB Top-up Grant for Singles who Marry. 
# Currently, qualifying singles may receive a CPF housing grant of $11,000 to buy a resale flat. Singles who have obtained this grant, 
# and who marry on or after 1 Aug 2004, will now receive a Top-Up Grant to the prevailing CPF family housing grant.

# Conclusion: 
# Those living in Singapore look to just marry within their age range (+-2)

In [23]:
# Count of marriages where the age gap is at most two years, in either direction
same_age = df_t4["Same Age"]
groom_older_1 = df_t4["Groom Older By 1 Year"]
groom_older_2 = df_t4["Groom Older By 2 Years"]
groom_younger_1 = df_t4["Groom Younger By 1 Year"]
groom_younger_2 = df_t4["Groom Younger By 2 Years"]
total = df_t4["Total"]

df_t4["Age Diff <= 2"] = same_age + groom_older_1 + groom_older_2 + groom_younger_1 + groom_younger_2
# Share of the total, rounded to 4 decimals
df_t4["Age Diff <= 2 Ratio"] = (df_t4["Age Diff <= 2"] / total).apply(lambda x: round(x, 4))

# Everything else: remaining count and its share of the total
df_t4["Age Diff > 2"] = total - df_t4["Age Diff <= 2"]
df_t4["Age Diff > 2 Ratio"] = (df_t4["Age Diff > 2"] / total).apply(lambda x: round(x, 4))

In [24]:
### USING THIS
# Line chart: yearly percentage of marriages where the couple's age gap is at most 2 years
pct_within_2 = df_t4['Age Diff <= 2 Ratio'] * 100

fig5 = go.Figure()

fig5.add_trace(go.Scatter(
    x=df_t4['Data Series'],  # x-axis: year
    y=pct_within_2,  # y-axis: 'Age Diff <= 2 Ratio' as a percentage
    mode='lines+markers',  # Connect dots with lines and add markers
    name='Age Diff <= 2 Ratio',
    line=dict(color='blue'),  # Line color
    marker=dict(color='blue'),  # Marker color
    # Per-point text, rounded to 2 decimal places so float artefacts
    # from the multiplication are not displayed
    text=pct_within_2.apply(lambda pct: round(pct, 2)),
    textposition='top center',  # Position of text relative to markers
))

# Rows for the two years that get an explicit annotation
data_2021 = df_t4[df_t4['Data Series'] == 2021]
data_2005 = df_t4[df_t4['Data Series'] == 2005]

# Percentage labels for the annotated points
label_2021 = f"{data_2021['Age Diff <= 2 Ratio'].iloc[0] * 100:.2f}%"
label_2005 = f"{data_2005['Age Diff <= 2 Ratio'].iloc[0] * 100:.2f}%"

# Arrow annotation at each highlighted year (2021 first, then 2005)
for year_rows, year_label in ((data_2021, label_2021), (data_2005, label_2005)):
    fig5.add_annotation(
        x=year_rows['Data Series'].iloc[0],
        y=year_rows['Age Diff <= 2 Ratio'].iloc[0] * 100,
        text=year_label,
        showarrow=True,
        arrowhead=2,
        ax=0,
        ay=-40,
    )


# Update layout
fig5.update_layout(
    title="% of Marriages with Couples of Age Gap <= 2 (1984-2022)",
    xaxis_title="Year",  # x-axis label
    yaxis_title="% of Marriages with Couples <br> of Age Gap <= 2",  # y-axis label
)

# Show the plot
fig5.show()

In [25]:
## https://www.census.gov/content/dam/Census/library/working-papers/2023/demo/sehsd-wp2023-10.pdf
# In contrast, in the US for 2021, the Avg Age Gap for opposite-sex married couples is about 3.69 years.

In [26]:
df_t16_raw = pd.read_excel(file_path, sheet_name="T16", skiprows=10)

# The sheet is cut into five consecutive 13-row sections, each transposed on its own
rows_per_section = 13
df_t16, df_t16_chinese, df_t16_indians, df_t16_others, df_t16_inter_wc = (
    df_t16_raw.iloc[start:start + rows_per_section].T
    for start in range(0, 5 * rows_per_section, rows_per_section)
)

df_t16_list = [df_t16, df_t16_chinese, df_t16_indians, df_t16_others, df_t16_inter_wc]

def align_cols_data(df):
    """
    Turn a transposed sheet section into a table with proper headers.

    The index is moved into an ordinary column, the first row (with each value
    whitespace-stripped) becomes the column headers, rows 0 and 1 are removed,
    and the "Data Series" column is cast to int. Returns the resulting DataFrame.
    """
    flattened = df.reset_index(drop=False)

    # Headers come from the first row of the flattened frame
    header_row = flattened.iloc[0]
    flattened.columns = [label.strip() for label in header_row]

    # Remove the header row and the row directly after it
    trimmed = flattened.drop(index=[0, 1])
    trimmed["Data Series"] = trimmed["Data Series"].astype(int)

    return trimmed

# Run the same header/row clean-up over every section of the T16 sheet
df_t16_aligned_list = list(map(align_cols_data, df_t16_list))
    

In [27]:
# Name each aligned section, in the order the sections were sliced from the sheet
(
    df_total,
    df_aligned_chinese,
    df_aligned_indian,
    df_aligned_others,
    df_aligned_inter_wc,
) = df_t16_aligned_list

In [28]:
# Month columns are everything after the first two columns of the aligned table
months = df_total.columns[2:]
df_total = df_total.reset_index(drop=True)

# Display names used on the x-axis
month_names = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']

# Keep only the years 2013 to 2022 (inclusive)
in_window = df_total['Data Series'].between(2013, 2022)
df_total_filt = df_total[in_window].reset_index(drop=True)

# One line per year, labelled with the year
fig6 = go.Figure()
for year, values in zip(df_total_filt['Data Series'], df_total_filt[months].values):
    fig6.add_trace(go.Scatter(x=month_names, y=values, mode='lines+markers', name=str(year)))

fig6.update_layout(
    title="Annual Marriages By Month Of Marriage (Under The Women's Charter)",
    xaxis_title="Months",
    yaxis_title="Number of Marriages",
    showlegend=True,
)

# Render the figure
fig6.show()


In [29]:
# Month columns are everything after the first two columns of the aligned table
months = df_aligned_chinese.columns[2:]
df_aligned_chinese = df_aligned_chinese.reset_index(drop=True)

# Display names used on the x-axis
month_names = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']

# Keep only the years 2013 to 2022 (inclusive)
in_window = df_aligned_chinese['Data Series'].between(2013, 2022)
df_aligned_chinese_filt = df_aligned_chinese[in_window].reset_index(drop=True)

# One line per year, labelled with the year
fig7 = go.Figure()
for year, values in zip(df_aligned_chinese_filt['Data Series'], df_aligned_chinese_filt[months].values):
    fig7.add_trace(go.Scatter(x=month_names, y=values, mode='lines+markers', name=str(year)))

fig7.update_layout(
    title="Annual Marriages By Month Of Marriage (Under The Women's Charter - Chinese Ethnic Group)",
    xaxis_title="Months",
    yaxis_title="Number of Marriages",
    showlegend=True,
)

# Render the figure
fig7.show()


In [30]:
# Month columns are everything after the first two columns of the aligned table
months = df_aligned_indian.columns[2:]
df_aligned_indian = df_aligned_indian.reset_index(drop=True)

# Display names used on the x-axis
month_names = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']

# Keep only the years 2013 to 2022 (inclusive)
in_window = df_aligned_indian['Data Series'].between(2013, 2022)
df_aligned_indian_filt = df_aligned_indian[in_window].reset_index(drop=True)

# One line per year, labelled with the year
fig8 = go.Figure()
for year, values in zip(df_aligned_indian_filt['Data Series'], df_aligned_indian_filt[months].values):
    fig8.add_trace(go.Scatter(x=month_names, y=values, mode='lines+markers', name=str(year)))

fig8.update_layout(
    title="Annual Marriages By Month Of Marriage (Under The Women's Charter - Indian Ethnic Group)",
    xaxis_title="Months",
    yaxis_title="Number of Marriages",
    showlegend=True,
)

# Render the figure
fig8.show()


In [31]:
# Month columns are everything after the first two columns of the aligned table
months = df_aligned_others.columns[2:]
df_aligned_others = df_aligned_others.reset_index(drop=True)

# Display names used on the x-axis
month_names = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']

# Keep only the years 2013 to 2022 (inclusive)
in_window = df_aligned_others['Data Series'].between(2013, 2022)
df_aligned_others_filt = df_aligned_others[in_window].reset_index(drop=True)

# One line per year, labelled with the year
fig9 = go.Figure()
for year, values in zip(df_aligned_others_filt['Data Series'], df_aligned_others_filt[months].values):
    fig9.add_trace(go.Scatter(x=month_names, y=values, mode='lines+markers', name=str(year)))

fig9.update_layout(
    title="Annual Marriages By Month Of Marriage (Under The Women's Charter - Other Ethnic Groups)",
    xaxis_title="Months",
    yaxis_title="Number of Marriages",
    showlegend=True,
)

# Render the figure
fig9.show()


In [32]:
# Month columns are everything after the first two columns of the aligned table
months = df_aligned_inter_wc.columns[2:]
df_aligned_inter_wc = df_aligned_inter_wc.reset_index(drop=True)

# Display names used on the x-axis
month_names = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']

# Keep only the years 2013 to 2022 (inclusive)
in_window = df_aligned_inter_wc['Data Series'].between(2013, 2022)
df_aligned_inter_wc_filt = df_aligned_inter_wc[in_window].reset_index(drop=True)

# One line per year, labelled with the year
fig10 = go.Figure()
for year, values in zip(df_aligned_inter_wc_filt['Data Series'], df_aligned_inter_wc_filt[months].values):
    fig10.add_trace(go.Scatter(x=month_names, y=values, mode='lines+markers', name=str(year)))

fig10.update_layout(
    title="Annual Marriages By Month Of Marriage (Inter-ethnic Marriages under the Women's Charter)",
    xaxis_title="Months",
    yaxis_title="Number of Marriages",
    showlegend=True,
)

# Render the figure
fig10.show()


In [33]:
## Conclusion: Get married in August if you want a discount, since there's less demand
## If you don't like attending weddings, maybe book a flight out of Singapore in December

In [34]:
# Note on the charts above: the April/August drop in Chinese marriages is roughly proportional to the share of Buddhists/Taoists among the Chinese population.

In [40]:
# Monthly average: the "Chinese" column divided evenly over 12 months
monthly_average = df_aligned_chinese_filt["Chinese"] / 12
df_aligned_chinese_filt["Average"] = monthly_average

# Relative shortfall of April and August against that monthly average
for month in ("April", "August"):
    df_aligned_chinese_filt[f"{month} fall from Avg"] = 1 - df_aligned_chinese_filt[month] / monthly_average

In [46]:
# Mean April/August shortfall across the filtered years, leaving out 2020
not_2020 = df_aligned_chinese_filt['Data Series'] != 2020
filtered_years = df_aligned_chinese_filt[not_2020]

august_avg = filtered_years['August fall from Avg'].mean()
april_avg = filtered_years['April fall from Avg'].mean()

for description, value in (
    ("Average of 'August fall from Avg' (excluding 2020):", august_avg),
    ("Average of 'April fall from Avg' (excluding 2020):", april_avg),
):
    print(description, value)

Average of 'August fall from Avg' (excluding 2020): 0.4782833219315161
Average of 'April fall from Avg' (excluding 2020): 0.48911996318135376


In [None]:
## https://www.singstat.gov.sg/-/media/files/publications/cop2020/sr1/findings.pdf
# 2020: Proportion of Buddhist + Taoists in Chinese = 52%
# 2010: Proportion of Buddhist + Taoists in Chinese = 57.4%