In [2]:

# PART 2
# ATTENDANCE-TO-REGISTRATION RATIO PER CAMPAIGN
import pandas as pd


# Load the dataset
df = pd.read_csv('merged_with_latest_population.csv')

# Group by Campaign Name to total registrations and attendances
campaign_stats = df.groupby('Campaign Name').agg({
    'Registered': 'sum',
    'Attended': 'sum'
}).reset_index()

# Attendance percentage (Attended / Registered * 100), rounded to two decimals.
# A campaign with zero registrations has no meaningful ratio, so it is reported
# as 0% instead of dividing by a substituted 1, which would turn N attendees
# into a bogus N*100%.
registered_nonzero = campaign_stats['Registered'].replace(0, float('nan'))
campaign_stats['Percentage Attended'] = (
    (campaign_stats['Attended'] / registered_nonzero * 100).fillna(0.0).round(2)
)

# Sort by percentage attended (descending)
campaign_stats = campaign_stats.sort_values('Percentage Attended', ascending=False)

# 'Campaign categories' value from the first row of each campaign, looked up
# once here instead of re-filtering the whole dataset on every loop iteration.
campaign_categories = (
    df.drop_duplicates(subset='Campaign Name')
    .set_index('Campaign Name')['Campaign categories']
)

# Generate summary table
print("### Overall Summary Table of Attendance Percentages")
print(campaign_stats[['Campaign Name', 'Registered', 'Attended', 'Percentage Attended']].to_markdown(index=False))

# Generate analysis and text-based bar chart for each campaign
print("\n### Analysis and Chart for Each Campaign")
for _, row in campaign_stats.iterrows():
    campaign = row['Campaign Name']
    registered = row['Registered']
    attended = row['Attended']
    percentage = row['Percentage Attended']

    # Text-based bar chart (each * represents ~2%)
    bar_length = int(percentage // 2)
    bar = '*' * bar_length

    # Rounded so the text never shows float artifacts such as 30.120000000000005
    no_show_rate = round(100 - percentage, 2)

    # Analysis based on campaign performance.
    # ">= 100" (not "== 100") so that over-attendance is reported as full
    # attendance rather than as a negative drop-off in the next branch.
    if percentage >= 100:
        analysis = (f"This workshop achieved perfect attendance, indicating high interest. All {registered} registered "
                    f"participants attended, suggesting effective promotion or relevant content. Replicate with similar hands-on themes.")
    elif percentage >= 80:
        analysis = (f"With {percentage}% attendance, this workshop shows strong engagement, likely due to its topic relevance. "
                    f"The small drop-off ({registered - attended} participants) might indicate scheduling conflicts. Consider follow-up sessions.")
    elif percentage >= 60:
        analysis = (f"At {percentage}% attendance, this workshop had solid turnout, reflecting interest in its focus. "
                    f"The {no_show_rate}% no-show rate could be addressed with reminders or incentives.")
    elif percentage >= 40:
        analysis = (f"This workshop had {percentage}% attendance, with significant drop-offs ({registered - attended} no-shows). "
                    f"Low appeal or unclear content may be factors; tailor regionally or add interactive elements.")
    else:
        analysis = (f"With {percentage}% attendance, this event underperformed despite {registered} registrations. "
                    f"Low conversion may stem from niche topic or timing; target high-population areas for better reach.")

    # Additional insights based on campaign type.
    # The category can be missing (NaN) in the CSV; only test real strings so a
    # missing value skips the hint instead of raising TypeError.
    category = campaign_categories.loc[campaign]
    if isinstance(category, str) and 'Tech' in category:
        analysis += " Tech theme aligns with high performers; ensure beginner-friendly content."
    if 'Propel event' in campaign:
        analysis += " As a propel event, focus on specific, targeted marketing."

    print(f"\n#### {campaign} ({percentage}%)")
    print(analysis)
    print(f"Chart: {bar}")

### Overall Summary Table of Attendance Percentages
| Campaign Name                                        |   Registered |   Attended |   Percentage Attended |
|:-----------------------------------------------------|-------------:|-----------:|----------------------:|
| [Workshop] Eleva tu fundraising con ChatGPT I (2024) |           70 |         54 |                 77.14 |
| [Workshop] Impulsa tu productividad con IA (2024)    |           80 |         60 |                 75    |
| [Workshop] Visibiliza tu causa con IA (2024)         |           83 |         58 |                 69.88 |
| [Propel event] Aplica a grants con confianza (2024)  |          193 |         92 |                 47.67 |
| [Workshop] Fortalece tu historia de impacto (2024)   |          352 |        133 |                 37.78 |
| [Workshop] Planea con OKRs (2024)                    |          254 |         95 |                 37.4  |
| [Propel event] Lidera con un mindset digital (2024)  |          207 |     

In [3]:
# CAMPAIGN DURATION VS. NUMBER OF CAMPAIGNS (TODO: add a new column: mean(campaign start, campaign end))
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the merged dataset from disk
df = pd.read_csv('merged_with_latest_population.csv')

# Parse both campaign boundary columns into datetimes
for date_col in ('Start Date', 'End Date'):
    df[date_col] = pd.to_datetime(df[date_col])

# Campaign length in days; the +1 counts both the start day and the end day
df['Duration'] = (df['End Date'] - df['Start Date']).dt.days + 1

# Tally dataset rows per duration and order the tally by duration
duration_counts = (
    df['Duration']
    .value_counts()
    .rename_axis('Duration (Days)')
    .reset_index(name='Number of Rows')
    .sort_values('Duration (Days)')
)

# Show the tally as a markdown table
print("### Table: Campaign Durations vs. Number of Rows")
print(duration_counts.to_markdown(index=False))

# Draw the bar chart on an explicit figure/axes pair and write it to disk
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    data=duration_counts,
    x='Duration (Days)',
    y='Number of Rows',
    color='#1f77b4',
    ax=ax,
)
ax.set_title('Number of Campaign Registrations by Duration (Days)')
ax.set_xlabel('Duration (Days)')
ax.set_ylabel('Number of Rows')
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
fig.savefig('duration_vs_rows.png')
plt.close(fig)  # closed so the figure is only saved, not rendered inline

print("\nChart saved as 'duration_vs_rows.png'")

### Table: Campaign Durations vs. Number of Rows
|   Duration (Days) |   Number of Rows |
|------------------:|-----------------:|
|                 1 |              233 |
|                16 |              607 |
|                17 |              207 |
|                23 |              352 |
|                30 |              443 |

Chart saved as 'duration_vs_rows.png'


In [4]:
# Attendance ratio vs. number of campaigns
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the merged dataset from disk
df = pd.read_csv('merged_with_latest_population.csv')

# Tally dataset rows per value of the 'Attended' column
attended_counts = (
    df['Attended']
    .value_counts()
    .rename_axis('Attended Status')
    .reset_index(name='Number of Rows')
)

# Swap the 1/0 codes for readable labels; any other value maps to NaN
status_labels = {1: 'Attended', 0: 'Not Attended'}
attended_counts['Attended Status'] = attended_counts['Attended Status'].map(status_labels)

# Show the tally as a markdown table
print("### Table: Attended Status vs. Number of Rows")
print(attended_counts.to_markdown(index=False))

# Draw the bar chart on an explicit figure/axes pair and write it to disk
fig, ax = plt.subplots(figsize=(8, 6))
sns.barplot(
    data=attended_counts,
    x='Attended Status',
    y='Number of Rows',
    color='#1f77b4',
    ax=ax,
)
ax.set_title('Number of Campaign Registrations by Attended Status')
ax.set_xlabel('Attended Status')
ax.set_ylabel('Number of Rows')
fig.tight_layout()
fig.savefig('attended_vs_rows.png')
plt.close(fig)  # closed so the figure is only saved, not rendered inline

print("\nChart saved as 'attended_vs_rows.png'")


### Table: Attended Status vs. Number of Rows
| Attended Status   |   Number of Rows |
|:------------------|-----------------:|
| Not Attended      |             1172 |
| Attended          |              670 |

Chart saved as 'attended_vs_rows.png'
