# IVCSD Community Spaces Data Analysis

Welcome to the IVCSD Community Spaces Data Analysis notebook. This notebook automatically analyzes the data provided by IVCSD. Every graph it generates is also saved as an image in the `output` folder. Please run the `setup` code cell first. Then, for each analysis you want, simply run the corresponding code cell under its heading. If a graph does not show the custom theme, run its cell again to refresh the inline styles.

## Get the Data

### Calendar Data

The calendar data can be exported directly from the widget on the [IVCSD website](https://islavistacsd.ca.gov/iv-community-calendar). Make sure to select the datetime range you are interested in, and to enable both the "More Info" and the "Minor Categories" options so that the data can be filtered later on.

### Post Event Survey

This can be downloaded as a spreadsheet from the Post Event Google Form. Please contact your supervisor for access to this form and its respective data.

### Rental Fees

This can be downloaded directly as a spreadsheet from the Google Drive folder for rental fees. Please contact your supervisor for access to this folder and its respective data.

## Setup

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from aquarel import load_theme

# --- Configuration ----------------------------------------------------------
# Year under analysis; used to filter survey responses and in chart titles.
YEAR = 2024
# Event totals for YEAR-1 and YEAR-2, entered by hand for the year-over-year chart.
TOTAL_NUM_EVENTS_IN_PREVIOUS_1_YEAR = 1130
TOTAL_NUM_EVENTS_IN_PREVIOUS_2_YEAR = 880

# Name of the aquarel theme applied to every figure.
DEFINE_THEME = "arctic_light"
# Text encoding of the exported calendar CSV.
CALENDAR_ENCODING = 'Windows-1252'
# Locations / event types with fewer events than this are left out of the breakdown charts.
MIN_FILTER_SIZE = 15

theme = load_theme(DEFINE_THEME)

# --- Calendar export --------------------------------------------------------
dataCalendar = pd.read_csv("data/Calendar.csv", encoding=CALENDAR_ENCODING)

# Remove rows whose Subject starts with "More Info:" or "," (presumably spill-over
# lines of the export rather than real events -- verify against a sample export).
# A vectorised mask replaces the row-by-row loop; na=False keeps rows with a
# missing Subject instead of crashing on them.
subjects = dataCalendar['Subject']
isSpilloverRow = (
    subjects.str.startswith('More Info:', na=False)
    | subjects.str.startswith(',', na=False)
)
dataCalendar = dataCalendar.drop(index=dataCalendar.index[isSpilloverRow])

# --- Post-event survey ------------------------------------------------------
# Keep only the responses submitted during YEAR.
dataPostEvent = pd.read_csv("data/PostSurvey.csv")
dataPostEvent['Timestamp'] = pd.to_datetime(dataPostEvent['Timestamp'])
dataPostEvent['Year'] = dataPostEvent['Timestamp'].dt.year
dataPostEvent = dataPostEvent[dataPostEvent['Year'] == YEAR]

# --- Rental fees ------------------------------------------------------------
dataFees = pd.read_csv("data/RentalFees.csv")

# %matplotlib inline

# Community Center/Room/Other Space Usage Analysis

## Number of Events by Month


In [2]:
%matplotlib inline
# Bar chart of the number of calendar events in each month.
dataCalendar['Start Date'] = pd.to_datetime(dataCalendar['Start Date'])
dataCalendar['Month'] = dataCalendar['Start Date'].dt.month

monthLabels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# Reindex to all twelve months so that bar position i always means month i+1.
# Without this, an export that lacks some months yields fewer bars and the
# fixed Jan..Dec tick labels end up on the wrong bars.
monthlyEvents = (
    dataCalendar['Month']
    .value_counts()
    .reindex(range(1, 13), fill_value=0)
)

theme.apply()
fig, ax = plt.subplots(figsize=(10, 6))
monthlyEvents.plot(kind='bar', ax=ax)
ax.set_title('Number of Events by Month', fontsize=16)
ax.set_xlabel('Month', fontsize=14)
ax.set_ylabel('Number of Events', fontsize=14)
ax.set_xticks(range(len(monthLabels)))
ax.set_xticklabels(monthLabels, rotation=45)
theme.apply_transforms()

fig.savefig("output/events_by_month.png", facecolor=fig.get_facecolor())

## Number of Events Between the Community Center and Room


In [3]:
%matplotlib inline
# Compare the number of events held at the Community Center with the number
# held at the Community Room.
# NOTE: this rebinds dataCalendar, so rows without a Location are also gone
# for every cell that runs after this one.
dataCalendar = dataCalendar.dropna(subset=['Location'])

# A location counts for a venue when its text contains the venue name.
atCommunityCenter = dataCalendar['Location'].str.contains('Isla Vista Community Center')
atCommunityRoom = dataCalendar['Location'].str.contains('Isla Vista Community Room')

# Per-column non-null counts of the matching rows; only 'Location' is plotted.
communityCenterCount = dataCalendar.loc[atCommunityCenter].count()
communityRoomCount = dataCalendar.loc[atCommunityRoom].count()

venueLabels = ['Community Center', 'Community Room']
venueTotals = [communityCenterCount['Location'], communityRoomCount['Location']]

theme.apply()
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(venueLabels, venueTotals)
ax.set_xlabel('Location')
ax.set_ylabel('Number of Events')
ax.set_title(f'Number of Events at Community Center vs Community Room in {YEAR}')
theme.apply_transforms()

fig.savefig("output/events_between_center_room.png", facecolor=fig.get_facecolor())

## Breakdown of Events in Places Other than the Community Center and Room this Year


In [4]:
%matplotlib inline
# Horizontal bar chart of event counts for every location other than the two
# main venues, limited to locations with at least MIN_FILTER_SIZE events.
locations = dataCalendar['Location'].value_counts()

mainVenues = ['Isla Vista Community Center', 'Isla Vista Community Room']
isOtherVenue = ~locations.index.isin(mainVenues)           # exact-name match only
isFrequentEnough = (locations >= MIN_FILTER_SIZE).to_numpy()
locations = locations[isOtherVenue & isFrequentEnough]

theme.apply()
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(locations.index, locations)
ax.set_title(f'Number of Events at Each Location in {YEAR}')
ax.set_ylabel('Location')
ax.set_xlabel('Number of Events')
theme.apply_transforms()

fig.savefig("output/events_per_location.png", facecolor=fig.get_facecolor())

## Breakdown of Types of Events this Year


In [5]:
%matplotlib inline
# Horizontal bar chart of how many events fall into each primary category,
# keeping only categories with at least MIN_FILTER_SIZE events.
categoryCounts = dataCalendar['Primary Category'].value_counts()
eventTypes = categoryCounts[categoryCounts >= MIN_FILTER_SIZE]

theme.apply()
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(eventTypes.index, eventTypes)
ax.set_ylabel('Event Type')
ax.set_xlabel('Number of Events')
ax.set_title(f'Number of Each Event Type in {YEAR}')
theme.apply_transforms()

fig.savefig("output/events_per_type.png", facecolor=fig.get_facecolor())

## Total Number of Events Compared with Previous Years

In [6]:

%matplotlib inline

# Bar chart comparing this year's event total with the two previous years.

# Count rows, i.e. events. DataFrame.count() would count only the non-null
# cells of the first column, which undercounts if that column has gaps.
currentYearEventCount = len(dataCalendar)

yearLabels = [f'{YEAR-2}', f'{YEAR-1}', f'{YEAR}']
yearTotals = [
    TOTAL_NUM_EVENTS_IN_PREVIOUS_2_YEAR,
    TOTAL_NUM_EVENTS_IN_PREVIOUS_1_YEAR,
    currentYearEventCount,
]

theme.apply()
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(yearLabels, yearTotals)
ax.set_title('Total Events Over the Years')
ax.set_ylabel('Number of Events')
ax.set_xlabel('Year')
theme.apply_transforms()

fig.savefig("output/total_events_between_years.png", facecolor=fig.get_facecolor())

## Number of Events by Category and Location


In [7]:
%matplotlib inline
# Stacked horizontal bar chart: one bar per location, split by primary category.
data = dataCalendar.copy()

# Collapse every spelling that mentions the Community Center / Community Room
# (case-insensitive) into one canonical name so variants share a single bar.
data['Location'] = data['Location'].str.replace(r'(?i).*Community Center.*', 'Isla Vista Community Center', regex=True)
data['Location'] = data['Location'].str.replace(r'(?i).*Community Room.*', 'Isla Vista Community Room', regex=True)

# Rows = location, columns = category, cells = number of events.
combinedData = data.groupby(['Location', 'Primary Category']).size().unstack(fill_value=0)

theme.apply()
fig, ax = plt.subplots(figsize=(16, 8))
combinedData.plot(kind='barh', stacked=True, ax=ax)

# Add labels and title. The chart is horizontal, so the counts run along the
# x-axis and the locations along the y-axis.
ax.set_title('Total Number of Events by Location and Category', fontsize=16)
ax.set_xlabel('Number of Events', fontsize=14)
ax.set_ylabel('Event Location', fontsize=14)
ax.legend(title='Category', bbox_to_anchor=(1.05, 1), loc='upper left')

theme.apply_transforms()

fig.savefig("output/total_events_per_category_per_location.png", facecolor=fig.get_facecolor())

## Distribution of Satisfaction Rating of the Center and Room


In [8]:
%matplotlib inline

# Area/line chart of how survey respondents rated the event space, shown
# separately for the Community Center and the Community Room.
spaceColumn = 'Which Community Space did you use? '
ratingColumn = 'How satisfied were you with the event space?'
# Levels drawn on the x-axis. Missing answers are filled with 0 below, so 0
# stands for "no answer"; 1-5 is presumably the survey scale (confirm with the form).
ratingLevels = list(range(6))

# Number of responses per rating level for each space. Reindexing guarantees
# one value per level, in order, even when a level (or a whole space) has no responses.
centerData = (
    dataPostEvent.loc[dataPostEvent[spaceColumn] == 'Community Center', ratingColumn]
    .fillna(0)
    .astype(int)
    .value_counts()
    .reindex(ratingLevels, fill_value=0)
)
roomData = (
    dataPostEvent.loc[dataPostEvent[spaceColumn] == 'Community Room', ratingColumn]
    .fillna(0)
    .astype(int)
    .value_counts()
    .reindex(ratingLevels, fill_value=0)
)

theme.apply()
fig, ax = plt.subplots(figsize=(10, 6))
for spaceLabel, ratingCounts in (('Community Center', centerData), ('Community Room', roomData)):
    ax.plot(ratingLevels, ratingCounts.to_numpy(), label=spaceLabel)
    ax.fill_between(ratingLevels, ratingCounts.to_numpy(), alpha=0.3)

ax.set_title(f'Satisfaction with Event Space in {YEAR}')
ax.set_xlabel('Satisfaction Level')
ax.set_ylabel('Number of Responses')
ax.set_ybound(0, max(centerData.max(), roomData.max()) + 10)
# Without a legend the two series cannot be told apart.
ax.legend()
theme.apply_transforms()

# Pass the figure face colour like the other cells so the theme background is saved.
fig.savefig("output/satisfaction_with_event_space.png", facecolor=fig.get_facecolor())

## Distribution of Reservation Rating of the Center and Room


In [9]:
%matplotlib inline

# Area/line chart of how survey respondents rated the reservation process,
# shown separately for the Community Center and the Community Room.
spaceColumn = 'Which Community Space did you use? '
ratingColumn = 'How did you feel about the reservation process?'
# Levels drawn on the x-axis. Missing answers are filled with 0 below, so 0
# stands for "no answer"; 1-5 is presumably the survey scale (confirm with the form).
ratingLevels = list(range(6))

# Number of responses per rating level for each space. Reindexing guarantees
# one value per level, in order, even when a level (or a whole space) has no responses.
centerData = (
    dataPostEvent.loc[dataPostEvent[spaceColumn] == 'Community Center', ratingColumn]
    .fillna(0)
    .astype(int)
    .value_counts()
    .reindex(ratingLevels, fill_value=0)
)
roomData = (
    dataPostEvent.loc[dataPostEvent[spaceColumn] == 'Community Room', ratingColumn]
    .fillna(0)
    .astype(int)
    .value_counts()
    .reindex(ratingLevels, fill_value=0)
)

theme.apply()
fig, ax = plt.subplots(figsize=(10, 6))
for spaceLabel, ratingCounts in (('Community Center', centerData), ('Community Room', roomData)):
    ax.plot(ratingLevels, ratingCounts.to_numpy(), label=spaceLabel)
    ax.fill_between(ratingLevels, ratingCounts.to_numpy(), alpha=0.3)

ax.set_title(f'Satisfaction with Space Reservation Process in {YEAR}')
ax.set_xlabel('Satisfaction Level')
ax.set_ylabel('Number of Responses')
ax.set_ybound(0, max(centerData.max(), roomData.max()) + 10)
# Without a legend the two series cannot be told apart.
ax.legend()
theme.apply_transforms()

# Pass the figure face colour like the other cells so the theme background is saved.
fig.savefig("output/satisfaction_with_reservation_of_space.png", facecolor=fig.get_facecolor())

## Demographic Distribution based on Survey Data

In [10]:
%matplotlib inline

# Horizontal bar chart of how often each demographic was selected in the
# post-event survey. A response may list several demographics separated by ", ".
demographicColumn = 'What were the demographics of your event?'

# One row per selected demographic, then count. Counting a Series keeps each
# label attached to its own count; taking the labels from MultiIndex.levels
# would return them sorted alphabetically and mislabel the bars.
data = (
    dataPostEvent[demographicColumn]
    .dropna()
    .str.split(', ')
    .explode()
    .value_counts()
)
data.index = data.index.astype(str)


theme.apply()
fig, ax = plt.subplots(figsize = (10, 6))
ax.barh(data.index, data.values)
ax.set_title('Demographics of Events based on Post Survey Responses', fontsize = 16)
ax.set_xlabel('Number of Events', fontsize = 14)
ax.set_ylabel('Demographic', fontsize = 14)
theme.apply_transforms()

# Own file name: the previous path overwrote the category-by-location chart.
fig.savefig("output/demographics_of_events.png", facecolor=fig.get_facecolor())

## Number of Events by Start Time Between Weekdays and Weekends


In [11]:
%matplotlib inline

# Line chart of how many events start in each hour of the day, with one line
# for weekdays and one for weekends.
dataCalendar['Start Date'] = pd.to_datetime(dataCalendar['Start Date'])
# dayofweek: Monday=0 ... Sunday=6, so 5 and 6 are the weekend.
dataCalendar['Day Type'] = dataCalendar['Start Date'].dt.dayofweek.apply(lambda x: 'Weekend' if x >= 5 else 'Weekday')

startTimes = dataCalendar['Start Time']
if startTimes.map(lambda value: isinstance(value, str)).any():
    # First run: raw export strings such as "02:30 PM". Parse once and derive
    # both the time-of-day and the hour from the same result.
    parsedStart = pd.to_datetime(startTimes, format='%I:%M %p')
    dataCalendar['Start Time'] = parsedStart.dt.time
    dataCalendar['Start Hour'] = parsedStart.dt.hour
else:
    # Re-run: the column already holds time objects from an earlier run, so
    # read the hour straight from them instead of failing to re-parse.
    dataCalendar['Start Hour'] = startTimes.map(lambda value: getattr(value, 'hour', np.nan))

# Rows = day type, columns = start hour. Reindex to all 24 hours so hours with
# no events are plotted as zero rather than interpolated between neighbours.
timeCounts = (
    dataCalendar.groupby(['Day Type', 'Start Hour'])
    .size()
    .unstack(fill_value=0)
    .reindex(columns=range(24), fill_value=0)
)


# Plot the data
theme.apply()
fig, ax = plt.subplots(figsize=(18, 8))
timeCounts.T.plot(kind='line', ax=ax)
ax.set_title(f'Number of Events by Start Times (Weekdays vs Weekends) in {YEAR}', fontsize=16)
ax.set_xlabel('Start Time', fontsize=14)
ax.set_ylabel('Number of Events', fontsize=14)

ax.set_xticks(range(24))
ax.set_xticklabels([f'{hour}:00' for hour in range(24)], rotation=45)
theme.apply_transforms()

fig.savefig("output/events_by_start_time_weekday_vs_weekend.png", facecolor=fig.get_facecolor())

## Number of Events by Duration Between Weekdays and Weekends

In [12]:
%matplotlib inline
# Grouped bar chart of event counts per duration range, weekdays vs weekends.

# Derive 'Day Type' here as well so this cell also works when the start-time
# cell has not been run (both statements are safe to repeat).
dataCalendar['Start Date'] = pd.to_datetime(dataCalendar['Start Date'])
dataCalendar['Day Type'] = dataCalendar['Start Date'].dt.dayofweek.apply(lambda x: 'Weekend' if x >= 5 else 'Weekday')

dataCalendar['Duration (hours)'] = pd.to_timedelta(dataCalendar['Duration (hh:mm:ss)']).dt.total_seconds() / 3600


# Left-closed bins: [0, 1), [1, 2), [2, 4), [4, 8), [8, 12), [12, 24).
# NOTE(review): events lasting 24 hours or more fall outside every bin and are
# therefore not shown -- confirm that this is intended.
bins = [0, 1, 2, 4, 8, 12, 24]
labels = ['<1h', '1-2h', '2-4h', '4-8h', '8-12h', '12-24h']
dataCalendar['Duration Range'] = pd.cut(dataCalendar['Duration (hours)'], bins=bins, labels=labels, right=False)

# observed=False keeps empty duration ranges so every range appears on the axis.
durationCounts = dataCalendar.groupby(['Day Type', 'Duration Range'], observed=False).size().unstack(fill_value=0)

theme.apply()
fig, ax = plt.subplots(figsize=(12, 6))
durationCounts.T.plot(kind='bar', ax=ax)
ax.set_title(f'Number of Events by Duration (Weekdays vs Weekends) in {YEAR}', fontsize=16)
ax.set_xlabel('Duration Range', fontsize=14)
ax.set_ylabel('Number of Events', fontsize=14)
ax.legend(title='Day Type', bbox_to_anchor=(1.05, 1), loc='upper left')
theme.apply_transforms()


fig.savefig("output/events_by_duration_weekday_vs_weekend.png", facecolor=fig.get_facecolor())

## Average Length of Events Between Repeating and Single Events


In [13]:
%matplotlib inline
# Bar chart of the mean event duration for repeating vs single events.

# Compute the duration here as well so this cell also works when the duration
# cell has not been run (the source column is left untouched, so it is safe to repeat).
dataCalendar['Duration (hours)'] = pd.to_timedelta(dataCalendar['Duration (hh:mm:ss)']).dt.total_seconds() / 3600

# Any 'Repeat Id' other than 0 marks the event as repeating.
# NOTE(review): a missing (NaN) Repeat Id also compares unequal to 0 and is
# therefore labelled 'Repeating' -- confirm how the export encodes single events.
dataCalendar['Is Repeating'] = dataCalendar['Repeat Id'].apply(lambda x: 'Repeating' if x != 0 else 'Single')
average_duration = dataCalendar.groupby('Is Repeating')['Duration (hours)'].mean()

# Plot the data
theme.apply()
fig, ax = plt.subplots(figsize=(10, 6))
average_duration.plot(kind='bar', ax=ax)
ax.set_title('Average Length of Events Between Repeating vs Single Types', fontsize=16)
ax.set_xlabel('Event Type', fontsize=14)
ax.set_ylabel('Average Duration (hours)', fontsize=14)
theme.apply_transforms()

fig.savefig("output/average_length_repeating_vs_single.png", facecolor=fig.get_facecolor())

## Word Cloud and Sentiment Analysis of Survey Responses

In [63]:
%matplotlib inline

# Sentiment bar chart and word cloud built from the free-text feedback in the
# post-event survey.
import emoji
import string
import nltk
from nltk.corpus import stopwords
from nltk.sentiment import SentimentIntensityAnalyzer
from wordcloud import WordCloud

# Both corpora are needed: the VADER lexicon for scoring and the stop-word
# list for cleaning. Without the second download, stopwords.words() raises
# LookupError on a machine that has never fetched it.
nltk.download('vader_lexicon', quiet=True)
nltk.download('stopwords', quiet=True)
sia = SentimentIntensityAnalyzer()

badWords = set(stopwords.words('english'))
badPunct = set(string.punctuation)

# All non-empty feedback answers, joined into one text that is scored as a whole.
feedbacks = dataPostEvent['Any other feedback, improvements, comments or concerns?'].dropna().astype(str).to_numpy()
feedbackText = ' '.join(feedbacks)
sentiment_scores = sia.polarity_scores(feedbackText)

# The sign of the compound score decides the overall verdict. Computed outside
# the f-string: nesting the same quote type inside an f-string is a syntax
# error before Python 3.12.
if sentiment_scores['compound'] > 0:
    overallSentiment = "positive"
elif sentiment_scores['compound'] < 0:
    overallSentiment = "negative"
else:
    overallSentiment = "neutral"

theme.apply()
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(['Positive', 'Neutral', 'Negative'], [sentiment_scores['pos'], sentiment_scores['neu'], sentiment_scores['neg']])
ax.set_title(f'Sentiment Analysis of Feedback from Post Event Survey for {YEAR}', fontsize=16)
ax.set_xlabel('Sentiment Type', fontsize=14)
# pos/neu/neg are the proportions of the text in each class (0-1, summing to 1),
# not confidence percentages.
ax.set_ylabel('Proportion of Text', fontsize=14)
ax.text(0, -0.15, f"The overall sentiment is {overallSentiment}.", ha='left', va='center')
theme.apply_transforms()


def cleanToken(token):
    """Return ``token`` normalised for the word cloud, or ``''`` to discard it.

    Emoji are removed, the text is lower-cased and punctuation characters are
    stripped; the result is discarded when it is an English stop word.
    """
    token = emoji.replace_emoji(token, replace='')
    token = token.lower()
    token = ''.join(char for char in token if char not in badPunct)

    if token not in badWords:
        return token

    return ''


# Clean each token once and keep only the non-empty results.
tokens = [cleaned for cleaned in map(cleanToken, feedbackText.split()) if cleaned]

if tokens:
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(' '.join(tokens))
    wordcloud.to_file('output/feedback_wordcloud.png')
else:
    # WordCloud.generate() raises ValueError when there are no words to draw.
    print('No feedback text available; skipping the word cloud.')

fig.savefig("output/sentiment_analysis_of_feedback.png", facecolor=fig.get_facecolor())

# Community Center/Room Fee Analysis