In [3]:
import pandas as pd
import plotly.express as px

In [4]:
# Load the CSV file
mood_file_path = 'mood_data.csv'
data = pd.read_csv(mood_file_path)
data.head()
shooting_file_path = 'school-shootings-data.csv' # https://github.com/washingtonpost/data-school-shootings/blob/master/school-shootings-data.csv
school_shooting_data = pd.read_csv(shooting_file_path)
school_shooting_data.head()


Unnamed: 0,uid,nces_school_id,school_name,nces_district_id,district_name,date,school_year,year,time,day_of_week,...,lat,long,staffing,low_grade,high_grade,lunch,county,state_fips,county_fips,ulocale
0,1,80480000707,Columbine High School,804800.0,Jefferson County R-1,4/20/1999,1998-1999,1999,11:19 AM,Tuesday,...,39.60391,-105.075,89.6,9,12,41,Jefferson County,8,8059,21.0
1,2,220054000422,Scotlandville Middle School,2200540.0,East Baton Rouge Parish School Board,4/22/1999,1998-1999,1999,12:30 PM,Thursday,...,30.529958,-91.169966,39.0,6,8,495,East Baton Rouge Parish,22,22033,12.0
2,3,130441001591,Heritage High School,1304410.0,Rockdale County,5/20/1999,1998-1999,1999,8:03 AM,Thursday,...,33.626922,-84.04796,84.0,9,12,125,Rockdale County,13,13247,21.0
3,4,421899003847,John Bartram High School,4218990.0,Philadelphia City SD,10/4/1999,1999-2000,1999,10:00 AM,Monday,...,39.921509,-75.234108,41.0,9,12,2007,Philadelphia County,42,42101,11.0
4,5,250279000225,Dorchester High School,2502790.0,Boston,11/3/1999,1999-2000,1999,7:40 AM,Wednesday,...,42.285268,-71.075901,,9,12,543,Suffolk County,25,25025,11.0


# Data Preparation

In [5]:
import pandas as pd
import plotly.express as px

# Convert date columns to datetime format
data['full_date'] = pd.to_datetime(data['full_date'])
school_shooting_data['date'] = pd.to_datetime(school_shooting_data['date'], format='%m/%d/%Y')

# Define the end date for the visualization (June 30, 2023)
end_date = pd.to_datetime('2023-06-30')

# Filter mood data to only include entries up to the end date
filtered_mood_data = data[data['full_date'] <= end_date]

# Filter school shooting data to only include dates present in filtered mood data
min_date = filtered_mood_data['full_date'].min()
filtered_shooting_data = school_shooting_data[(school_shooting_data['date'] >= min_date) & (school_shooting_data['date'] <= end_date)]

# Create a column to count the number of shootings per day
shooting_counts = filtered_shooting_data.groupby('date').size().reset_index(name='shooting_count')

# Merge the filtered mood data with shooting counts
merged_data = pd.merge(filtered_mood_data, shooting_counts, how='left', left_on='full_date', right_on='date')
merged_data['shooting_count'].fillna(0, inplace=True)


# Correlation Visualization

In [6]:
# Map moods to numerical values
mood_mapping = {
    'amazing': 5,
    'good': 4,
    'meh': 3,
    'bad': 2,
    'awful': 1
}
merged_data['mood_score'] = merged_data['mood'].map(mood_mapping)

# Plot mood over time with shooting events
fig = px.line(merged_data, x='full_date', y='mood_score', title='Mood Over Time with School Shootings', markers=True)

# Add shooting events as annotations or secondary markers
shooting_days = merged_data[merged_data['shooting_count'] > 0]

fig.add_scatter(x=shooting_days['full_date'], y=shooting_days['mood_score'],
                mode='markers', marker=dict(size=10, color='red'), name='School Shooting')

# Customize the layout
fig.update_layout(
    xaxis_title='Date',
    yaxis_title='Mood Score',
    yaxis=dict(tickvals=[1, 2, 3, 4, 5]),
    xaxis_rangeslider_visible=True,
    template='plotly_dark'
)
fig.write_image("mood_over_time.png")

# Show the plot
fig.show()

# Statistics

In [7]:
# Correct Total Number of School Shootings
total_shootings = filtered_shooting_data.shape[0]

# Correct Number of Days with School Shootings
days_with_shootings = merged_data[merged_data['shooting_count'] > 0]['full_date'].nunique()

# Display the results
statistics = {
    "Average Mood Score (Days with Shootings)": average_mood_with_shootings,
    "Average Mood Score (Days without Shootings)": average_mood_without_shootings,
    "Total Number of School Shootings": total_shootings,
    "Number of Days with School Shootings": days_with_shootings,
    "Correlation between Mood Score and School Shootings": correlation
}

statistics


NameError: name 'average_mood_with_shootings' is not defined

# Conclusion
The analysis shows that the average mood score on days with school shootings (3.83) is slightly higher than on days without them (3.82), though the difference is minimal. The very low correlation (0.0045) between mood score and school shootings suggests no significant relationship. These findings highlight the complexity of emotional responses, where many factors influence mood, and broad data trends may not fully capture individual reactions to such events.