Importing data

In [1]:
import pandas as pd

In [2]:
data = pd.read_csv("../data/msg_extracts.csv")

In [3]:
data.head()

Unnamed: 0,subject,date,sender,to,cc,body_title_top,body_link1_title,body_post_classification,body_title,body_date,body_description,body_link2_title,body_link3_title,body_full,file_name
0,A Resident Posted a Crime Incident,"Wed, 17 Nov 2021 18:16:36 -0500",Ring Team <no-reply@neighborhoods.ring.com>,andrea.serrano@brookhavenga.gov,,Stolen Package at Berkshire at Lenox Park,Neighbors Public Safety Service <https://links...,A resident in your area just posted a crime in...,Stolen Package at Berkshire at Lenox Park,"November 17, 2021",Stolen package,Click Here to View Post <https://links.neighbo...,<https://ring.widen.net/content/yxyweylxpc/png...,Post Titled: Stolen Package at Berkshire at Le...,../data/neighbors_data/brookhaven/A Resident P...
1,A Resident Posted a Crime Incident,"Mon, 17 May 2021 08:38:51 -0400",Ring Team <no-reply@neighborhoods.ring.com>,travis.lewis@brookhavenga.gov,,Car,Neighbors Public Safety Service <https://links...,A resident in your area just posted a crime in...,Car,"May 17, 2021",2 am someone checking my car,Click Here to View Post <https://links.neighbo...,Check Out Your Feed <https://links.neighborhoo...,Post Titled: Car ...,../data/neighbors_data/brookhaven/A Resident P...
2,A Resident Posted a Crime Incident,"Thu, 20 May 2021 23:47:46 -0400","""Ring Team"" <no-reply@neighborhoods.ring.com>",andrea.serrano@brookhavenga.gov,,One or two people checking for unlocked car do...,Neighbors Public Safety Service <https://links...,A resident in your area just posted a crime in...,One or two people checking for unlocked car do...,"May 21, 2021",Click Here to View Post <https://links.neighbo...,See the full list of crime and safety incident...,Have questions?,Post Titled: One or two people checking for un...,../data/neighbors_data/brookhaven/A Resident P...
3,A Resident Posted a Crime Incident,"Sat, 09 Oct 2021 07:09:43 -0400",Ring Team <no-reply@neighborhoods.ring.com>,robert.orange@brookhavenga.gov,,Parked Cars destroyed at Briarhill,Neighbors Public Safety Service <https://links...,A resident in your area just posted a crime in...,Parked Cars destroyed at Briarhill,"October 9, 2021",Three parked cars near the 1200 building at Br...,Click Here to View Post <https://links.neighbo...,Check Out Your Feed <https://links.neighborhoo...,Post Titled: Parked Cars destroyed at Briarhil...,../data/neighbors_data/brookhaven/A Resident P...
4,A Resident Posted a Crime Incident,"Thu, 10 Jun 2021 07:49:36 -0400","""Ring Team"" <no-reply@neighborhoods.ring.com>",travis.lewis@brookhavenga.gov,,Checking cars again in Peachtree creek townshi...,Neighbors Public Safety Service <https://links...,A resident in your area just posted a crime in...,Checking cars again in Peachtree creek townshi...,"June 10, 2021",Checking cars again in Peachtree creek townshi...,Click Here to View Post <https://links.neighbo...,Check Out Your Feed <https://links.neighborhoo...,Post Titled: Checking cars again in Peachtree ...,../data/neighbors_data/brookhaven/A Resident P...


Examining Data Types

In [4]:
data.dtypes

subject                      object
date                         object
sender                       object
to                           object
cc                          float64
body_title_top               object
body_link1_title             object
body_post_classification     object
body_title                   object
body_date                    object
body_description             object
body_link2_title             object
body_link3_title             object
body_full                    object
file_name                    object
dtype: object

Converting Date into readable format

In [6]:
data['date'] = pd.to_datetime(data['date'], utc=True)


In [8]:
data.dtypes

subject                                  object
date                        datetime64[ns, UTC]
sender                                   object
to                                       object
cc                                      float64
body_title_top                           object
body_link1_title                         object
body_post_classification                 object
body_title                               object
body_date                                object
body_description                         object
body_link2_title                         object
body_link3_title                         object
body_full                                object
file_name                                object
dtype: object

Resampling Data Over Time

In [10]:
messages_over_time = data.set_index('date')

In [12]:
message_tally = messages_over_time.resample('M').count()


In [13]:
message_tally.head()

Unnamed: 0_level_0,subject,sender,to,cc,body_title_top,body_link1_title,body_post_classification,body_title,body_date,body_description,body_link2_title,body_link3_title,body_full,file_name
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2021-01-31 00:00:00+00:00,15,15,15,0,15,15,15,15,15,15,15,15,15,15
2021-02-28 00:00:00+00:00,15,15,15,0,15,15,15,15,15,15,15,15,15,15
2021-03-31 00:00:00+00:00,120,120,120,0,120,120,120,120,120,120,120,120,120,120
2021-04-30 00:00:00+00:00,15,15,15,0,15,15,15,15,15,15,15,15,15,15
2021-05-31 00:00:00+00:00,53,53,53,0,53,53,53,53,53,53,53,53,53,53


Using File Name column as a unique identifier to tally

In [14]:
monthly_message_count = message_tally['file_name']

In [19]:
monthly_message_count.head()

date
2021-01-31 00:00:00+00:00     15
2021-02-28 00:00:00+00:00     15
2021-03-31 00:00:00+00:00    120
2021-04-30 00:00:00+00:00     15
2021-05-31 00:00:00+00:00     53
Freq: M, Name: file_name, dtype: int64

Exporting to CSV

In [23]:
monthly_message_count.to_csv('../output/monthly_message_count.csv')

Checking for contents

In [26]:
data["includes_package"] = data["body_full"].apply(
    
    lambda x: "package" in str(x).lower()
)

In [27]:
data["includes_package"].value_counts()

False    380
True     114
Name: includes_package, dtype: int64

In [28]:
data["includes_car"] = data["body_full"].apply(
    
    lambda x: "car" in str(x).lower()
)

In [29]:
data["includes_car"].value_counts()

True     306
False    188
Name: includes_car, dtype: int64

SyntaxError: invalid syntax (3214540565.py, line 1)