In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

print("Jupyter Notebook setup is complete!")

# Purpose of this cell: label every community as above/below the dataset-wide
# mean of ViolentCrimesPerPop, then compare the two groups by the average of
# each numeric column.

# Load dataset.
# na_values='?' tells the parser to treat the file's '?' placeholders as
# missing at read time. Replacing them only after loading leaves every other
# numeric column that contains a '?' as object dtype, and those columns are
# then silently left out of the numeric-only group means computed below.
df = pd.read_csv('crimedata.csv', sep=',', encoding="ISO-8859-1", na_values='?')

# Rename and clean columns
df.columns = df.columns.str.strip()  # Remove leading/trailing whitespace
# The community-name header arrives with a stray leading 'Ê' (presumably an
# artifact of decoding the raw header bytes as ISO-8859-1 -- TODO confirm).
# The clean spelling is mapped as well so the rename still applies if the file
# is ever re-saved without that character; keys not present are ignored.
df = df.rename(columns={
    'Êcommunityname': 'Community Name',
    'communityname': 'Community Name',
})

# Make sure the target column is numeric (anything unparseable becomes NaN),
# then keep only the rows where it is known. The original row index is kept.
df = (
    df
    .assign(ViolentCrimesPerPop=lambda d: pd.to_numeric(d['ViolentCrimesPerPop'], errors='coerce'))
    .dropna(subset=['ViolentCrimesPerPop'])
)

# Compute the mean of ViolentCrimesPerPop (the threshold for the flag below)
violent_crimes_mean = df['ViolentCrimesPerPop'].mean()

# Add, in this order:
#   mean_violent_crimes      - the threshold repeated on every row (optional)
#   violent_crime_occurrence - 1 when the row's rate is >= the mean, else 0
# .assign builds a new frame instead of writing into a filtered one.
df = df.assign(
    mean_violent_crimes=violent_crimes_mean,
    violent_crime_occurrence=np.where(df['ViolentCrimesPerPop'] >= violent_crimes_mean, 1, 0),
)

# Group by this flag and show average of numeric fields
result = df.groupby('violent_crime_occurrence').mean(numeric_only=True)

# Output the result
print(result)

# Optional: display first few rows
df.head()


Jupyter Notebook setup is complete!
                          crimeOccurence      fold    population  \
violent_crime_occurrence                                           
0                               0.844801  5.539755  28451.304281   
1                               0.830904  5.457726  97631.271137   

                          householdsize  racepctblack  racePctWhite  \
violent_crime_occurrence                                              
0                              2.702255      4.419618     90.535275   
1                              2.715496     19.214971     70.054461   

                          racePctAsian  racePctHisp  agePct12t21  agePct12t29  \
violent_crime_occurrence                                                        
0                             2.613586     5.149083    14.316995    27.106491   
1                             3.012391    14.835758    14.648994    28.591429   

                          ...  PctSameCity85  PctSameState85   LandArea  \
violen

Unnamed: 0,crimeOccurence,Community Name,state,countyCode,communityCode,fold,population,householdsize,racepctblack,racePctWhite,...,larcenies,larcPerPop,autoTheft,autoTheftPerPop,arsons,arsonsPerPop,ViolentCrimesPerPop,nonViolPerPop,mean_violent_crimes,violent_crime_occurrence
0,1,BerkeleyHeightstownship,NJ,39.0,5320.0,1,11980,3.1,1.37,91.78,...,138,1132.08,16,131.26,2.0,16.41,41.02,1394.59,589.078922,0
1,1,Marpletownship,PA,45.0,47616.0,1,23123,2.82,0.8,95.57,...,376,1598.78,26,110.55,1.0,4.25,127.56,1955.95,589.078922,0
2,1,Tigardcity,OR,,,1,29344,2.43,0.74,94.33,...,1797,4972.19,136,376.3,22.0,60.87,218.59,6167.51,589.078922,0
3,1,Gloversvillecity,NY,35.0,29443.0,1,16656,2.4,1.7,97.35,...,716,4142.56,47,271.93,,,306.64,,589.078922,0
5,1,Springfieldcity,MO,,,1,140494,2.45,2.51,95.65,...,7690,5091.64,454,300.6,134.0,88.72,442.95,6867.42,589.078922,0
