In [6]:
# Imports and plotting settings
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path



In [7]:
# Read the state crime table; the CSV is expected to sit next to the notebook
# (the relative path is resolved against the current working directory).
data_path = Path('state_crime.csv')
df = pd.read_csv(filepath_or_buffer=data_path)

# Report which file was read and its (rows, columns) shape, then preview the top rows.
print(f'Loaded {data_path} — shape: {df.shape}')
df.head(n=5)

Loaded state_crime.csv — shape: (3115, 21)


Unnamed: 0,State,Year,Data.Population,Data.Rates.Property.All,Data.Rates.Property.Burglary,Data.Rates.Property.Larceny,Data.Rates.Property.Motor,Data.Rates.Violent.All,Data.Rates.Violent.Assault,Data.Rates.Violent.Murder,...,Data.Rates.Violent.Robbery,Data.Totals.Property.All,Data.Totals.Property.Burglary,Data.Totals.Property.Larceny,Data.Totals.Property.Motor,Data.Totals.Violent.All,Data.Totals.Violent.Assault,Data.Totals.Violent.Murder,Data.Totals.Violent.Rape,Data.Totals.Violent.Robbery
0,Alabama,1960,3266740,1035.4,355.9,592.1,87.3,186.6,138.1,12.4,...,27.5,33823,11626,19344,2853,6097,4512,406,281,898
1,Alabama,1961,3302000,985.5,339.3,569.4,76.8,168.5,128.9,12.9,...,19.1,32541,11205,18801,2535,5564,4255,427,252,630
2,Alabama,1962,3358000,1067.0,349.1,634.5,83.4,157.3,119.0,9.4,...,22.5,35829,11722,21306,2801,5283,3995,316,218,754
3,Alabama,1963,3347000,1150.9,376.9,683.4,90.6,182.7,142.1,10.2,...,24.7,38521,12614,22874,3033,6115,4755,340,192,828
4,Alabama,1964,3407000,1358.7,466.6,784.1,108.0,213.1,163.0,9.3,...,29.1,46290,15898,26713,3679,7260,5555,316,397,992


# State Crime — exploratory analysis


## Datatype check

Confirm the expected column count and list the dtype of every column.

In [9]:
# Datatype check on the frame loaded by the data-loading cell.
# `df` and `data_path` come from that cell and `pd`/`Path` from the imports cell;
# re-importing and re-reading the same CSV here only repeated the I/O, so the
# already-loaded frame is reused instead.
path = data_path  # keep `path` bound for any code that refers to it

EXPECTED_N_COLUMNS = 21  # column count reported when the file was first loaded

print(f"Loaded {path} — shape: {df.shape}")
if df.shape[1] != EXPECTED_N_COLUMNS:
    print(f"WARNING: expected {EXPECTED_N_COLUMNS} columns but found {df.shape[1]}")

# Build a two-column table (column name, dtype) and print it as plain text
# without the positional index.
dtypes = df.dtypes.reset_index()
dtypes.columns = ["column", "dtype"]
print("\nColumn datatypes:")
print(dtypes.to_string(index=False))

# For a programmatic mapping instead of a printout:
# dtypes_dict = df.dtypes.apply(lambda x: str(x)).to_dict()

Loaded state_crime.csv — shape: (3115, 21)

Column datatypes:
                       column   dtype
                        State  object
                         Year   int64
              Data.Population   int64
      Data.Rates.Property.All float64
 Data.Rates.Property.Burglary float64
  Data.Rates.Property.Larceny float64
    Data.Rates.Property.Motor float64
       Data.Rates.Violent.All float64
   Data.Rates.Violent.Assault float64
    Data.Rates.Violent.Murder float64
      Data.Rates.Violent.Rape float64
   Data.Rates.Violent.Robbery float64
     Data.Totals.Property.All   int64
Data.Totals.Property.Burglary   int64
 Data.Totals.Property.Larceny   int64
   Data.Totals.Property.Motor   int64
      Data.Totals.Violent.All   int64
  Data.Totals.Violent.Assault   int64
   Data.Totals.Violent.Murder   int64
     Data.Totals.Violent.Rape   int64
  Data.Totals.Violent.Robbery   int64


## Per-state means

Average every numeric column except `Year` across all years, for each state.

In [15]:
# Coerce Year to a numeric dtype; entries that cannot be parsed become NaN
# because of errors="coerce".
df["Year"] = pd.to_numeric(df["Year"], errors="coerce")

# Option A: aggregate every numeric column, leaving Year out of the averages.
numeric_cols = [
    col
    for col in df.select_dtypes(include="number").columns
    if col != "Year"
]

# One row per state holding the mean of each selected column; State is
# restored as an ordinary column by reset_index().
state_means_all = (
    df.groupby("State")[numeric_cols]
    .mean()
    .reset_index()
)
print(state_means_all.head())

        State  Data.Population  Data.Rates.Property.All  \
0     Alabama     4.111466e+06              3252.395000   
1      Alaska     5.152013e+05              3922.298333   
2     Arizona     3.948484e+06              5296.573333   
3    Arkansas     2.422008e+06              3142.040000   
4  California     2.855579e+07              4475.756667   

   Data.Rates.Property.Burglary  Data.Rates.Property.Larceny  \
0                    921.441667                  2078.108333   
1                    774.365000                  2687.711667   
2                   1294.408333                  3445.930000   
3                    882.946667                  2058.623333   
4                   1228.288333                  2620.975000   

   Data.Rates.Property.Motor  Data.Rates.Violent.All  \
0                 252.871667              440.486667   
1                 460.228333              525.123333   
2                 556.223333              482.733333   
3                 200.463333        