# Project Section Outline
A bit cleaner than just using comments

### Import dependencies
- from census import Census
- pandas as pd
- matplotlib.pyplot as plt
- from us import states
- os
- API keys from the config file (jmp_config)

In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from census import Census
from us import states
from jmp_config import g_key
from jmp_config import census_key

### Import and clean CA crime data
- Get data from CSV file
- Clean up the county names
- Find only data for crimes with firearms
- Save a separate dataframe

In [11]:
# Load the California crimes-and-clearances export from the Data folder.
filepath = os.path.join('Data', 'Crimes_and_Clearances_with_Arson-1985-2019.csv')
df = pd.read_csv(filepath, low_memory=False)

# Drop the trailing ' County' so the field holds just the county name.
df['County'] = df['County'].str.replace(' County', '')

# Fields kept for the analysis:
#   'FROBact_sum' - total robberies with a firearm, actual offenses
#   'FASSact_sum' - total assaults with a firearm, actual offenses
crime_columns = ['Year', 'County', 'NCICCode',
                 'Violent_sum', 'FROBact_sum', 'FASSact_sum']

# Friendlier names for the summed-offense columns.
crime_column_names = {
    'FROBact_sum': 'firearmRobberySum',
    'FASSact_sum': 'firearmAssaultSum',
    'Violent_sum': 'violentSum',
}

# Select the needed fields and rename them in one pass.
df_clean = df[crime_columns].rename(columns=crime_column_names)
df_clean.head()

Unnamed: 0,Year,County,NCICCode,violentSum,firearmRobberySum,firearmAssaultSum
0,1985,Alameda,Alameda Co. Sheriff's Department,427,77,25
1,1985,Alameda,Alameda,405,56,16
2,1985,Alameda,Albany,101,23,3
3,1985,Alameda,Berkeley,1164,242,47
4,1985,Alameda,Emeryville,146,35,6


### Import and clean Census data
- Get data from the Census API (ACS 5-year estimates; 2009 through 2019 is what is available)
- Get population data for CA
- Bonus: get population data for each county (may not be available)
- Save data as a simple dataframe (year, population)

In [59]:
# Build one dataframe of California county populations for 2009-2019 from the
# Census ACS 5-year API, one request per year.

def _fetch_county_population(year):
    """Return one year's population figures for every California county.

    Queries the ACS 5-year endpoint for 'NAME' and 'B01003_001E' for all
    counties in California.

    Parameters
    ----------
    year : int
        Year passed to the Census client for the request.

    Returns
    -------
    pandas.DataFrame
        One row per county with the API columns renamed
        ('B01003_001E' -> 'Population', 'county' -> 'countyNo',
        'state' -> 'stateNo') plus two added columns: 'County' (the NAME
        field with ' County, California' removed) and 'Year'.
    """
    c = Census(census_key, year=year)
    census_data = c.acs5.get(('NAME', 'B01003_001E'),
                             geo={'for': 'county:*',
                                  'in': 'state:{}'.format(states.CA.fips)})

    # Save as dataframe and rename columns
    census_pd = pd.DataFrame(census_data).rename(
        columns={"B01003_001E": "Population",
                 "county": 'countyNo',
                 "state": 'stateNo'})

    # Add a column that only has the county name
    census_pd['County'] = census_pd['NAME'].str.replace(' County, California', '')

    # Add a year column
    census_pd['Year'] = year
    return census_pd


# Collect every year first and concatenate once. The original renamed
# `census_pd` before it was defined (NameError on a fresh kernel) and grew the
# frame with DataFrame.append, which was removed in pandas 2.0.
yearly_frames = []
for year in range(2009, 2020):
    yearly_frames.append(_fetch_county_population(year))

CAPop_df = pd.concat(yearly_frames, ignore_index=True)

# Save the combined table, then end the cell on head() so the preview displays.
CAPop_df.to_csv('Data/CA_CountyPop_2009to2019.csv')
CAPop_df.head()

### Group crime data
- Group by year
- Get total gun crimes per year
- Get total gun crimes per 100,000 people
- Bonus: group by county (if county population data is available)
- Bonus: get gun crimes per county
- Bonus: get gun crimes per 100,000 people in a county

### Plot gun crime data
- Bar chart of gun crimes per year
- Bar chart of gun crimes per year per capita
- Bonus: bar chart of gun crimes per year in LA County
- Bonus: bar chart of gun crimes per year in LA County per capita
- Save your charts to files