### Crime in the time of Corona - Create DB from raw CSV tables

In [182]:
## Add dependencies: Pandas
import pandas as pd
import os # needed to use the os.path.join method to load the files
from sqlalchemy import create_engine # for integrating with PostgreSQL
from config import db_password

#### Incidents: Load raw csv, create dataframe and clean data.

In [183]:
# Load the CSV Files into a dataframes.
incidents_raw_df = pd.read_csv("../Resources - MPD Data/incidentTableClean.csv")

In [184]:
# Review the incidents table fields and counts of each field.
incidents_raw_df.info()
# 1788 records. Removed MP20009253 from CSV due to very incomplete record. 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1788 entries, 0 to 1787
Data columns (total 16 columns):
Number                1788 non-null object
dateReported          1788 non-null object
startDate             1788 non-null object
offenseCode           1788 non-null object
offenseDescription    1788 non-null object
streetAddress         1784 non-null object
cityDescription       1739 non-null object
stateDescription      1225 non-null object
zipCode               1517 non-null object
longitude             1764 non-null float64
latitude              1764 non-null float64
Booked                1788 non-null int64
DAComplaint           1788 non-null int64
Cited                 1788 non-null int64
burglaryFactor        350 non-null object
felonyMisdemeanor     1719 non-null object
dtypes: float64(2), int64(3), object(11)
memory usage: 223.6+ KB


In [185]:
# Review Number field for uniqueness. Its our primary field
incidents_raw_df.Number.value_counts(sort=True, dropna=True, ascending=False, bins=None)
# Results show 1660 unique values meaning 127 are possible duplicates.
# Looks like Case Number duplication is valid due to multiple offenses under same case.
# Need to create a primary unique field. 



MP20008752    5
MP20009588    4
MP20004466    4
MP20003634    3
MP20008931    3
             ..
MP20009183    1
MP20009068    1
MP20008224    1
MP20002160    1
MP20001729    1
Name: Number, Length: 1660, dtype: int64

In [186]:
x=incidents_raw_df[incidents_raw_df["Number"] =='MP2001024070']
x.head()
                 
                   
 #                  passing_math = school_data_complete_df[school_data_complete_df["math_score"] >=70]
#passing_math.head()
    

Unnamed: 0,Number,dateReported,startDate,offenseCode,offenseDescription,streetAddress,cityDescription,stateDescription,zipCode,longitude,latitude,Booked,DAComplaint,Cited,burglaryFactor,felonyMisdemeanor


In [187]:
# Create a primary unique field: Number plus offenseCode. 
# Call it CID for Case Id
incidents_raw_df["CID"] = incidents_raw_df["Number"] + ' - ' + incidents_raw_df["offenseCode"] 
incidents_raw_df.head(3)

Unnamed: 0,Number,dateReported,startDate,offenseCode,offenseDescription,streetAddress,cityDescription,stateDescription,zipCode,longitude,latitude,Booked,DAComplaint,Cited,burglaryFactor,felonyMisdemeanor,CID
0,MP19037614,"Dec 31, 2019, 7:10:57 PM","Dec 31, 2019, 7:00:00 PM",245 (A)(1) PC,245 (A)(1) PC FORCE OR ADW NOT FIREARM:GBI LIK...,OAKDALE RD & FLOYD AVE,MODESTO,CALIFORNIA,95355.0,-120.95777,37.67808,0,0,0,,FELONY,MP19037614 - 245 (A)(1) PC
1,MP19037565,"Dec 31, 2019, 9:01:31 AM","Dec 31, 2019, 12:30:00 AM",460(B) PC,460(B) PC BURGLARY: VEHICLE/LARCENY 06 Theft 2190,3120 PELANDALE AVE,MO,CALIFORNIA,,-121.052988,37.699369,0,0,0,,FELONY,MP19037565 - 460(B) PC
2,MP19037555,"Dec 31, 2019, 8:12:40 AM","Dec 31, 2019, 5:00:00 AM",460(B) PC,460(B) PC BURGLARY: VEHICLE/LARCENY 06 Theft 2190,3120 PELANDALE AVE,MODESTO,CALIFORNIA,95356.0,-121.053001,37.699374,0,0,0,,FELONY,MP19037555 - 460(B) PC


In [188]:
# Check uniqueness of new combined field
incidents_raw_df.CID.value_counts(sort=True, dropna=True, ascending=False, bins=None) 
# Visual review of remaining double entries shows they are in fact duplicates.

MP20008752 - 187 (A) PC       2
MP20008752 - 273 A(A) PC      2
MP20008677 - 460(B) PC        2
MP20007431 - 245 (A)(2) PC    2
MP20009588 - 273.5 (A) PC     2
                             ..
MP20006136 - 460(B) PC        1
MP20002700 - 460 (A) PC       1
MP20003119 - 273 A(A) PC      1
MP20009371 - 460 (B) PC       1
MP20007995 - 10851 VC         1
Name: CID, Length: 1783, dtype: int64

In [189]:
# Drop the Duplicate Case IDs. Then check counts again.
incidents_raw_df.drop_duplicates(subset ="CID", keep = False, inplace = True)
incidents_raw_df.CID.value_counts(sort=True, ascending=False) 

MP20002437 - 487 (D)(2) PC    1
MP20001772 - 488 PC           1
MP20000423 - 10851 VC         1
MP20008400 - 460 (B) PC       1
MP20005392 - 460(B) PC        1
                             ..
MP20001586 - 460 (A) PC       1
MP20006136 - 460(B) PC        1
MP20002700 - 460 (A) PC       1
MP20003119 - 273 A(A) PC      1
MP20007995 - 10851 VC         1
Name: CID, Length: 1778, dtype: int64

In [190]:
# Reorder colummns
neworder = ['CID','Number','dateReported','startDate','offenseCode','offenseDescription','streetAddress','cityDescription','stateDescription','zipCode','longitude','latitude','Booked','DAComplaint','Cited','burglaryFactor','felonyMisdemeanor','dateIncident','weekNumber']
incidents_raw_df=incidents_raw_df.reindex(columns=neworder)
incidents_raw_df.head(3)

Unnamed: 0,CID,Number,dateReported,startDate,offenseCode,offenseDescription,streetAddress,cityDescription,stateDescription,zipCode,longitude,latitude,Booked,DAComplaint,Cited,burglaryFactor,felonyMisdemeanor,dateIncident,weekNumber
0,MP19037614 - 245 (A)(1) PC,MP19037614,"Dec 31, 2019, 7:10:57 PM","Dec 31, 2019, 7:00:00 PM",245 (A)(1) PC,245 (A)(1) PC FORCE OR ADW NOT FIREARM:GBI LIK...,OAKDALE RD & FLOYD AVE,MODESTO,CALIFORNIA,95355.0,-120.95777,37.67808,0,0,0,,FELONY,,
1,MP19037565 - 460(B) PC,MP19037565,"Dec 31, 2019, 9:01:31 AM","Dec 31, 2019, 12:30:00 AM",460(B) PC,460(B) PC BURGLARY: VEHICLE/LARCENY 06 Theft 2190,3120 PELANDALE AVE,MO,CALIFORNIA,,-121.052988,37.699369,0,0,0,,FELONY,,
2,MP19037555 - 460(B) PC,MP19037555,"Dec 31, 2019, 8:12:40 AM","Dec 31, 2019, 5:00:00 AM",460(B) PC,460(B) PC BURGLARY: VEHICLE/LARCENY 06 Theft 2190,3120 PELANDALE AVE,MODESTO,CALIFORNIA,95356.0,-121.053001,37.699374,0,0,0,,FELONY,,


In [191]:
# How much data is left in our dataframe?
incidents_raw_df.info()
# Looks like 1778 rows, so we eliminated 10 records.  Lots of work for a unique primary key, but necessary.

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1778 entries, 0 to 1787
Data columns (total 19 columns):
CID                   1778 non-null object
Number                1778 non-null object
dateReported          1778 non-null object
startDate             1778 non-null object
offenseCode           1778 non-null object
offenseDescription    1778 non-null object
streetAddress         1774 non-null object
cityDescription       1729 non-null object
stateDescription      1221 non-null object
zipCode               1509 non-null object
longitude             1754 non-null float64
latitude              1754 non-null float64
Booked                1778 non-null int64
DAComplaint           1778 non-null int64
Cited                 1778 non-null int64
burglaryFactor        350 non-null object
felonyMisdemeanor     1712 non-null object
dateIncident          0 non-null float64
weekNumber            0 non-null float64
dtypes: float64(4), int64(3), object(12)
memory usage: 277.8+ KB


In [192]:
## Ok. now we review and clean remaining fields in Incidents table. 
#  Start with the date field. Create a new date field stripped to date without time.
incidents_raw_df["dateIncident"] = pd.to_datetime(incidents_raw_df["dateReported"]).apply(lambda x: x.date())
incidents_raw_df.head(3)

Unnamed: 0,CID,Number,dateReported,startDate,offenseCode,offenseDescription,streetAddress,cityDescription,stateDescription,zipCode,longitude,latitude,Booked,DAComplaint,Cited,burglaryFactor,felonyMisdemeanor,dateIncident,weekNumber
0,MP19037614 - 245 (A)(1) PC,MP19037614,"Dec 31, 2019, 7:10:57 PM","Dec 31, 2019, 7:00:00 PM",245 (A)(1) PC,245 (A)(1) PC FORCE OR ADW NOT FIREARM:GBI LIK...,OAKDALE RD & FLOYD AVE,MODESTO,CALIFORNIA,95355.0,-120.95777,37.67808,0,0,0,,FELONY,2019-12-31,
1,MP19037565 - 460(B) PC,MP19037565,"Dec 31, 2019, 9:01:31 AM","Dec 31, 2019, 12:30:00 AM",460(B) PC,460(B) PC BURGLARY: VEHICLE/LARCENY 06 Theft 2190,3120 PELANDALE AVE,MO,CALIFORNIA,,-121.052988,37.699369,0,0,0,,FELONY,2019-12-31,
2,MP19037555 - 460(B) PC,MP19037555,"Dec 31, 2019, 8:12:40 AM","Dec 31, 2019, 5:00:00 AM",460(B) PC,460(B) PC BURGLARY: VEHICLE/LARCENY 06 Theft 2190,3120 PELANDALE AVE,MODESTO,CALIFORNIA,95356.0,-121.053001,37.699374,0,0,0,,FELONY,2019-12-31,


In [193]:
## Create a new week field so we can do stats of crime types over time by week.
incidents_raw_df['weekNumber'] = pd.to_datetime(incidents_raw_df['dateReported']).dt.week

# This works, but since our data starts in 2019, we have five weeks with numbers over 40. Starting with Week 47 with blank week.
incidents_raw_df.weekNumber.value_counts(sort=True, dropna=True, ascending=False, bins=None)

13    140
4     135
12    134
10    127
8     125
11    122
3     122
9     119
1     111
5     110
15    106
7     104
6     100
14     94
2      93
16     19
52     10
51      3
50      2
47      1
49      1
Name: weekNumber, dtype: int64

In [194]:
## How to fix to make weekNumber column useful for linear regression analysis?
#  Add six to each week number so that the first six slots in the week order can be allocated to 2019
incidents_raw_df['weekNumber']= incidents_raw_df['weekNumber'] + 6

In [195]:
# Now replace the high values for 2019 weeks with proper lower numbers.
incidents_raw_df["weekNumber"].replace({53:1,54:2,55:3,56:4,57:5,58:6}, inplace=True)

In [196]:
# How does the weekNumber field tally look now? Sweet!
incidents_raw_df.weekNumber.value_counts(sort=True, ascending=False, bins=None)

19    140
10    135
18    134
16    127
14    125
17    122
9     122
15    119
7     111
11    110
21    106
13    104
12    100
20     94
8      93
22     19
6      10
5       3
4       2
3       1
1       1
Name: weekNumber, dtype: int64

In [197]:
## final view before export of raw cleaned. 
#incidents_raw_df.reset_index(drop=True)
incidents_raw_df.head()
#df.reset_index(drop=True)

Unnamed: 0,CID,Number,dateReported,startDate,offenseCode,offenseDescription,streetAddress,cityDescription,stateDescription,zipCode,longitude,latitude,Booked,DAComplaint,Cited,burglaryFactor,felonyMisdemeanor,dateIncident,weekNumber
0,MP19037614 - 245 (A)(1) PC,MP19037614,"Dec 31, 2019, 7:10:57 PM","Dec 31, 2019, 7:00:00 PM",245 (A)(1) PC,245 (A)(1) PC FORCE OR ADW NOT FIREARM:GBI LIK...,OAKDALE RD & FLOYD AVE,MODESTO,CALIFORNIA,95355.0,-120.95777,37.67808,0,0,0,,FELONY,2019-12-31,7
1,MP19037565 - 460(B) PC,MP19037565,"Dec 31, 2019, 9:01:31 AM","Dec 31, 2019, 12:30:00 AM",460(B) PC,460(B) PC BURGLARY: VEHICLE/LARCENY 06 Theft 2190,3120 PELANDALE AVE,MO,CALIFORNIA,,-121.052988,37.699369,0,0,0,,FELONY,2019-12-31,7
2,MP19037555 - 460(B) PC,MP19037555,"Dec 31, 2019, 8:12:40 AM","Dec 31, 2019, 5:00:00 AM",460(B) PC,460(B) PC BURGLARY: VEHICLE/LARCENY 06 Theft 2190,3120 PELANDALE AVE,MODESTO,CALIFORNIA,95356.0,-121.053001,37.699374,0,0,0,,FELONY,2019-12-31,7
3,MP19037568 - 460 (A) PC,MP19037568,"Dec 31, 2019, 9:36:26 AM","Dec 31, 2019, 9:30:00 AM",460 (A) PC,460 (A) PC BURGLARY:FIRST DEGREE 05 Burglary -...,2129 CLAYMONT DR,MODESTO,CALIFORNIA,95350.0,-121.033225,37.687181,1,0,0,RESIDENCE - DAY (6AM - 6PM),FELONY,2019-12-31,7
4,MP19037568 - 10851 VC,MP19037568,"Dec 31, 2019, 9:36:26 AM","Dec 31, 2019, 9:30:00 AM",10851 VC,10851 VC AUTO THEFT 07 Motor Vehicle Theft 44,2129 CLAYMONT DR,MODESTO,CALIFORNIA,95350.0,-121.033225,37.687181,1,0,0,,FELONY,2019-12-31,7


In [198]:
#Review
incidents_raw_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1778 entries, 0 to 1787
Data columns (total 19 columns):
CID                   1778 non-null object
Number                1778 non-null object
dateReported          1778 non-null object
startDate             1778 non-null object
offenseCode           1778 non-null object
offenseDescription    1778 non-null object
streetAddress         1774 non-null object
cityDescription       1729 non-null object
stateDescription      1221 non-null object
zipCode               1509 non-null object
longitude             1754 non-null float64
latitude              1754 non-null float64
Booked                1778 non-null int64
DAComplaint           1778 non-null int64
Cited                 1778 non-null int64
burglaryFactor        350 non-null object
felonyMisdemeanor     1712 non-null object
dateIncident          1778 non-null object
weekNumber            1778 non-null int64
dtypes: float64(2), int64(4), object(13)
memory usage: 277.8+ KB


In [199]:
## Create a list of unique cases for RDB
case_list_df= incidents_raw_df[["CID","Number"]].copy()
case_list_df.head()
case_list_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1778 entries, 0 to 1787
Data columns (total 2 columns):
CID       1778 non-null object
Number    1778 non-null object
dtypes: object(2)
memory usage: 41.7+ KB


#### Suspects: Load raw csv, create dataframes and clean data.

In [227]:
# Load the suspects csv into a dataframe.
suspects_raw_df = pd.read_csv("../Resources - MPD Data/suspectTableClean.csv")
suspects_raw_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1876 entries, 0 to 1875
Data columns (total 4 columns):
caseID      1876 non-null object
personID    1876 non-null int64
Age         1016 non-null object
Sex         1002 non-null object
dtypes: int64(1), object(3)
memory usage: 58.8+ KB


In [228]:
# Drop where both Age and Sex are NULL
suspects_raw_df.dropna(subset=['Age', 'Sex'], inplace=True)
suspects_raw_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 908 entries, 0 to 1875
Data columns (total 4 columns):
caseID      908 non-null object
personID    908 non-null int64
Age         908 non-null object
Sex         908 non-null object
dtypes: int64(1), object(3)
memory usage: 35.5+ KB


In [229]:
# Create a case and suspects dataframe. Needed for RDB. 
case_suspects_df= suspects_raw_df[["caseID","personID"]].copy()
case_suspects_df.head()
case_suspects_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 908 entries, 0 to 1875
Data columns (total 2 columns):
caseID      908 non-null object
personID    908 non-null int64
dtypes: int64(1), object(1)
memory usage: 21.3+ KB


In [230]:
# Drop Dupes
case_suspects_df=case_suspects_df.drop_duplicates()
case_suspects_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 906 entries, 0 to 1875
Data columns (total 2 columns):
caseID      906 non-null object
personID    906 non-null int64
dtypes: int64(1), object(1)
memory usage: 21.2+ KB


In [231]:
# Create a dataframe of unique Suspects
suspects_filtered_df= suspects_raw_df[["personID","Age","Sex"]].copy()
suspects_filtered_df.head()
suspects_filtered_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 908 entries, 0 to 1875
Data columns (total 3 columns):
personID    908 non-null int64
Age         908 non-null object
Sex         908 non-null object
dtypes: int64(1), object(2)
memory usage: 28.4+ KB


In [232]:
# Drop the Duplicates. Then check counts again.
suspects_load_df=suspects_filtered_df.drop_duplicates(subset ="personID")
suspects_load_df.head()

Unnamed: 0,personID,Age,Sex
0,2309,54,FEMALE
1,3570,23,MALE
3,3986,23,MALE
4,2812,38,MALE
5,3663,21,FEMALE


In [233]:
# Review the suspects table structure
suspects_load_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 873 entries, 0 to 1875
Data columns (total 3 columns):
personID    873 non-null int64
Age         873 non-null object
Sex         873 non-null object
dtypes: int64(1), object(2)
memory usage: 27.3+ KB


In [250]:
# Create a case and suspects dataframe. Needed for RDB. 
case_suspects_df= suspects_raw_df[["caseID","personID"]].copy()
case_suspects_df.head()
case_suspects_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 908 entries, 0 to 1875
Data columns (total 2 columns):
caseID      908 non-null object
personID    908 non-null int64
dtypes: int64(1), object(1)
memory usage: 21.3+ KB


In [None]:
case_victims_df.info()

In [None]:
# Drop the Duplicates. Then check counts again.
case_victims=case_victims_df.drop_duplicates(subset ="personID")
case_victims_df.info()

#### Victims: Load raw csv, create dataframe and clean data.

In [236]:
# Load the victims csv into a dataframe.
victims_raw_df = pd.read_csv("../Resources - MPD Data/victimTable.csv")

In [237]:
# Review the suspects table structure
victims_raw_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1738 entries, 0 to 1737
Data columns (total 4 columns):
caseID      1738 non-null object
personID    1738 non-null int64
Age         1738 non-null object
Sex         1738 non-null object
dtypes: int64(1), object(3)
memory usage: 54.4+ KB


In [238]:
# Review the data
victims_raw_df.head()

Unnamed: 0,caseID,personID,Age,Sex
0,MP19035391,2938,35,FEMALE
1,MP19036742,2855,37,FEMALE
2,MP19037368,2399,50,MALE
3,MP19037420,3638,22,FEMALE
4,MP19037447,3885,15,FEMALE


In [239]:
# Drop where both Age and Sex are NULL
victims_raw_df.dropna(subset=['Age', 'Sex'], inplace=True)
victims_raw_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1738 entries, 0 to 1737
Data columns (total 4 columns):
caseID      1738 non-null object
personID    1738 non-null int64
Age         1738 non-null object
Sex         1738 non-null object
dtypes: int64(1), object(3)
memory usage: 67.9+ KB


In [245]:
# Drop Dupes
victims_raw_df=victims_raw_df.drop_duplicates()
victims_raw_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1715 entries, 0 to 1737
Data columns (total 4 columns):
caseID      1715 non-null object
personID    1715 non-null int64
Age         1715 non-null object
Sex         1715 non-null object
dtypes: int64(1), object(3)
memory usage: 67.0+ KB


In [246]:
# Create a case and victims dataframe. Needed for RDB. 
case_victims_df= victims_raw_df[["caseID","personID"]].copy()
case_victims_df.head()

Unnamed: 0,caseID,personID
0,MP19035391,2938
1,MP19036742,2855
2,MP19037368,2399
3,MP19037420,3638
4,MP19037447,3885


In [248]:
# Create a dataframe of unique Victims
victims_filtered_df= victims_raw_df[["personID","Age","Sex"]].copy()
victims_filtered_df.head()
victims_filtered_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1715 entries, 0 to 1737
Data columns (total 3 columns):
personID    1715 non-null int64
Age         1715 non-null object
Sex         1715 non-null object
dtypes: int64(1), object(2)
memory usage: 53.6+ KB


In [251]:
# Drop the Duplicates. Then check counts again.
victims_load_df=victims_filtered_df.drop_duplicates(subset ="personID")
victims_load_df.head()

Unnamed: 0,personID,Age,Sex
0,2938,35,FEMALE
1,2855,37,FEMALE
2,2399,50,MALE
3,3638,22,FEMALE
4,3885,15,FEMALE


#### Penal Codes: Create dataframe and clean data.

In [None]:
## Now we look at our offense codes. 
incidents_raw_df.offenseCode.value_counts(sort=True, ascending=False, )
# Need a subject matter expert. This could be ok. Or not.

In [121]:
# Create the Penal Codes dataframe
#penal_codes_df = incidents_raw_df[["Penal_Code","Penal_Desc"]].copy()
penal_codes_df = incidents_raw_df[["offenseCode","offenseDescription"]].copy()
penal_codes_df.head(3)

Unnamed: 0_level_0,offenseCode,offenseDescription
CID,Unnamed: 1_level_1,Unnamed: 2_level_1
MP19037614 - 245 (A)(1) PC,245 (A)(1) PC,245 (A)(1) PC FORCE OR ADW NOT FIREARM:GBI LIK...
MP19037565 - 460(B) PC,460(B) PC,460(B) PC BURGLARY: VEHICLE/LARCENY 06 Theft 2190
MP19037555 - 460(B) PC,460(B) PC,460(B) PC BURGLARY: VEHICLE/LARCENY 06 Theft 2190


In [122]:
# Rename the columns
penal_codes_df.rename(columns={"offenseCode": "Penal_Code", "offenseDescription": "Penal_Desc"}, inplace=True)
penal_codes_df.head()

Unnamed: 0_level_0,Penal_Code,Penal_Desc
CID,Unnamed: 1_level_1,Unnamed: 2_level_1
MP19037614 - 245 (A)(1) PC,245 (A)(1) PC,245 (A)(1) PC FORCE OR ADW NOT FIREARM:GBI LIK...
MP19037565 - 460(B) PC,460(B) PC,460(B) PC BURGLARY: VEHICLE/LARCENY 06 Theft 2190
MP19037555 - 460(B) PC,460(B) PC,460(B) PC BURGLARY: VEHICLE/LARCENY 06 Theft 2190
MP19037568 - 460 (A) PC,460 (A) PC,460 (A) PC BURGLARY:FIRST DEGREE 05 Burglary -...
MP19037568 - 10851 VC,10851 VC,10851 VC AUTO THEFT 07 Motor Vehicle Theft 44


In [None]:
# quick view
penal_codes_df.Penal_Code.value_counts(sort=True, ascending=False)

In [123]:
# Drop Duplicates
penal_codes_df.drop_duplicates(subset ="Penal_Code", keep = False, inplace = True)
penal_codes_df.Penal_Code.value_counts()

487 (C) PC           1
244 PC               1
287 (C)(2)(A) PC     1
463 (B) PC           1
487 (B)(3) PC        1
664 /460 PC          1
487 (D) PC           1
463 (C) PC           1
286 (C)(2)(A) PC     1
289 (B) PC           1
273 D(A) PC          1
288 A(C)(1) PC       1
203 PC               1
664 /212.5 (A) PC    1
261 (A)(3) PC        1
273.5 (F)(1) PC      1
286 (B)(1) PC        1
664 /261 PC          1
243.4 (D) PC         1
215 (A) PC           1
261 PC               1
261 (A)(4)(A) PC     1
288.7 (B) PC         1
Name: Penal_Code, dtype: int64

In [124]:
##Quick View
penal_codes_df.head()

Unnamed: 0_level_0,Penal_Code,Penal_Desc
CID,Unnamed: 1_level_1,Unnamed: 2_level_1
MP20000408 - 273.5 (F)(1) PC,273.5 (F)(1) PC,273.5 (F)(1) PC INFLICT CORPORAL INJURY ON SPO...
MP20001529 - 664 /212.5 (A) PC,664 /212.5 (A) PC,664 /212.5 (A) PC ATTEMPTED ROBBERY:FIRST DEGR...
MP20001733 - 287 (C)(2)(A) PC,287 (C)(2)(A) PC,287 (C)(2)(A) PC ORAL COPULATION: BY USE OF F...
MP20001782 - 487 (B)(3) PC,487 (B)(3) PC,487 (B)(3) PC GRAND THEFT BY SERVANT/ETC ($400...
MP20002143 - 288 A(C)(1) PC,288 A(C)(1) PC,288 A(C)(1) PC ORAL COPULATION WITH PERSON UND...


### Export Files to CSV

In [207]:
# Crimes to CSV
cleaned_incidents = "../Resources/CleanedData/Cleaned_Incidents.csv"
incidents_raw_df.to_csv(cleaned_incidents, index=False)

In [208]:
# Unique cases to CSV
unique_cases = "../Resources/CleanedData/Unique_Cases.csv"
case_list_df.to_csv(unique_cases, index=False)

In [209]:
# Unique Suspects to CSV
output_suspects = "../Resources/CleanedData/Cleaned_Suspects.csv"
suspects_load_df.to_csv(output_suspects, index=False)

In [210]:
# Case-Suspects list to CSV
output_case_suspects = "../Resources/CleanedData/Case_Suspects.csv"
case_suspects_df.to_csv(output_case_suspects, index=False)

In [252]:
# Unique Victims to CSV
output_unique_victims = "../Resources/CleanedData/Unique_Victims.csv"
victims_load_df.to_csv(output_unique_victims, index=False)

In [253]:
# Case-Victims to CSV
output_victims = "../Resources/CleanedData/Case_Victims.csv"
case_victims_df.to_csv(output_victims, index=False)

In [None]:
-- all good up to here.  damn. lots of work

In [119]:
# Penal Codes to CSV
output_penal_codes = "../Resources/CleanedData/Penal_Codes.csv"
penal_codes_df.to_csv(output_penal_codes, index=False)

### Export dataframes to SQL tables in crime_corona db

In [17]:
# Create the database engine
db_string = f"postgres://postgres:{db_password}@127.0.0.1:5432/crime_corona"

# Create the engine
engine = create_engine(db_string)

In [223]:
# Unique cases to SQL
case_list_df.to_sql(name='unique_cases', con=engine, if_exists='append',index=False)

In [224]:
# Crimes list - cleaned original with all details to SQL
incidents_raw_df.to_sql(name='incidents', con=engine, if_exists='append',index=False)

In [225]:
# Suspects to SQL
suspects_load_df.to_sql(name='suspects', con=engine, if_exists='append', index=False)

In [234]:
# Case Suspects
case_suspects_df.to_sql(name='case_suspects', con=engine, if_exists='append',index=False)

In [None]:
 first create the SQL tables - confirm

In [None]:
# Victims to SQL
victims_raw_df.to_sql(name='victims', con=engine, if_exists='append', index=False)

In [None]:
# Case Victims
case_victimes_df.to_sql(name='case_victims', con=engine, if_exists='append',index=False)

In [125]:
# Penal Codes to SQL
penal_codes_df.to_sql(name='penal_codes', con=engine, if_exists='replace', index=False)