In [1]:
import numpy as np
import scipy as sp
import pandas as pd
import os
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
# Possible improvement: pass an explicit `dtype=` mapping to read_csv. Without it, pandas
# has to work out every column's type from the data itself; declaring the types up front
# skips that inference and should cut the loading time.
viols = pd.read_csv("./data/viols.csv")
viols.head(2)

  data = self._reader.read(nrows)


Unnamed: 0.1,Unnamed: 0,TicketID,TicketNumber,AgencyName,ViolName,ViolationStreetNumber,ViolationStreetName,MailingStreetNumber,MailingStreetName,MailingCity,...,StateFee,CleanUpCost,JudgmentAmt,PaymentStatus,Void,ViolationCategory,ViolationAddress,MailingAddress,lat,lng
0,1,26288,05000001DAH,Department of Public Works,"Group, LLC, Grand Holding",2566,GRAND BLVD,743,"Beaubien, Ste. 201",Detroit,...,$10.00,$0.00,$1680.00,PAID IN FULL,0,0,"2566 GRAND BLVD\r\nDetroit, MI\r\n(42.36318237...","743 Beaubien\r\nDetroit, MI 48226\r\n(42.33373...",42.363182,-83.091677
1,2,19800,05000025DAH,Department of Public Works,"JACKSON, RAECHELLE",19014,ASHTON,20501,HEYDEN,DETROIT,...,$10.00,$0.00,$140.00,NO PAYMENT APPLIED,0,0,"19014 ASHTON\r\nDetroit, MI\r\n(42.42939076200...","20501 HEYDEN\r\nDETROIT, MI 48219\r\n(42.44217...",42.429391,-83.220394


More on the fields and the API of the dataset source: 
https://dev.socrata.com/foundry/data.detroitmi.gov/ujjt-auyi

We can see that pandas.read_csv() parses the file correctly. By comparison, printing the first 5 lines of the file with the following program does not work as well: it splits the text at every end-of-line character "\n", even when that character lies inside a double-quoted ("...") field.

To split the records the same way pandas.read_csv() does, we can improve the above code as follows:

In [8]:
# Peek at the raw TicketIssuedDT strings.
# NOTE(review): the values shown below carry years such as 38440, which look like
# spreadsheet serial day numbers rather than real calendar years — confirm before parsing.
viols["TicketIssuedDT"].head()

0    01/01/38440 12:00:00 AM
1    01/01/38383 12:00:00 AM
2    01/01/38383 12:00:00 AM
3    01/01/38385 12:00:00 AM
4    01/01/38385 12:00:00 AM
Name: TicketIssuedDT, dtype: object

In [6]:
## A replacement for next(f) that does not stop at a "\n" enclosed between double quotes.
def modinext(f):  # f as a file
    """Return the next logical CSV record from ``f``.

    Physical lines are pulled from ``f`` with ``next`` and glued together until
    the number of double-quote characters collected so far is even, i.e. until
    every quoted field that was opened has been closed again.

    Parameters
    ----------
    f : iterator of str
        An open text file, or any iterator yielding lines.

    Returns
    -------
    str
        One complete record, embedded newlines and the trailing newline included.
        If the input ends in the middle of a quoted field, the text read so far
        is returned rather than being thrown away.

    Raises
    ------
    StopIteration
        If ``f`` is already exhausted when the function is called.
    """
    parts = [next(f)]  # lets StopIteration propagate when nothing is left to read
    # Track only the parity of the quote count, one chunk at a time, so the
    # accumulated text is never rescanned.
    unbalanced = parts[0].count('"') % 2
    while unbalanced:
        try:
            chunk = next(f)
        except StopIteration:
            # Input ended inside a quoted field: hand back the partial record.
            break
        parts.append(chunk)
        unbalanced = (unbalanced + chunk.count('"')) % 2
    return "".join(parts)

# Show the first Nblines logical records of the raw file (the header row counts as one).
Nblines = 4
with open("./data/detroit-blight-violations.csv") as f:
    head = []
    for _ in range(Nblines):  # the loop variable is unused; only the count matters
        try:
            head.append(modinext(f))
        except StopIteration:
            # The file holds fewer than Nblines records: show what there is.
            break
# A single parenthesised argument prints identically under Python 2 and Python 3.
for i, line in enumerate(head):
    print("Line " + str(i) + " : " + line)

Line 0 : TicketID,TicketNumber,AgencyName,ViolName,ViolationStreetNumber,ViolationStreetName,MailingStreetNumber,MailingStreetName,MailingCity,MailingState,MailingZipCode,NonUsAddressCode,Country,TicketIssuedDT,TicketIssuedTime,HearingDT,CourtTime,ViolationCode,ViolDescription,Disposition,FineAmt,AdminFee,LateFee,StateFee,CleanUpCost,JudgmentAmt,PaymentStatus,Void,ViolationCategory,ViolationAddress,MailingAddress

Line 1 : 26288,05000001DAH,Department of Public Works,"Group, LLC, Grand Holding",2566,GRAND BLVD      ,743,"Beaubien, Ste. 201",Detroit,MI,48226,N/A,,01/01/38440 12:00:00 AM,12:00:00,01/01/38474 12:00:00 AM, 9:00AM,22-2-20,Burning solid waste  in open fires,Responsible By Determination,$1500.00,$20.00,$150.00,$10.00,$0.00,$1680.00,PAID IN FULL,0,0,"2566 GRAND BLVD
Detroit, MI
(42.36318237000006, -83.09167672099994)","743 Beaubien
Detroit, MI 48226
(42.33373063000005, -83.04181755199994)"

Line 2 : 19800,05000025DAH,Department of Public Works,"JACKSON, RAECHELLE ",19014,ASHTO

In [9]:
# NOTE(review): `dviolations` is not assigned in any cell visible here (only `viols` is) —
# presumably it is the frame read from ./data/detroit-blight-violations.csv; confirm, and
# make sure the cell that defines it runs before this one.
dviolations.describe()

Unnamed: 0,TicketID,ViolationStreetNumber,Void,ViolationCategory
count,307804.0,307804.0,99133,307804.0
mean,182967.015269,12000.63,0,0.006553
std,94107.561832,412348.6,0,0.080684
min,18645.0,-11064.0,0,0.0
25%,101805.75,4936.0,0,0.0
50%,183824.5,10624.5,0,0.0
75%,265211.25,15895.0,0,0.0
max,339184.0,222222200.0,0,1.0


In [7]:
# Load the Detroit 311 tickets CSV and preview its first three rows.
d311 = pd.read_csv("./data/detroit-311.csv")
d311.head(3)

Unnamed: 0,ticket_id,city,issue_type,ticket_status,issue_description,rating,ticket_closed_date_time,acknowledged_at,ticket_created_date_time,ticket_last_updated_date_time,address,lat,lng,location,image
0,1516722,City of Detroit,Clogged Drain,Acknowledged,"Two drains one on each side of street, street ...",3,,03/06/2015 10:03:38 PM,03/06/2015 09:57:52 PM,04/12/2015 01:01:10 AM,"13120-13130 Ilene Street Detroit, MI 48238, USA",42.383998,-83.161039,"(42.3839977668, -83.1610385642)",
1,1525361,City of Detroit,Clogged Drain,Acknowledged,standing water on lumplin,2,,03/11/2015 04:23:11 PM,03/11/2015 04:14:29 PM,04/07/2015 02:04:44 PM,"1485 E. Outer Drive Detroit, Michigan",42.440471,-83.080919,"(42.4404708, -83.080919)",
2,1525218,City of Detroit,Clogged Drain,Closed,CITZEN CALLED TO REPORT CLOGGED DRAINS,2,08/15/2015 12:03:43 AM,03/11/2015 03:39:05 PM,03/11/2015 03:26:20 PM,08/15/2015 12:03:44 AM,"15460 Eastburn Detroit, Michigan",42.445244,-82.962038,"(42.445244, -82.962038)",


In [20]:
# Inspect the dtype pandas assigned to each column of d311.
d311.dtypes

ticket_id                          int64
city                              object
issue_type                        object
ticket_status                     object
issue_description                 object
rating                             int64
ticket_closed_date_time           object
acknowledged_at                   object
ticket_created_date_time          object
ticket_last_updated_date_time     object
address                           object
lat                              float64
lng                              float64
location                          object
image                             object
dtype: object

In [10]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns of d311.
d311.describe()

Unnamed: 0,ticket_id,rating,lat,lng
count,19680.0,19680.0,19680.0,19680.0
mean,1699224.357571,2.69253,42.38631,-83.109313
std,172179.801056,0.801268,0.038308,0.100192
min,1184398.0,1.0,41.879952,-86.550059
25%,1591936.5,2.0,42.355576,-83.191768
50%,1705228.5,3.0,42.387895,-83.11241
75%,1838304.75,3.0,42.419723,-83.037698
max,1975499.0,19.0,42.449849,-82.911434


In [8]:
# Load the Detroit crime CSV and preview its first three rows.
dcrime = pd.read_csv("./data/detroit-crime.csv")
dcrime.head(3)

  data = self._reader.read(nrows)


Unnamed: 0,ROWNUM,CASEID,INCINO,CATEGORY,OFFENSEDESCRIPTION,STATEOFFENSEFILECLASS,INCIDENTDATE,HOUR,SCA,PRECINCT,COUNCIL,NEIGHBORHOOD,CENSUSTRACT,ADDRESS,LON,LAT,LOCATION
0,53256,1953933,1506030028.1,ASSAULT,ASSAULT AND BATTERY/SIMPLE ASSAULT,13001,06/03/2015 12:00:00 AM,2,1007,10,City Council District 5,PETOSKEY-OTSEGO,5334,09100 PETOSKEY,-83.1221,42.3678,"PETOSKEY\n09100\n(42.3676, -83.1219)"
1,17631,1917717,1503010158.1,LARCENY,LARCENY - PARTS AND ACCESSORIES FROM VEHICLE,23006,03/01/2015 12:00:00 AM,9,608,6,City Council District 7,GRANDALE,5452,00 PLYMOUTH AND MANSFIELD,-83.2035,42.3724,"00 PLYMOUTH AND MANSFIELD\n(42.3725, -83.2033)"
2,11207,1910955,1502080223.1,STOLEN VEHICLE,VEHICLE THEFT,24001,02/08/2015 12:00:00 AM,18,1105,11,City Council District 3,OUTER DRIVE VAN DYKE,5051,00 E 7 MILE VAN DYKE,-83.0241,42.4338,"00 E 7 MILE VAN DYKE\n(42.4339, -83.0241)"


In [10]:
# The demolition-permits file is tab-separated, so the delimiter is passed explicitly.
ddpermits = pd.read_csv("./data/detroit-demolition-permits.tsv", sep="\t")
ddpermits.head(3)

Unnamed: 0,PERMIT_NO,PERMIT_APPLIED,PERMIT_ISSUED,PERMIT_EXPIRES,SITE_ADDRESS,BETWEEN1,PARCEL_NO,LOT_NUMBER,SUBDIVISION,CASE_TYPE,...,CONTRACTOR_ADDRESS1,CONTRACTOR_ADDRESS2,CONTRACTOR_CITY,CONTRACTOR_STATE,CONTRACTOR_ZIP,CONDITION_FOR_APPROVAL,site_location,owner_location,contractor_location,geom
0,BLD2015-03955,8/28/15,8/28/15,,4331 BARHAM,BETWEEN VOIGHT AND WAVENEY,21 65525-6,S15,,BLD,...,13500 FOLEY,,DETROIT,,48227,,"4331 BARHAM\nDetroit, MI\n(42.394106, -82.9474)","65 CADILLAC\nDETROIT, MI\n(42.331741326000042,...","13500 FOLEY\nDETROIT, MI\n(42.379332, -83.177501)",
1,BLD2015-04083,8/28/15,8/28/15,,9707 BESSEMORE,BETWEEN VINTON AND GRATIOT,19 2312.,40,,BLD,...,13500 FOLEY,,DETROIT,,48227,,"9707 BESSEMORE\nDetroit, MI\n(42.395122, -83.0...","65 CADILLAC\nDETROIT, MI\n(42.331741326000042,...","13500 FOLEY\nDETROIT, MI\n(42.379332, -83.177501)",
2,BLD2015-03976,8/28/15,8/28/15,,5315 BERKSHIRE,BETWEEN SOUTHAMPTON AND FRANKFORT,21 69321.,445,,BLD,...,13500 FOLEY,,DETROIT,,48227,,"5315 BERKSHIRE\nDetroit, MI\n(42.40322, -82.94...","65 CADILLAC\nDETROIT, MI\n(42.331741326000042,...","13500 FOLEY\nDETROIT, MI\n(42.379332, -83.177501)",
