---

_You are currently looking at **version 1.1** of this notebook. To download notebooks and datafiles, as well as get help on Jupyter notebooks in the Coursera platform, visit the [Jupyter Notebook FAQ](https://www.coursera.org/learn/python-machine-learning/resources/bANLa) course resource._

---

## Predicting Property Maintenance Fines

This project is based on a data challenge from the Michigan Data Science Team ([MDST](http://midas.umich.edu/mdst/)). This project was created as part of the University of Michigan Applied Data Science program. 

The goal of this project is to assess the likelihood that a Detroit citizen will pay their blight ticket. Blight violations are issued when a person allows their property to remain in a deteriorated condition. There are millions of dollars in tickets issued each year, but many go unpaid. This project creates a probabilistic estimate that any given ticket will be paid. The output is a two-column table scoring each ticket (the ticket ID and its predicted probability of compliance). All data is provided by the [Detroit Open Data Portal](https://data.detroitmi.gov/). 


In [23]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier

def _prepare_features(frame, drop_cols):
    """Return a new frame without *drop_cols* and with lat/lon gaps filled."""
    # drop() hands back a new object, so nothing below writes into a slice of
    # the raw data (the original in-place edits triggered chained-assignment
    # warnings and are no-ops under pandas copy-on-write).
    features = frame.drop(drop_cols, axis=1)
    # Forward-fill reproduces the original 'pad' behaviour; the trailing
    # back-fill only matters when the very first rows have no coordinates,
    # a case that previously left NaNs for the classifier to choke on.
    return features.assign(
        lat=features['lat'].ffill().bfill(),
        lon=features['lon'].ffill().bfill(),
    )


def blight_model():
    """Train an MLP on the blight-ticket data and score the test tickets.

    Reads ``train.csv``, ``test.csv``, ``addresses.csv`` and ``latlons.csv``
    from the current working directory, keeps only training rows whose
    ``compliance`` label is 0 or 1 and whose ``hearing_date`` is present,
    attaches ``lat``/``lon`` through the ticket's address, drops the
    free-text columns and the payment-outcome columns that exist only in the
    training file, min-max scales the remaining features and fits an
    ``MLPClassifier``.

    Returns:
        pandas.Series named ``compliance``, indexed by ``ticket_id``, holding
        the predicted probability of the positive class (compliance == 1)
        for every row of the test set.
    """
    from sklearn.preprocessing import MinMaxScaler

    # Both ticket files are decoded the same way; the original read test.csv
    # once with the default codec and once with ISO-8859-1.
    csv_encoding = 'ISO-8859-1'
    train = pd.read_csv('train.csv', encoding=csv_encoding)
    test = pd.read_csv('test.csv', encoding=csv_encoding)
    addresses = pd.read_csv('addresses.csv')
    latlons = pd.read_csv('latlons.csv')

    # Rows without a 0/1 label (including NaN) cannot be used for training.
    train = train[train['compliance'].isin([0, 1])]

    # ticket_id -> coordinates lookup, built by matching on the address text.
    geo = (addresses.set_index('address')
           .join(latlons.set_index('address'), how='left')
           .set_index('ticket_id'))
    train = train.set_index('ticket_id').join(geo)
    test = test.set_index('ticket_id').join(geo)

    train = train[train['hearing_date'].notnull()]

    # Columns present only in the training file; they describe the payment
    # outcome and are therefore unavailable at prediction time.
    outcome_cols = ['balance_due', 'collection_status', 'compliance_detail',
                    'payment_amount', 'payment_date', 'payment_status']
    # Non-numeric columns that are not encoded and so cannot be scaled.
    text_cols = ['violator_name', 'zip_code', 'country', 'city',
                 'inspector_name', 'violation_street_number',
                 'violation_street_name', 'violation_zip_code',
                 'violation_description', 'mailing_address_str_number',
                 'mailing_address_str_name', 'non_us_str_code', 'agency_name',
                 'state', 'disposition', 'ticket_issued_date', 'hearing_date',
                 'grafitti_status', 'violation_code']

    train = _prepare_features(train, outcome_cols + text_cols)
    test = _prepare_features(test, text_cols)

    y_train = train['compliance']
    X_train = train.drop('compliance', axis=1)
    # Enforce the training column order instead of relying on the two CSV
    # files happening to list their columns identically.
    X_test = test[X_train.columns]

    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    clf = MLPClassifier(hidden_layer_sizes=[100, 10],
                        alpha=0.001,
                        random_state=0,
                        solver='lbfgs',
                        verbose=0)
    clf.fit(X_train_scaled, y_train)

    # Column 1 of predict_proba is the second class, i.e. compliance == 1.
    y_prob = clf.predict_proba(X_test_scaled)[:, 1]

    # Label the scores with the index of the rows that were actually scored,
    # rather than re-reading test.csv and matching by position.
    return pd.Series(y_prob, index=X_test.index, name='compliance')
    
 

In [24]:
# Run the whole pipeline; the notebook echoes the returned Series of
# per-ticket compliance scores, indexed by ticket_id.
blight_model()

  if self.run_code(code, result):


ticket_id
284932    0.044484
285362    0.028807
285361    0.056458
285338    0.051292
285346    0.069050
285345    0.051360
285347    0.091602
285342    0.319781
285530    0.029042
284989    0.028101
285344    0.089737
285343    0.027758
285340    0.028037
285341    0.091603
285349    0.069134
285348    0.051373
284991    0.028093
285532    0.028076
285406    0.028676
285001    0.026950
285006    0.027299
285405    0.028986
285337    0.029728
285496    0.067433
285497    0.046533
285378    0.028506
285589    0.030664
285585    0.043609
285501    0.060258
285581    0.028554
            ...   
376367    0.029129
376366    0.036594
376362    0.035352
376363    0.043205
376365    0.029129
376364    0.036594
376228    0.040240
376265    0.036316
376286    0.322978
376320    0.039149
376314    0.039278
376327    0.317067
376385    0.306234
376435    0.339537
376370    0.316198
376434    0.061442
376459    0.062023
376478    0.013753
376473    0.039170
376484    0.032526
376482    0.026864
37