In [1]:
import ast
import datetime as dt
import warnings # Turn off warnings

import matplotlib.dates as mdates
import numpy as np
import pandas as pd

warnings.filterwarnings('ignore')

In [2]:
# Import: load the raw Rent Board petition export into a DataFrame.
petition_df = pd.read_csv(
    filepath_or_buffer="/home/harrisonized/Data/SF Open Data/Petitions_to_the_Rent_Board.csv"
)

In [3]:
# Rename some columns to the shorter labels used by the rest of the notebook.
# Every source column appears exactly once: a key repeated inside a dict
# literal is silently overwritten by its last value, so a duplicate entry
# would be dead code that misleads the reader about the resulting name.
petition_df = petition_df.rename(columns={
    'Date Filed': 'Date',
    'Petition Source Zipcode': 'Zip Code',
    'Location': 'latlong',
    'Analysis Neighborhoods': 'Neighborhood Number',
    'Neighborhoods - Analysis Boundaries': 'Neighborhood Name',
})

# Reorder columns. Only the columns listed below are kept, in this order;
# anything else (e.g. 'Priority', 'Appeal ID' and 'Prop I Petition') is dropped.
# Column labels are copied verbatim from the dataset, including its own
# spacing and spelling, because they must match the CSV header exactly.
location_columns = [
    'Date', 'Address', 'latlong', 'Zip Code',
    'Neighborhood Number', 'Neighborhood Name', 'SF Find Neighborhoods',
    'Current Police Districts', 'Current Supervisor Districts', 'Supervisor District',
]

identity_columns = ['Petition ID', 'Filing Party']

landlord_ground_columns = [
    'Ground (landlord): Capital Improvement',
    'Ground (landlord): Comparable Rents',
    'Ground (landlord): Extension of Time for Capital Improvements',
    'Ground (landlord): Intent to Withdraw (Ellis)',
    'Ground (landlord): Other',
    'Ground (landlord): Operating & Maintenance',
    'Ground (landlord): Substantial Rehabilitation',
    'Ground (landlord):  Costa-Hawkins',
    'Ground (landlord):  Exemption',
    'Ground (landlord): Rules and Regulations Section 1.21',
    'Ground (landlord): Rules and Regulations Section 6.14',
    'Ground (landlord): Rules and Regulations Section 6.15',
    'Ground (landlord): SRO Hotel Vistor Policy Petition',
    'Ground (landlord): Utility Passthrough Petition',
    'Ground (landlord): Owner Move-In Disability Determination',
    'Ground (landlord): Utility Passthrough Worksheet',
    'Ground (landlord): Non-Comparable Rents',
]

tenant_ground_columns = [
    'Ground (tenant): Decrease in Housing Services',
    'Ground (tenant): Failure to Repair and Maintain',
    'Ground (tenant): Passthrough Challenge',
    'Ground (tenant): Summary Petition',
    'Ground (tenant): Unlawful Rent Increase',
    'Ground (tenant): Wrongful Eviction Report',
    'Ground (tenant): Section 8 tenancy',
    'Ground (tenant): SRO Hotel Vistor Policy Petition',
    'Ground (tenant): Rules and Regulations Section 6.15',
    'Ground (tenant): Other',
    'Ground (tenant): Utility Passhtrough Hardship',
    'Ground (tenant): Water Revenue Bond Passthrough Hardship',
    'Ground (tenant): Capital Improvement Passthrough Hardship Petition',
    'Ground (tenant): Wrongful Severance of Housing Service',
    'Ground: Alternative Dispute Resolution',
]

computed_region_columns = [
    ':@computed_region_fyvs_ahh9', ':@computed_region_p5aj_wyqh',
    ':@computed_region_rxqg_mtj9', ':@computed_region_yftq_j783',
    ':@computed_region_bh8s_q3mv',
]

petition_df = petition_df[
    location_columns
    + identity_columns
    + landlord_ground_columns
    + tenant_ground_columns
    + computed_region_columns
]

# Clean up Zip Codes
#   1. Remove a ZIP+4 style suffix (a dash followed by four digits).
#   2. Keep the first run of digits; values containing no digits become missing.
#   3. Store the result with pandas' nullable integer dtype. A plain per-row
#      int cast does not survive missing values: pandas upcasts the whole
#      column to float, turning 94110 into 94110.0. 'Int64' keeps real
#      integers alongside <NA>.
# The digit pattern is a raw string so that '\d' is not treated as an
# (invalid) Python string escape.
petition_df['Zip Code'] = pd.to_numeric(
    petition_df['Zip Code']
    .replace('[-][0-9]{4}', '', regex=True)
    .str.extract(r'(\d+)', expand=False)
).astype('Int64')

# Standardizing some of the fields

# 'latlong' arrives as text (presumably "(lat, lon)" pairs - confirm against
# the CSV). SECURITY: this text comes from an external file, so it is parsed
# with ast.literal_eval, which accepts only Python literals, instead of
# eval(), which would execute any expression embedded in the data.
petition_df['latlong'] = petition_df['latlong'].apply(
    lambda x: ast.literal_eval(x) if not pd.isnull(x) else x)

# Parse the filing date strictly as month/day/year in one vectorized call.
petition_df['Datetime'] = pd.to_datetime(petition_df['Date'], format="%m/%d/%Y")

# Matplotlib's numeric date representation of each filing date.
petition_df['Timestamp'] = petition_df['Date'].apply(lambda x : mdates.datestr2num(x))

# Getting extra coordinates: split each parsed 'latlong' pair into separate
# columns (element 0 -> 'Latitude', element 1 -> 'Longitude'). Missing
# entries are passed through unchanged.
petition_df['Latitude'] = petition_df['latlong'].apply(
    lambda point: point if pd.isnull(point) else point[0]
)
petition_df['Longitude'] = petition_df['latlong'].apply(
    lambda point: point if pd.isnull(point) else point[1]
)

# Add a 'Petition ID List' column for mass petitions: gather every Petition ID
# filed for the same address on the same date into one list, then attach that
# list to each petition row sharing those keys.
# NOTE(review): groupby skips NaN keys by default and this merge is an inner
# join, so rows missing 'Date' or 'Address' are presumably dropped here -
# confirm that is intended.
filing_keys = ['Date', 'Address']
petition_ids_by_filing = (
    petition_df.groupby(filing_keys)['Petition ID']
    .apply(list)
    .to_frame('Petition ID List')
    .reset_index()
)
petition_merge_df = petition_df.merge(petition_ids_by_filing, on=filing_keys)

# Drop duplicates: keep the first row for each (Date, Address) pair. The
# per-row 'Petition ID' column is removed first, leaving 'Petition ID List'
# as the record of which petitions belong to the filing.
petition_unique_df = (
    petition_merge_df
    .drop(columns=['Petition ID'])
    .drop_duplicates(subset=['Date', 'Address'], keep='first')
    .reset_index(drop=True)
)

# Add list length: number of Petition IDs collected for each filing.
petition_unique_df['Petition ID Len'] = (
    petition_unique_df['Petition ID List'].apply(len)
)

In [4]:
# Export: write the de-duplicated filings to disk without the row index.
petition_unique_df.to_csv(
    path_or_buf="/home/harrisonized/Data/SF Open Data/Petitions_to_the_Rent_Board_unique.csv",
    index=False,
)