In [1]:
import requests
import pandas as pd

# URL of the API
url = "https://opengis.detroitmi.gov/opengis/rest/services/PublicSafety/RMS_Crime_Incidents/FeatureServer/0/query?where=1%3D1&outFields=*&outSR=4326&f=json"

# Make a GET request to the API
response = requests.get(url)

# Check if the request was successful
if response.status_code == 200:
    # Convert the JSON response to a DataFrame
    data = response.json()
    features = data.get("features", [])
    records = [feature["attributes"] for feature in features]
    df = pd.DataFrame.from_records(records)
    print(df.head())  # Display the first few rows of the DataFrame
else:
    print(f"Failed to retrieve data: {response.status_code}")


  crime_id report_number                                address  \
0  3009876    1701040208  W Outer Dr & Southfield Service Drive   
1  3019688    1701310002                 Forrer St & Tireman St   
2  3046707    1704050185               Lauder St & Margareta St   
3  3046854    1704050314           Saint Marys St & Majestic St   
4  3049535    1704110463                Celestine St & Young St   

    offense_description offense_category state_offense_code arrest_charge  \
0               ROBBERY          ROBBERY               1201         12000   
1                 ARSON            ARSON               2099         20000   
2       LARCENY - OTHER          LARCENY               2307         23007   
3  LARCENY FROM GROUNDS          LARCENY               2307         23009   
4                 ARSON            ARSON               2099         20000   

     charge_description  incident_timestamp incident_time  ...  \
0               ROBBERY       1483569000000         17:30  ...   
1 

In [4]:
# Set the max_columns option to None to display all columns
pd.set_option('display.max_columns', None)


In [5]:
df.head(5)

Unnamed: 0,crime_id,report_number,address,offense_description,offense_category,state_offense_code,arrest_charge,charge_description,incident_timestamp,incident_time,day_of_week,hour_of_day,year,scout_car_area,precinct,block_id,neighborhood,council_district,zip_code,longitude,latitude,ibr_date,oid
0,3009876,1701040208,W Outer Dr & Southfield Service Drive,ROBBERY,ROBBERY,1201,12000,ROBBERY,1483569000000,17:30,3,17,2017,808,8,261635404004019,College Park,2,48235,-83.218658,42.419085,1519923000000.0,1
1,3019688,1701310002,Forrer St & Tireman St,ARSON,ARSON,2099,20000,ARSON,1485001200000,07:20,6,7,2017,611,6,261635455001003,Warren Ave Community,7,48228,-83.200281,42.350847,1494852000000.0,2
2,3046707,1704050185,Lauder St & Margareta St,LARCENY - OTHER,LARCENY,2307,23007,LARCENY - OTHER,1491368400000,01:00,3,1,2017,1206,12,261635396003010,Winship,2,48235,-83.192437,42.427032,1494855000000.0,3
3,3046854,1704050314,Saint Marys St & Majestic St,LARCENY FROM GROUNDS,LARCENY,2307,23009,LARCENY FROM GROUNDS,1491439980000,20:53,3,20,2017,611,6,261635455002001,Warren Ave Community,7,48228,-83.203755,42.34596,1494855000000.0,4
4,3049535,1704110463,Celestine St & Young St,ARSON,ARSON,2099,20000,ARSON,1491979080000,02:38,3,2,2017,906,9,261635005004009,Mapleridge,4,48205,-82.972517,42.423891,1494855000000.0,5


In [6]:
df.shape

(2000, 23)

In [7]:
df.address

0       W Outer Dr & Southfield Service Drive
1                      Forrer St & Tireman St
2                    Lauder St & Margareta St
3                Saint Marys St & Majestic St
4                     Celestine St & Young St
                        ...                  
1995               Pinehurst St & Tireman Ave
1996              Barlow St & E State Fair St
1997                    Cruse St & Keeler Ave
1998    Martin Luther King Jr Blvd & Cass Ave
1999                  E Hildale St & Mound Rd
Name: address, Length: 2000, dtype: object

In [14]:
from homeharvest import scrape_property
from datetime import datetime

# Generate a filename based on the current timestamp
current_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"HomeHarvest_{current_timestamp}.csv"

# Scrape properties from Detroit, MI
properties = scrape_property(
    location="Los Angeles, CA",
    listing_type="sold",  # You can change this to 'for_sale', 'for_rent', or 'pending'
    past_days=600  # Modify this as needed
    # You can also use date_from and date_to instead of past_days
    # mls_only=True can be used to fetch only MLS listings
    # proxy="http://user:pass@host:port" to use a proxy
)

print(f"Number of properties: {len(properties)}")

# Export to CSV
#properties.to_csv(filename, index=False)


Number of properties: 10000


In [10]:
properties.head(5)

Unnamed: 0,property_url,mls,mls_id,status,style,street,unit,city,state,zip_code,beds,full_baths,half_baths,sqft,year_built,days_on_mls,list_price,list_date,sold_price,last_sold_date,lot_sqft,price_per_sqft,latitude,longitude,stories,hoa_fee,parking_garage,primary_photo,alt_photos
0,https://www.realtor.com/realestateandhomes-det...,DEMI,20230086929,SOLD,SINGLE_FAMILY,18285 Fenton St,,Detroit,MI,48219,3,1,1.0,1062,1966,85,119900,2023-10-12,117000,2024-01-05,7405,110,42.422809,-83.283036,1,0,2.0,http://ap.rdcpix.com/8c036dac2183996c598eecb08...,http://ap.rdcpix.com/8c036dac2183996c598eecb08...
1,https://www.realtor.com/realestateandhomes-det...,DEMI,20230102573,SOLD,SINGLE_FAMILY,3244 W Buena Vista St,,Detroit,MI,48238,4,1,1.0,1482,1924,28,54900,2023-12-08,45955,2024-01-05,3920,31,42.388846,-83.127056,2,0,2.0,http://ap.rdcpix.com/70d36dc9448e47841bd88546a...,http://ap.rdcpix.com/70d36dc9448e47841bd88546a...
2,https://www.realtor.com/realestateandhomes-det...,DEMI,20230026351,SOLD,SINGLE_FAMILY,15827 Faircrest St,,Detroit,MI,48205,4,1,1.0,1078,1940,189,65000,2023-06-30,57000,2024-01-05,4792,53,42.432573,-82.957487,2,0,2.0,http://ap.rdcpix.com/7a1382de7150808377df892df...,http://ap.rdcpix.com/7a1382de7150808377df892df...
3,https://www.realtor.com/realestateandhomes-det...,DEMI,20230073000,SOLD,SINGLE_FAMILY,8272 Freda St,,Detroit,MI,48204,3,2,,1680,1926,127,189000,2023-08-31,172500,2024-01-05,4792,103,42.355176,-83.159627,2,0,2.0,http://ap.rdcpix.com/794a2012a90d3c6a7e442be00...,http://ap.rdcpix.com/794a2012a90d3c6a7e442be00...
4,https://www.realtor.com/realestateandhomes-det...,DEMI,20230073671,SOLD,SINGLE_FAMILY,6267 Grandville Ave,,Detroit,MI,48228,3,1,,868,1943,127,99900,2023-08-31,85000,2024-01-05,6098,98,42.335798,-83.229379,2,0,,http://ap.rdcpix.com/a699fea729de953083ea50bcb...,http://ap.rdcpix.com/a699fea729de953083ea50bcb...


In [15]:
from homeharvest import scrape_property
from datetime import datetime, timedelta

properties_list = []
start_date = datetime.now() - timedelta(days=400)  # Starting 600 days ago
end_date = datetime.now()
date_range = (end_date - start_date).days
days_per_request = date_range // 6  # Split into 6 requests

for i in range(6):
    date_from = start_date + timedelta(days=i*days_per_request)
    date_to = start_date + timedelta(days=(i+1)*days_per_request)

    partial_properties = scrape_property(
        location="Chicago, IL",
        listing_type="sold",
        date_from=date_from.strftime("%Y-%m-%d"),
        date_to=date_to.strftime("%Y-%m-%d")
    )

    print(f"Segment {i+1}, Number of properties: {len(partial_properties)}")
    properties_list.append(partial_properties)

# Combine all segments into one DataFrame
all_properties = pd.concat(properties_list, ignore_index=True)
print(f"Total Number of properties: {len(all_properties)}")

# Save to CSV
filename = "HomeHarvest_LosAngeles.csv"
all_properties.to_csv(filename, index=False)


Segment 1, Number of properties: 4590
Segment 2, Number of properties: 5882
Segment 3, Number of properties: 6687
Segment 4, Number of properties: 5817
Segment 5, Number of properties: 4807
Segment 6, Number of properties: 3808
Total Number of properties: 31591


In [17]:
from homeharvest import scrape_property
from datetime import datetime, timedelta

properties_list = []
start_date = datetime.now() - timedelta(days=500)
end_date = datetime.now()
date_range = (end_date - start_date).days
days_per_request = date_range // 12  # Now making 12 requests

for i in range(12):
    date_from = start_date + timedelta(days=i*days_per_request)
    date_to = start_date + timedelta(days=(i+1)*days_per_request)

    partial_properties = scrape_property(
        location="Chicago, IL",
        listing_type="sold",
        date_from=date_from.strftime("%Y-%m-%d"),
        date_to=date_to.strftime("%Y-%m-%d")
    )

    print(f"Segment {i+1}, Number of properties: {len(partial_properties)}")
    properties_list.append(partial_properties)

# Combine all segments into one DataFrame
all_properties = pd.concat(properties_list, ignore_index=True)
print(f"Total Number of properties: {len(all_properties)}")

# Save to CSV
filename = "HomeHarvest_Chicago.csv"
all_properties.to_csv(filename, index=False)


Segment 1, Number of properties: 4487
Segment 2, Number of properties: 3784
Segment 3, Number of properties: 3374
Segment 4, Number of properties: 2676


KeyboardInterrupt: 

In [22]:
from homeharvest import scrape_property
from datetime import datetime, timedelta

properties_list = []
start_date = datetime.now() - timedelta(days=500)
end_date = datetime.now()
date_range = (end_date - start_date).days
segments = 73  # Adjust the number of segments as necessary
days_per_request = date_range // segments

for i in range(segments):
    date_from = start_date + timedelta(days=i * days_per_request)
    date_to = date_from + timedelta(days=days_per_request)

    partial_properties = scrape_property(
        location="Chicago, IL",
        listing_type="sold",
        date_from=date_from.strftime("%Y-%m-%d"),
        date_to=date_to.strftime("%Y-%m-%d")
    )

    print(f"Segment {i+1}, Number of properties: {len(partial_properties)}")
    properties_list.append(partial_properties)

# Combine all segments into one DataFrame
all_properties = pd.concat(properties_list, ignore_index=True)
print(f"Total Number of properties: {len(all_properties)}")

# Save to CSV
filename = "HomeHarvest_Chicago.csv"
all_properties.to_csv(filename, index=False)


Segment 1, Number of properties: 863
Segment 2, Number of properties: 995
Segment 3, Number of properties: 676
Segment 4, Number of properties: 813
Segment 5, Number of properties: 555
Segment 6, Number of properties: 488
Segment 7, Number of properties: 772
Segment 8, Number of properties: 533
Segment 9, Number of properties: 625
Segment 10, Number of properties: 668
Segment 11, Number of properties: 637
Segment 12, Number of properties: 801
Segment 13, Number of properties: 542
Segment 14, Number of properties: 549
Segment 15, Number of properties: 670
Segment 16, Number of properties: 366
Segment 17, Number of properties: 687
Segment 18, Number of properties: 473
Segment 19, Number of properties: 564
Segment 20, Number of properties: 631
Segment 21, Number of properties: 495
Segment 22, Number of properties: 451
Segment 23, Number of properties: 433
Segment 24, Number of properties: 472
Segment 25, Number of properties: 414
Segment 26, Number of properties: 444
Segment 27, Number of