# Web Scraping for Apartment Details in Jersey City, NJ

## List of Apartments: 
1. [70 Greene Apartments](https://www.equityapartments.com/new-york-city/jersey-city/70-greene-apartments)
2. [Hudson Point](https://www.equityapartments.com/new-york-city/jersey-city/hudson-point-apartments)
3. [The Pier](https://www.equityapartments.com/new-york-city/jersey-city/the-pier-apartments)
4. [Windsor at Liberty House](https://www.windsoratlibertyhouse.com/) 
5. [Vine](https://vinehoboken.com/)

### [70 Greene Apartments](https://www.equityapartments.com/new-york-city/jersey-city/70-greene-apartments)

In [1]:
# Webscraping the Apartment Price in Jersey City
# Congxin (David) Xu
# 12/14/2020

# %% Import Modules
import pandas
import requests
from bs4 import BeautifulSoup
# %% Define parameters
# Store the URL as a variable
url = 'https://www.equityapartments.com/new-york-city/jersey-city/70-greene-apartments'

# Create a headers with user agent string
headers = {'user-agent': 'David Xu Personal Use (cx2rx@virginia.edu)'}

# Get the URL page
r = requests.get(url, headers = headers)

# Check status
print(r)

<Response [200]>


In [2]:
# Parse the HTML with Beautiful Soup
apts = BeautifulSoup(r.text, 'html.parser')

# Create empty list to store results
model = list()
available = list()
price = list()
time_period = list()
area = list()
floor = list()
amenity = list()
image = list()

# Iterate over all available apartments
for i in range(0, len(apts.find_all("ea5-unit"))):
    model += [apts.find_all("ea5-unit")[i].find_all('p')[1].get_text().strip()]
    available += [apts.find_all('ea5-unit')[i].find_all('p')[3].get_text().strip().split(" ")[1]]
    price += [apts.find_all("ea5-unit")[i].find_all("span")[0].get_text().split('$')[1]]
    time_period += [apts.find_all("ea5-unit")[i].find_all("span")[1].get_text()]
    area += [apts.find_all("ea5-unit")[i].find_all("span")[2].get_text().split(" ")[0]]
    image += [apts.find_all("ea5-unit")[i].find('img')['src']]
    
    # Handling Missing floor...
    try:
        apts.find_all("ea5-unit")[i].find_all("span")[3].get_text().split(" ")[1]
    except:
        floor += ['NA']
    else: 
        floor += [apts.find_all("ea5-unit")[i].find_all("span")[3].get_text().split(" ")[1]]
    
    # Hanlding multiple amenities...
    tmp = list()
    for j in range(0, len(apts.find_all("ea5-unit")[i].find_all('p', class_= 'amenity'))):
        tmp += [apts.find_all("ea5-unit")[i].find_all('p', class_= 'amenity')[j].get_text().strip()]
    amenity += [tmp]
    
# Create a data frame for return
d = {'model': model, 'price': price, 'available': available, 'time_period': time_period, 
     'area': area, 'floor': floor, 'amenity': amenity, 'image': image}
df = pandas.DataFrame(data=d)
df

Unnamed: 0,model,price,available,time_period,area,floor,amenity,image
0,0 Bed / 1 Bath,1965,3/7/2021,12 mo,505,29.0,[Western Exposure],https://media.equityapartments.com/image/uploa...
1,0 Bed / 1 Bath,2083,3/6/2021,12 mo,505,,"[Hardwood Floors, Western Exposure]",https://media.equityapartments.com/image/uploa...
2,0 Bed / 1 Bath,2156,3/10/2021,12 mo,627,30.0,[Western Exposure],https://media.equityapartments.com/image/uploa...
3,0 Bed / 1 Bath,2259,3/6/2021,12 mo,627,40.0,"[Hardwood Floors, Western Exposure]",https://media.equityapartments.com/image/uploa...
4,0 Bed / 1 Bath,2279,3/4/2021,12 mo,627,39.0,"[Hardwood Floors, Kitchen Backsplash, Western ...",https://media.equityapartments.com/image/uploa...
5,1 Bed / 1 Bath,2373,3/19/2021,12 mo,592,17.0,"[Eastern Exposure, Northern Exposure]",https://media.equityapartments.com/image/uploa...
6,1 Bed / 1 Bath,2425,3/6/2021,12 mo,755,7.0,"[Exterior Brick Facade, Western Exposure]",https://media.equityapartments.com/image/uploa...
7,1 Bed / 1 Bath,2425,4/9/2021,12 mo,592,21.0,[View of Lower Manhattan],https://media.equityapartments.com/image/uploa...
8,1 Bed / 1 Bath,2430,3/6/2021,12 mo,592,26.0,"[Eastern Exposure, Northern Exposure]",https://media.equityapartments.com/image/uploa...
9,1 Bed / 1 Bath,2518,5/1/2021,12 mo,665,29.0,[Western Exposure],https://media.equityapartments.com/image/uploa...


In [3]:
df.to_csv('70_greene_apartments_availability.csv')

### [Hudson Point](https://www.equityapartments.com/new-york-city/jersey-city/hudson-point-apartments)

In [4]:
# Store the URL as a variable
url = 'https://www.equityapartments.com/new-york-city/jersey-city/hudson-point-apartments'

# Create a headers with user agent string
headers = {'user-agent': 'David Xu Personal Use (cx2rx@virginia.edu)'}

# Get the URL page
r = requests.get(url, headers = headers)

# Check status
print(r)

<Response [200]>


In [5]:
# Parse the HTML with Beautiful Soup
apts = BeautifulSoup(r.text, 'html.parser')

# Create empty list to store results
model = list()
available = list()
price = list()
time_period = list()
area = list()
floor = list()
amenity = list()
image = list()

# Iterate over all available apartments
for i in range(0, len(apts.find_all("ea5-unit"))):
    model += [apts.find_all("ea5-unit")[i].find_all('p')[1].get_text().strip()]
    available += [apts.find_all('ea5-unit')[i].find_all('p')[3].get_text().strip().split(" ")[1]]
    price += [apts.find_all("ea5-unit")[i].find_all("span")[0].get_text().split('$')[1]]
    time_period += [apts.find_all("ea5-unit")[i].find_all("span")[1].get_text()]
    area += [apts.find_all("ea5-unit")[i].find_all("span")[2].get_text().split(" ")[0]]
    image += [apts.find_all("ea5-unit")[i].find('img')['src']]
    
    # Handling Missing floor...
    try:
        apts.find_all("ea5-unit")[i].find_all("span")[3].get_text().split(" ")[1]
    except:
        floor += ['NA']
    else: 
        floor += [apts.find_all("ea5-unit")[i].find_all("span")[3].get_text().split(" ")[1]]
    
    # Hanlding multiple amenities...
    tmp = list()
    for j in range(0, len(apts.find_all("ea5-unit")[i].find_all('p', class_= 'amenity'))):
        tmp += [apts.find_all("ea5-unit")[i].find_all('p', class_= 'amenity')[j].get_text().strip()]
    amenity += [tmp]
    
# Create a data frame for return
d = {'model': model, 'price': price, 'available': available, 'time_period': time_period, 
     'area': area, 'floor': floor, 'amenity': amenity, 'image': image}
df = pandas.DataFrame(data=d)
df

Unnamed: 0,model,price,available,time_period,area,floor,amenity,image
0,1 Bed / 1 Bath,2064,3/4/2021,12 mo,726,3,"[Bay Window, Modern Kitchen Features]",https://media.equityapartments.com/image/uploa...
1,1 Bed / 1 Bath,2214,4/6/2021,12 mo,726,3,"[Bay Window, Modern Kitchen Features]",https://media.equityapartments.com/image/uploa...
2,1 Bed / 1 Bath,2308,3/4/2021,12 mo,726,5,"[Bay Window, Hardwood Floors, Modern Kitchen F...",https://media.equityapartments.com/image/uploa...
3,1 Bed / 1 Bath,2360,4/24/2021,12 mo,726,6,"[Bay Window, Hardwood Floors, Modern Kitchen F...",https://media.equityapartments.com/image/uploa...
4,1 Bed / 1 Bath,2463,3/4/2021,12 mo,726,6,"[Bay Window, Hardwood Floors, Modern Kitchen F...",https://media.equityapartments.com/image/uploa...
5,1 Bed / 1 Bath,2524,3/4/2021,12 mo,824,2,"[Kitchen Window, Marina and Statue of Liberty ...",https://media.equityapartments.com/image/uploa...
6,1 Bed / 1 Bath,2663,3/4/2021,12 mo,931,,"[Kitchen Window, Modern Kitchen Features]",https://media.equityapartments.com/image/uploa...
7,2 Bed / 2 Bath,3168,3/4/2021,12 mo,1213,,"[Hardwood Floors, Kitchen Window, Modern Kitch...",https://media.equityapartments.com/image/uploa...
8,2 Bed / 2 Bath,3222,4/15/2021,12 mo,1109,Floorplan,"[Large Private Terrace, Modern Kitchen Features]",https://media.equityapartments.com/image/uploa...
9,2 Bed / 2 Bath,3227,3/25/2021,12 mo,1109,2,"[Large Private Terrace, Modern Kitchen Features]",https://media.equityapartments.com/image/uploa...


In [6]:
df.to_csv('Hudson_Point_availability.csv')

### [The Pier](https://www.equityapartments.com/new-york-city/jersey-city/the-pier-apartments)

In [7]:
# Store the URL as a variable
url = 'https://www.equityapartments.com/new-york-city/jersey-city/the-pier-apartments'

# Create a headers with user agent string
headers = {'user-agent': 'David Xu Personal Use (cx2rx@virginia.edu)'}

# Get the URL page
r = requests.get(url, headers = headers)

# Check status
print(r)

<Response [200]>


In [8]:
# Parse the HTML with Beautiful Soup
apts = BeautifulSoup(r.text, 'html.parser')

# Create empty list to store results
model = list()
available = list()
price = list()
time_period = list()
area = list()
floor = list()
amenity = list()
image = list()

# Iterate over all available apartments
for i in range(0, len(apts.find_all("ea5-unit"))):
    model += [apts.find_all("ea5-unit")[i].find_all('p')[1].get_text().strip()]
    available += [apts.find_all('ea5-unit')[i].find_all('p')[3].get_text().strip().split(" ")[1]]
    price += [apts.find_all("ea5-unit")[i].find_all("span")[0].get_text().split('$')[1]]
    time_period += [apts.find_all("ea5-unit")[i].find_all("span")[1].get_text()]
    area += [apts.find_all("ea5-unit")[i].find_all("span")[2].get_text().split(" ")[0]]
    image += [apts.find_all("ea5-unit")[i].find('img')['src']]
    
    # Handling Missing floor...
    try:
        apts.find_all("ea5-unit")[i].find_all("span")[3].get_text().split(" ")[1]
    except:
        floor += ['NA']
    else: 
        floor += [apts.find_all("ea5-unit")[i].find_all("span")[3].get_text().split(" ")[1]]
    
    # Hanlding multiple amenities...
    tmp = list()
    for j in range(0, len(apts.find_all("ea5-unit")[i].find_all('p', class_= 'amenity'))):
        tmp += [apts.find_all("ea5-unit")[i].find_all('p', class_= 'amenity')[j].get_text().strip()]
    amenity += [tmp]
    
# Create a data frame for return
d = {'model': model, 'price': price, 'available': available, 'time_period': time_period, 
     'area': area, 'floor': floor, 'amenity': amenity, 'image': image}
df = pandas.DataFrame(data=d)
df

Unnamed: 0,model,price,available,time_period,area,floor,amenity,image
0,1 Bed / 1 Bath,2485,5/1/2021,12 mo,961,1.0,[Renovated],https://media.equityapartments.com/image/uploa...
1,1 Bed / 1 Bath,2513,3/4/2021,12 mo,712,6.0,"[Balcony, Partial View of Hudson River, Renova...",https://media.equityapartments.com/image/uploa...
2,1 Bed / 1 Bath,2552,3/4/2021,12 mo,741,7.0,"[Juliet Balcony, Renovated, View of Hudson River]",https://media.equityapartments.com/image/uploa...
3,1 Bed / 1 Bath,2557,3/4/2021,12 mo,703,5.0,"[Balcony, Renovated, View of Hudson River]",https://media.equityapartments.com/image/uploa...
4,1 Bed / 1 Bath,2573,3/24/2021,12 mo,703,4.0,"[Balcony, Renovated]",https://media.equityapartments.com/image/uploa...
5,1 Bed / 1 Bath,2583,5/19/2021,12 mo,741,7.0,"[Juliet Balcony, Renovated]",https://media.equityapartments.com/image/uploa...
6,1 Bed / 1 Bath,2625,3/4/2021,12 mo,741,4.0,"[Balcony, Renovated]",https://media.equityapartments.com/image/uploa...
7,1 Bed / 1 Bath,2651,5/11/2021,12 mo,741,6.0,"[Balcony, Renovated]",https://media.equityapartments.com/image/uploa...
8,1 Bed / 1 Bath,2651,5/13/2021,12 mo,741,6.0,"[Balcony, Renovated]",https://media.equityapartments.com/image/uploa...
9,1 Bed / 1 Bath,2763,3/30/2021,12 mo,741,,"[Patio, Renovated]",https://media.equityapartments.com/image/uploa...


In [9]:
df.to_csv('The_Pier_availability.csv')

### [Windsor at Liberty House](https://www.windsoratlibertyhouse.com/) 

In [10]:
# Store the URL as a variable
url = 'https://www.windsoratlibertyhouse.com/floorplans'

# Create a headers with user agent string
headers = {'user-agent': 'David Xu Personal Use (cx2rx@virginia.edu)'}

# Get the URL page
r = requests.get(url, headers = headers)

# Check status
print(r)

<Response [200]>


In [11]:
# Parse the HTML with Beautiful Soup
apts = BeautifulSoup(r.text, 'html.parser')

# Create empty list to store results
name = list()
available = list()
n_bed = list()
n_bath = list()
area = list()
min_rent = list()
max_rent = list()
deposit = list()
description = list()
image = list()

# Iterate over all available apartments
for i in range(0, len(apts.find_all("div", class_="row border-bottom pb-3 mb-3"))):
    
    # If there is a video, add 1 to the index
    if apts.find_all("div", class_="row border-bottom pb-3 mb-3")[i].\
        find_all('span')[0].get_text().strip() == 'Video':
        index = 1
    else:
        index = 0
    
    # If there is no apartment available, there is no rent
    if apts.find_all("div", class_="row border-bottom pb-3 mb-3")[i].\
                  find_all('span')[index + 1].get_text().split(" ")[0] == '0':
        
        min_rent += ['NA']
        max_rent += ['NA']
    
    else:
        # Minimum Rent
        min_rent += [apts.find_all("div", class_="row border-bottom pb-3 mb-3")[i].\
                 find_all('span')[-3].get_text().split(" ")[0][:-2]]
        # Maximum Rent
        max_rent += [apts.find_all("div", class_="row border-bottom pb-3 mb-3")[i].\
                 find_all('span')[-3].get_text().split(" ")[2]]
        
    # Apartment Name
    name += [apts.find_all("div", class_="row border-bottom pb-3 mb-3")[i].\
             find_all('span')[index + 0].get_text().strip()]
    # Number of Available Apartments
    available += [apts.find_all("div", class_="row border-bottom pb-3 mb-3")[i].\
                  find_all('span')[index + 1].get_text().split(" ")[0]]
    # Number of Bedroom 
    n_bed += [apts.find_all("div", class_="row border-bottom pb-3 mb-3")[i].\
              find_all('span')[index + 2].get_text().split(" ")[0]]
    # Number of Bathroom 
    n_bath += [apts.find_all("div", class_="row border-bottom pb-3 mb-3")[i].\
               find_all('span')[index + 4].get_text().split(" ")[0]]
    # Area
    area += [apts.find_all("div", class_="row border-bottom pb-3 mb-3")[i].\
             find_all('span')[index + 6].get_text().split(" ")[0]]
    
    # Deposit
    deposit += [apts.find_all("div", class_="row border-bottom pb-3 mb-3")[i].\
                find_all('span')[-1].get_text()]
    # Description
    description += [apts.find_all("div", class_="row border-bottom pb-3 mb-3")[i].\
                    find_all('p')[0].get_text().strip()]
    # image
    image += [apts.find_all("div", class_="row border-bottom pb-3 mb-3")[i].find_all('img')[0]['data-images']]
    
    
# Create a data frame for return
d = {'Apartment Name': name, 'Number of Available Apartments': available, 'Number of Bedroom': n_bed, 
     'Number of Bathroom': n_bath, 'Area': area, 'Minimum Rent': min_rent, 
     'Maximum Rent': max_rent, 'Deposit': deposit, 'Description': description, 'Image': image}

df = pandas.DataFrame(data=d).sort_values("Number of Available Apartments", ascending = False)
df

Unnamed: 0,Apartment Name,Number of Available Apartments,Number of Bedroom,Number of Bathroom,Area,Minimum Rent,Maximum Rent,Deposit,Description,Image
0,Asbury,9,1,1.0,719,"$2,615","$4,305","$1,000","If you are looking for great natural light, th...",https://res.cloudinary.com/yardi/image/upload/...
1,Alexander,3,1,1.0,712,"$2,565","$3,790","$1,000",The Alexander is perfect if you're looking for...,https://res.cloudinary.com/yardi/image/upload/...
9,Bishop,3,2,2.0,1071,"$3,800","$5,730","$1,000",Our Bishop apartment homes features 2-bedrooms...,https://res.cloudinary.com/yardi/image/upload/...
10,Ashton Penthouse,2,2,2.0,1057,"$3,825","$5,730","$1,000",The Ashton Penthouse duplex-styled apartment f...,https://res.cloudinary.com/yardi/image/upload/...
11,Beacon Loft,2,2,2.0,1506,"$4,775","$7,090","$1,000","The Beacon Loft is one of our largest, most sp...",https://res.cloudinary.com/yardi/image/upload/...
3,Ashton,1,1,1.0,774,"$2,535","$3,760","$1,000",The Ashton is perfect if you're looking for a ...,https://res.cloudinary.com/yardi/image/upload/...
4,Baldwin,1,1,1.0,923,"$3,225","$4,600","$1,000","Our Baldwin apartment features 1-bedroom, 1-ba...",https://res.cloudinary.com/yardi/image/upload/...
2,Astor,1,1,1.0,716,"$2,820","$4,195","$1,000",Perfect one-bedroom for the resident looking f...,https://res.cloudinary.com/yardi/image/upload/...
14,Bainbridge Penthouse,1,3,2.0,1215,"$4,910","$6,715","$1,000",Beautiful three bedroom penthouse!,https://res.cloudinary.com/yardi/image/upload/...
15,Baldwin Penthouse,1,3,2.0,1228,"$5,085","$8,310","$1,000",Our Baldwin Penthouse is a luxurious duplex-st...,https://res.cloudinary.com/yardi/image/upload/...


In [12]:
df.to_csv('Windsor_at_Liberty_House_availability.csv')

### [Vine](https://vinehoboken.com/)

In [13]:
# Store the URL as a variable
url = 'https://vinehoboken.com/floorplans/'

# Create a headers with user agent string
headers = {'user-agent': 'David Xu Personal Use (cx2rx@virginia.edu)'}

# Get the URL page
r = requests.get(url, headers = headers)

# Check status
print(r)

<Response [200]>


In [14]:
# Parse the HTML with Beautiful Soup
apts = BeautifulSoup(r.text, 'html.parser')

In [15]:
# Create empty list to store results
name = list()
amenity = list()
n_bed = list()
n_bath = list()
area = list()
term = list()
price = list()
image = list()

for i in range(0, len(apts.find_all('div', class_='fpm fpm--enhanced')[0].find_all("li"))):

    # New page:
    new_url = apts.find_all('div', class_='fpm fpm--enhanced')[0].find_all("li")[i].find_all('a')[0]['href']

    # Get the URL page
    new_r = requests.get(new_url, headers = headers)
    new_apts = BeautifulSoup(new_r.text, 'html.parser')

    # Name
    name += [new_apts.find_all('div', class_='fpm-unit-details__container')[0].\
        find_all('p')[0].get_text().strip()]
    # Number of Bedroom
    n_bed += [new_apts.find_all('div', class_='fpm-unit-details__container')[0].\
        find_all('p', class_= "fpm-unit-details__info-content")[0].\
        get_text().strip().split(' ')[0]]
    # Number of Bathroom
    n_bath += [new_apts.find_all('div', class_='fpm-unit-details__container')[0].\
        find_all('p', class_= "fpm-unit-details__info-content")[1].\
        get_text().strip().split(' ')[0]]
    # Amentiy
    amenity += [new_apts.find_all('div', class_='fpm-unit-details__container')[0].\
        find_all('p')[3].get_text().strip()]
    # Area
    area += [new_apts.find_all('div', class_='fpm-unit-details__container')[0].\
        find_all('p', class_= "fpm-unit-details__info-content")[-1].\
        get_text().strip().split(' ')[0]]
    # Price
    price += [new_apts.find_all('p', class_='fpm-unit-details__details-info-content')[0].\
        get_text().strip().split(':')[1].strip()]
    # Lease Term
    term += [new_apts.find_all('div', class_='fpm-unit-details__container')[0].
        find_all('p', class_="fpm-unit-details__lease-term-no-terms")[0].get_text()]

    # Image
    image += [new_apts.find_all('div', class_='fpm-unit-details__container')[0].\
        find_all('img')[1]['src']]
    
    
# Create a data frame for return
d = {'Apartment Name': name, 'Number of Bedroom': n_bed, 
     'Number of Bathroom': n_bath, 'Area': area, 'Price': price, 
     'Amenity': amenity, 'Term': term, 'Image': image}

df = pandas.DataFrame(data=d)
df

Unnamed: 0,Apartment Name,Number of Bedroom,Number of Bathroom,Area,Price,Amenity,Term,Image
0,A-Line Floor4,3,2,1319,Contact Us,+Terrace,There are currently no lease terms set for thi...,https://vinehoboken.com/assets/images/floorpla...
1,B Line Floor 4,1,1,Sq.,Contact Us,+Terrace,There are currently no lease terms set for thi...,https://vinehoboken.com/assets/images/floorpla...
2,Residence 13,1,1,850,Contact Us,+Terrace,There are currently no lease terms set for thi...,https://vinehoboken.com/assets/images/floorpla...
3,Residence 14,1,1,1495,Contact Us,+Den & Terrace,There are currently no lease terms set for thi...,https://vinehoboken.com/assets/images/floorpla...
4,F Line Floor 3,2,2,Sq.,Contact Us,+Terrace,There are currently no lease terms set for thi...,https://vinehoboken.com/assets/images/floorpla...
...,...,...,...,...,...,...,...,...
59,Residence 12,1,1,917,Contact Us,+Den,There are currently no lease terms set for thi...,https://vinehoboken.com/assets/images/floorpla...
60,Residence 12,1,1,917,Contact Us,+ Den,There are currently no lease terms set for thi...,https://vinehoboken.com/assets/images/12-11.svg
61,Residence 13,1,1,850,"$2,525",850 Sq. Ft.,There are currently no lease terms set for thi...,https://vinehoboken.com/assets/images/floorpla...
62,Residence 14,1,1,798,Contact Us,798 Sq. Ft.,There are currently no lease terms set for thi...,https://vinehoboken.com/assets/images/floorpla...


In [16]:
df.to_csv('Vine_availability.csv')