In [75]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [76]:
df = pd.read_csv('../final_rental_merged.csv')

In [77]:
df.columns

Index(['Unnamed: 0', 'listedAt', 'daysOnMarket', 'availableFrom', 'street',
       'price', 'borough', 'neighborhood', 'zip', 'propertyType', 'beds',
       'baths', 'latitude', 'longitude', 'amenities', 'builtIn', 'description',
       'hasVideo', 'PhotosNum', 'state', 'isUndisclosedAddress',
       'isFeaturedListing', 'duplicate', 'Total Population', 'Median Age',
       'Male Population', 'Female Population', 'White Alone',
       'Black or African American Alone', 'Asian Alone', 'Hispanic or Latino',
       'Number of Households', 'Median Household Income', 'Per Capita Income',
       'Population Below Poverty Level', 'Employed', 'Unemployed',
       'Total Income Distribution', 'Median Gross Rent', 'Median Home Value',
       'Occupied Housing Units', 'Vacant Housing Units',
       'Owner-Occupied Units (value < $100,000)', 'Monthly Housing Costs',
       'High School Graduate (Age 25+)', 'Bachelor’s Degree (Age 25+)',
       'Graduate or Professional Degree (Age 25+)', 'English 

In [78]:
df.drop('Unnamed: 0',axis=1,inplace=True) # dropping the index column inferred as a normal column

In [79]:
print("Number of rows:",len(df))

Number of rows: 7842


## Analysing and cleaning columns

In [80]:
for column in df.columns:
    count = df[column].value_counts()
    print("-------------------------------------------")
    print(f"column : {column}")
    print(f"Number of Unique values: {len(count)}")
    if len(count) < 50:       
        print(f"values list: {list(df[column].unique())}")  # Convert to list and remove .index

-------------------------------------------
column : listedAt
Number of Unique values: 177
-------------------------------------------
column : daysOnMarket
Number of Unique values: 204
-------------------------------------------
column : availableFrom
Number of Unique values: 309
-------------------------------------------
column : street
Number of Unique values: 7842
-------------------------------------------
column : price
Number of Unique values: 1378
-------------------------------------------
column : borough
Number of Unique values: 5
values list: ['Brooklyn', 'Manhattan', 'Queens', 'Bronx', 'Staten Island']
-------------------------------------------
column : neighborhood
Number of Unique values: 163
-------------------------------------------
column : zip
Number of Unique values: 166
-------------------------------------------
column : propertyType
Number of Unique values: 9
values list: ['rental', 'coop', 'house', 'condo', 'townhouse', 'Apartment', 'Townhouse', 'House', 'Con

In [81]:
# for daysOnMarket
# Check null values 
print("Number of null values:", df['daysOnMarket'].isnull().sum())
pd.set_option('display.max_columns', None)
# Get basic statistics
print("\nStatistics for daysOnMarket:")
print(df['daysOnMarket'].describe())

Number of null values: 0

Statistics for daysOnMarket:
count    7842.000000
mean       18.371716
std        35.188656
min        -1.000000
25%         2.000000
50%         9.000000
75%        22.000000
max       958.000000
Name: daysOnMarket, dtype: float64


In [82]:
print("rows with negative daysOnMarket:",len(df[df['daysOnMarket']<0]))
negative_days_on_market_df = df[df['daysOnMarket']<0]
negative_days_on_market_df.head()

# looks like days on market is -1 when listedAt is NaN

rows with negative daysOnMarket: 1239


Unnamed: 0,listedAt,daysOnMarket,availableFrom,street,price,borough,neighborhood,zip,propertyType,beds,baths,latitude,longitude,amenities,builtIn,description,hasVideo,PhotosNum,state,isUndisclosedAddress,isFeaturedListing,duplicate,Total Population,Median Age,Male Population,Female Population,White Alone,Black or African American Alone,Asian Alone,Hispanic or Latino,Number of Households,Median Household Income,Per Capita Income,Population Below Poverty Level,Employed,Unemployed,Total Income Distribution,Median Gross Rent,Median Home Value,Occupied Housing Units,Vacant Housing Units,"Owner-Occupied Units (value < $100,000)",Monthly Housing Costs,High School Graduate (Age 25+),Bachelor’s Degree (Age 25+),Graduate or Professional Degree (Age 25+),English Only,Spanish,Never Married,Currently Married,Divorced,Precinct,schools_in_precinct,nearby_subway_stations,crime_rate
2798,,-1,2024-11-18 00:00:00,511 Van Buren St #3R,"$2,400/mo",Brooklyn,,11221,Apartment,1,1.0,40.69199,-73.9285,,,,False,7,NY,False,True,False,91236.0,32.3,42539.0,48697.0,21286.0,39370.0,4668.0,28299.0,35245.0,77600.0,40787.0,19547.0,49132.0,4586.0,35245.0,1937.0,1082200.0,35245.0,2717.0,220.0,2031.0,13205.0,17249.0,458.0,,,21906.0,11845.0,2080.0,81,33,3,1.214623
2799,,-1,,607 Gates Ave #1E,"$2,700/mo",Brooklyn,,11221,Apartment,2,1.0,40.687233,-73.94265,,,,False,8,NY,False,True,False,91236.0,32.3,42539.0,48697.0,21286.0,39370.0,4668.0,28299.0,35245.0,77600.0,40787.0,19547.0,49132.0,4586.0,35245.0,1937.0,1082200.0,35245.0,2717.0,220.0,2031.0,13205.0,17249.0,458.0,,,21906.0,11845.0,2080.0,79,46,0,1.591764
2801,,-1,,460 Throop Ave APT 10,"$2,400/mo",Brooklyn,,11221,Apartment,1,1.0,40.688168,-73.942116,,,,False,5,NY,False,True,False,91236.0,32.3,42539.0,48697.0,21286.0,39370.0,4668.0,28299.0,35245.0,77600.0,40787.0,19547.0,49132.0,4586.0,35245.0,1937.0,1082200.0,35245.0,2717.0,220.0,2031.0,13205.0,17249.0,458.0,,,21906.0,11845.0,2080.0,79,46,0,1.591764
2803,,-1,2024-11-20 00:00:00,509 Van Buren St #5R,"$2,500/mo",Brooklyn,,11221,Apartment,1,1.0,40.691982,-73.928566,,,,False,9,NY,False,True,False,91236.0,32.3,42539.0,48697.0,21286.0,39370.0,4668.0,28299.0,35245.0,77600.0,40787.0,19547.0,49132.0,4586.0,35245.0,1937.0,1082200.0,35245.0,2717.0,220.0,2031.0,13205.0,17249.0,458.0,,,21906.0,11845.0,2080.0,81,33,3,1.214623
2817,,-1,2024-12-01 00:00:00,499 Evergreen Ave APT 3B,"$3,050/mo",Brooklyn,,11221,Apartment,1,1.0,40.691,-73.91623,,,,False,0,NY,,True,False,91236.0,32.3,42539.0,48697.0,21286.0,39370.0,4668.0,28299.0,35245.0,77600.0,40787.0,19547.0,49132.0,4586.0,35245.0,1937.0,1082200.0,35245.0,2717.0,220.0,2031.0,13205.0,17249.0,458.0,,,21906.0,11845.0,2080.0,83,37,0,1.37311


In [50]:
# Replace negative values with NaN
df.loc[df['daysOnMarket'] < 0, 'daysOnMarket'] = np.nan

print("Number of negative values:", len(df[df['daysOnMarket'] < 0]))
print("Number of NaN values:", df['daysOnMarket'].isnull().sum())

Number of negative values: 0
Number of NaN values: 1239


In [85]:
# for price
print("Number of null values:", df['price'].isnull().sum())
print(df['price'].value_counts())

Number of null values: 0
price
$3,000/mo    182
$3,500/mo    155
$2,800/mo    127
$3,200/mo    125
$2,600/mo     97
            ... 
3296           1
3346           1
3126           1
4159           1
$1,625/mo      1
Name: count, Length: 1378, dtype: int64


In [86]:
# Remove '$' and '/mo' and convert to numeric
df['price'] = df['price'].replace('[\\$,/mo]', '', regex=True).astype(float)

# Verify the cleaning
print("After cleaning:")
print("Number of null values:", df['price'].isnull().sum())
print("\nBasic statistics:")
print(df['price'].describe())

After cleaning:
Number of null values: 0

Basic statistics:
count      7842.000000
mean       4638.869421
std        5976.426868
min        1199.000000
25%        2625.000000
50%        3250.000000
75%        4295.000000
max      125000.000000
Name: price, dtype: float64


In [87]:
# for zip
# Check null values 
print("Number of null values:", df['zip'].isnull().sum()) # NO NULL VALUES
print(df['zip'].describe()) # no negative values - GOOD


Number of null values: 0
count     7842.000000
mean     10651.059551
std        577.309660
min      10001.000000
25%      10025.000000
50%      10467.000000
75%      11221.000000
max      12345.000000
Name: zip, dtype: float64


In [88]:
# for propertyType
print("Number of null values:", df['propertyType'].isnull().sum()) # no null values
print(df['propertyType'].value_counts()) # need to lowercase everything

Number of null values: 0
propertyType
Apartment    4713
rental       1973
condo         507
coop          236
House         201
Townhouse     116
house          59
Condo          22
townhouse      15
Name: count, dtype: int64


In [89]:
df['propertyType'] = df['propertyType'].apply(lambda x:x.lower())
print(df['propertyType'].value_counts()) # done

propertyType
apartment    4713
rental       1973
condo         529
house         260
coop          236
townhouse     131
Name: count, dtype: int64


In [90]:
# for beds
print("Number of null values:", df['beds'].isnull().sum()) # no null values
print(df['beds'].value_counts()) # looks all good, 0 beds could mean studio

Number of null values: 0
beds
1     2401
2     2356
3     1665
0      934
4      390
5       70
6       20
8        4
10       1
7        1
Name: count, dtype: int64


In [91]:
# for latitude
print("Number of null values:", df['latitude'].isnull().sum()) # no null values
print("negative values:",len(df[df['latitude']<=0])) # looks good for NYC
print(df['latitude'].value_counts()) # looks all good


Number of null values: 0
negative values: 0
latitude
40.710201    6
40.746300    6
40.761299    5
40.707901    5
40.750198    5
            ..
40.701070    1
40.699270    1
40.685238    1
40.695530    1
40.709740    1
Name: count, Length: 7360, dtype: int64


In [92]:
# for longitude
print("Number of null values:", df['longitude'].isnull().sum()) # no null values
print("negative values:",len(df[df['longitude']<=0])) # looks good for NYC
print(df['longitude'].value_counts()) # looks all good

Number of null values: 0
negative values: 7842
longitude
-73.992302    7
-73.989304    6
-73.984200    6
-73.972702    6
-73.966904    5
             ..
-73.890145    1
-73.877762    1
-73.876597    1
-73.894056    1
-74.006620    1
Name: count, Length: 7002, dtype: int64


In [93]:
# for amenities
print("Number of null values:", df['amenities'].isnull().sum()) # too many null valus. over 50%. probably drop?
print(df['amenities'].value_counts())

Number of null values: 5052
amenities
['fios_available']                                                                                                                                                                                                                                                                                                                       118
['fios_available', 'hardwood_floors']                                                                                                                                                                                                                                                                                                     54
['fios_available', 'hardwood_floors', 'pets']                                                                                                                                                                                                                                                           

In [94]:
# for builtIn
print("Number of null values:", df['builtIn'].isnull().sum()) # too many null valus. over 50%. probably drop?
print("negative values:",len(df[df['builtIn']<=0])) # negattive values too?? dropping for sure

Number of null values: 5052
negative values: 77


In [95]:
# for hasVideo
print("Number of null values:", df['hasVideo'].isnull().sum()) # no null values
print(df['hasVideo'].value_counts()) # can clean this

Number of null values: 0
hasVideo
False    5049
1        2790
True        3
Name: count, dtype: int64


In [96]:
df['hasVideo'] = df['hasVideo'].apply(lambda x:1 if x=='True' or x=='1' else 0)
print(df['hasVideo'].value_counts()) # done

hasVideo
0    5049
1    2793
Name: count, dtype: int64


In [97]:
# for PhotosNum
print("Number of null values:", df['PhotosNum'].isnull().sum()) # no null values
print("negative values:",len(df[df['PhotosNum']<0])) 
print(df['PhotosNum'].value_counts())

Number of null values: 0
negative values: 0
PhotosNum
0     3793
6      431
7      415
8      364
9      355
5      299
10     291
20     245
11     238
12     215
4      207
13     164
14     159
15     125
16     101
17      97
18      74
3       72
19      55
1       33
2       22
23      11
22      11
25      10
21       8
24       8
32       6
27       5
28       5
26       5
29       3
33       2
30       2
36       2
48       1
46       1
34       1
44       1
35       1
51       1
73       1
37       1
31       1
Name: count, dtype: int64


In [98]:
# for state
print("Number of null values:", df['state'].isnull().sum()) # no null values
print(df['state'].value_counts()) # we know we have taken all of these from NYC in NY state. so we can just drop this column, all values will be NY here

Number of null values: 2790
state
NY    5052
Name: count, dtype: int64


In [99]:
# for isUndisclosedAddress
print("Number of null values:", df['isUndisclosedAddress'].isnull().sum()) # no null values
print(df['isUndisclosedAddress'].value_counts()) # just dropping this, too many null

Number of null values: 6552
isUndisclosedAddress
False    1290
Name: count, dtype: int64


In [100]:
# for isFeaturedListing
print("Number of null values:", df['isFeaturedListing'].isnull().sum()) 
print(df['isFeaturedListing'].value_counts()) # can drop

Number of null values: 2790
isFeaturedListing
True     3729
False    1323
Name: count, dtype: int64


In [101]:
# for duplicate
print("Number of null values:", df['duplicate'].isnull().sum()) 
print(df['duplicate'].value_counts()) # can drop

Number of null values: 0
duplicate
False    7842
Name: count, dtype: int64


In [102]:
# for Total Population
print("Number of null values:", df['Total Population'].isnull().sum()) 
print(df['Total Population'].value_counts())

Number of null values: 870
Total Population
54369.0    207
53877.0    181
91236.0    176
58418.0    158
65511.0    147
          ... 
42792.0      2
43517.0      2
0.0          1
7365.0       1
47230.0      1
Name: count, Length: 131, dtype: int64


In [103]:
len(df[df['Total Population'].isnull() | df['Median Age'].isnull() | df['Male Population'].isnull() | df['White Alone'].isnull()]) 
#looks like all census data is null at the same rows
# we can delete those rows maybe considering census data could be important?

870

In [104]:

census_columns_to_transform = ['Total Population', 'Median Age', 'Male Population', 'Female Population', 
                 'White Alone', 'Black or African American Alone', 'Asian Alone', 
                 'Hispanic or Latino','Population Below Poverty Level', 'Employed', 
                 'Unemployed','High School Graduate (Age 25+)', 'Bachelor’s Degree (Age 25+)',
                 'Graduate or Professional Degree (Age 25+)', 'English Only', 'Spanish',
                 'Never Married', 'Currently Married', 'Divorced']

# Calculate ratio columns
for column in census_columns_to_transform:
    if column != 'Total Population':  # Skip Total Population to avoid division by itself
        df[f'{column}_ratio'] = df[column] / df['Total Population']

In [105]:
df.columns

Index(['listedAt', 'daysOnMarket', 'availableFrom', 'street', 'price',
       'borough', 'neighborhood', 'zip', 'propertyType', 'beds', 'baths',
       'latitude', 'longitude', 'amenities', 'builtIn', 'description',
       'hasVideo', 'PhotosNum', 'state', 'isUndisclosedAddress',
       'isFeaturedListing', 'duplicate', 'Total Population', 'Median Age',
       'Male Population', 'Female Population', 'White Alone',
       'Black or African American Alone', 'Asian Alone', 'Hispanic or Latino',
       'Number of Households', 'Median Household Income', 'Per Capita Income',
       'Population Below Poverty Level', 'Employed', 'Unemployed',
       'Total Income Distribution', 'Median Gross Rent', 'Median Home Value',
       'Occupied Housing Units', 'Vacant Housing Units',
       'Owner-Occupied Units (value < $100,000)', 'Monthly Housing Costs',
       'High School Graduate (Age 25+)', 'Bachelor’s Degree (Age 25+)',
       'Graduate or Professional Degree (Age 25+)', 'English Only', 'Spanis

In [106]:
# dropping the OG census columns
# Drop original census columns
df = df.drop(columns=census_columns_to_transform)

In [107]:
df.columns

Index(['listedAt', 'daysOnMarket', 'availableFrom', 'street', 'price',
       'borough', 'neighborhood', 'zip', 'propertyType', 'beds', 'baths',
       'latitude', 'longitude', 'amenities', 'builtIn', 'description',
       'hasVideo', 'PhotosNum', 'state', 'isUndisclosedAddress',
       'isFeaturedListing', 'duplicate', 'Number of Households',
       'Median Household Income', 'Per Capita Income',
       'Total Income Distribution', 'Median Gross Rent', 'Median Home Value',
       'Occupied Housing Units', 'Vacant Housing Units',
       'Owner-Occupied Units (value < $100,000)', 'Monthly Housing Costs',
       'Precinct', 'schools_in_precinct', 'nearby_subway_stations',
       'crime_rate', 'Median Age_ratio', 'Male Population_ratio',
       'Female Population_ratio', 'White Alone_ratio',
       'Black or African American Alone_ratio', 'Asian Alone_ratio',
       'Hispanic or Latino_ratio', 'Population Below Poverty Level_ratio',
       'Employed_ratio', 'Unemployed_ratio',
       'Hig

In [108]:
df.head()

Unnamed: 0,listedAt,daysOnMarket,availableFrom,street,price,borough,neighborhood,zip,propertyType,beds,baths,latitude,longitude,amenities,builtIn,description,hasVideo,PhotosNum,state,isUndisclosedAddress,isFeaturedListing,duplicate,Number of Households,Median Household Income,Per Capita Income,Total Income Distribution,Median Gross Rent,Median Home Value,Occupied Housing Units,Vacant Housing Units,"Owner-Occupied Units (value < $100,000)",Monthly Housing Costs,Precinct,schools_in_precinct,nearby_subway_stations,crime_rate,Median Age_ratio,Male Population_ratio,Female Population_ratio,White Alone_ratio,Black or African American Alone_ratio,Asian Alone_ratio,Hispanic or Latino_ratio,Population Below Poverty Level_ratio,Employed_ratio,Unemployed_ratio,High School Graduate (Age 25+)_ratio,Bachelor’s Degree (Age 25+)_ratio,Graduate or Professional Degree (Age 25+)_ratio,English Only_ratio,Spanish_ratio,Never Married_ratio,Currently Married_ratio,Divorced_ratio
0,2024-11-25,1,2024-11-25,171 East 96th Street #4A,1855.0,Brooklyn,brownsville,11212,rental,1,1.0,40.662186,-73.922293,"['fios_available', 'hardwood_floors', 'nyc_eva...",1926.0,Large newly renovated 1-bedroom \n\nFeatures: ...,1,4,,,,False,32091.0,35840.0,21502.0,32091.0,1150.0,565200.0,32091.0,2438.0,154.0,1196.0,67,41,3,1.893937,0.000443,0.424755,0.575245,0.066448,0.730543,0.007845,0.194808,0.324774,0.369616,0.061293,0.200164,0.079363,0.002393,,,0.188141,0.100302,0.026939
1,2024-11-22,4,2024-11-22,93 East 96th Street #11,2235.0,Brooklyn,brownsville,11212,rental,2,1.0,40.663817,-73.924122,"['fios_available', 'hardwood_floors', 'live_in...",1926.0,Beautifully newly Renovated Two-Bedroom Apartm...,1,5,,,,False,32091.0,35840.0,21502.0,32091.0,1150.0,565200.0,32091.0,2438.0,154.0,1196.0,67,41,3,1.893937,0.000443,0.424755,0.575245,0.066448,0.730543,0.007845,0.194808,0.324774,0.369616,0.061293,0.200164,0.079363,0.002393,,,0.188141,0.100302,0.026939
2,2024-11-22,4,2024-11-22,501 Saratoga Avenue #B,1750.0,Brooklyn,brownsville,11212,rental,1,1.0,40.668506,-73.917055,"['fios_available', 'hardwood_floors', 'live_in...",1930.0,Stunning Newly Renovated One-Bedroom Apartment...,1,7,,,,False,32091.0,35840.0,21502.0,32091.0,1150.0,565200.0,32091.0,2438.0,154.0,1196.0,73,56,0,2.036421,0.000443,0.424755,0.575245,0.066448,0.730543,0.007845,0.194808,0.324774,0.369616,0.061293,0.200164,0.079363,0.002393,,,0.188141,0.100302,0.026939
3,2024-11-20,6,2024-11-20,1115 Willmohr Street #2P,1850.0,Brooklyn,brownsville,11212,rental,1,1.0,40.657349,-73.91775,"['cats', 'doorman', 'elevator', 'fios_availabl...",1961.0,BIG 1 Bedroom LUXURY APARTMENT\n\nThis 1 bathr...,1,4,,,,False,32091.0,35840.0,21502.0,32091.0,1150.0,565200.0,32091.0,2438.0,154.0,1196.0,67,41,3,1.893937,0.000443,0.424755,0.575245,0.066448,0.730543,0.007845,0.194808,0.324774,0.369616,0.061293,0.200164,0.079363,0.002393,,,0.188141,0.100302,0.026939
4,2024-11-19,7,2024-11-19,1075 Clarkson Avenue #2B,3000.0,Brooklyn,brownsville,11212,rental,3,1.0,40.659825,-73.922602,"['central_ac', 'fios_available', 'hardwood_flo...",1910.0,Newly Renovated – 3BR/1BA in Brownsville on Cl...,1,9,,,,False,32091.0,35840.0,21502.0,32091.0,1150.0,565200.0,32091.0,2438.0,154.0,1196.0,67,41,0,1.893937,0.000443,0.424755,0.575245,0.066448,0.730543,0.007845,0.194808,0.324774,0.369616,0.061293,0.200164,0.079363,0.002393,,,0.188141,0.100302,0.026939


In [110]:
# dropping rest of the columns i do not need
columns_to_drop = ['state','isUndisclosedAddress','isFeaturedListing','duplicate','English Only_ratio','Spanish_ratio']
df.drop(columns=columns_to_drop,inplace=True)

In [111]:
df.head()

Unnamed: 0,listedAt,daysOnMarket,availableFrom,street,price,borough,neighborhood,zip,propertyType,beds,baths,latitude,longitude,amenities,builtIn,description,hasVideo,PhotosNum,Number of Households,Median Household Income,Per Capita Income,Total Income Distribution,Median Gross Rent,Median Home Value,Occupied Housing Units,Vacant Housing Units,"Owner-Occupied Units (value < $100,000)",Monthly Housing Costs,Precinct,schools_in_precinct,nearby_subway_stations,crime_rate,Median Age_ratio,Male Population_ratio,Female Population_ratio,White Alone_ratio,Black or African American Alone_ratio,Asian Alone_ratio,Hispanic or Latino_ratio,Population Below Poverty Level_ratio,Employed_ratio,Unemployed_ratio,High School Graduate (Age 25+)_ratio,Bachelor’s Degree (Age 25+)_ratio,Graduate or Professional Degree (Age 25+)_ratio,Never Married_ratio,Currently Married_ratio,Divorced_ratio
0,2024-11-25,1,2024-11-25,171 East 96th Street #4A,1855.0,Brooklyn,brownsville,11212,rental,1,1.0,40.662186,-73.922293,"['fios_available', 'hardwood_floors', 'nyc_eva...",1926.0,Large newly renovated 1-bedroom \n\nFeatures: ...,1,4,32091.0,35840.0,21502.0,32091.0,1150.0,565200.0,32091.0,2438.0,154.0,1196.0,67,41,3,1.893937,0.000443,0.424755,0.575245,0.066448,0.730543,0.007845,0.194808,0.324774,0.369616,0.061293,0.200164,0.079363,0.002393,0.188141,0.100302,0.026939
1,2024-11-22,4,2024-11-22,93 East 96th Street #11,2235.0,Brooklyn,brownsville,11212,rental,2,1.0,40.663817,-73.924122,"['fios_available', 'hardwood_floors', 'live_in...",1926.0,Beautifully newly Renovated Two-Bedroom Apartm...,1,5,32091.0,35840.0,21502.0,32091.0,1150.0,565200.0,32091.0,2438.0,154.0,1196.0,67,41,3,1.893937,0.000443,0.424755,0.575245,0.066448,0.730543,0.007845,0.194808,0.324774,0.369616,0.061293,0.200164,0.079363,0.002393,0.188141,0.100302,0.026939
2,2024-11-22,4,2024-11-22,501 Saratoga Avenue #B,1750.0,Brooklyn,brownsville,11212,rental,1,1.0,40.668506,-73.917055,"['fios_available', 'hardwood_floors', 'live_in...",1930.0,Stunning Newly Renovated One-Bedroom Apartment...,1,7,32091.0,35840.0,21502.0,32091.0,1150.0,565200.0,32091.0,2438.0,154.0,1196.0,73,56,0,2.036421,0.000443,0.424755,0.575245,0.066448,0.730543,0.007845,0.194808,0.324774,0.369616,0.061293,0.200164,0.079363,0.002393,0.188141,0.100302,0.026939
3,2024-11-20,6,2024-11-20,1115 Willmohr Street #2P,1850.0,Brooklyn,brownsville,11212,rental,1,1.0,40.657349,-73.91775,"['cats', 'doorman', 'elevator', 'fios_availabl...",1961.0,BIG 1 Bedroom LUXURY APARTMENT\n\nThis 1 bathr...,1,4,32091.0,35840.0,21502.0,32091.0,1150.0,565200.0,32091.0,2438.0,154.0,1196.0,67,41,3,1.893937,0.000443,0.424755,0.575245,0.066448,0.730543,0.007845,0.194808,0.324774,0.369616,0.061293,0.200164,0.079363,0.002393,0.188141,0.100302,0.026939
4,2024-11-19,7,2024-11-19,1075 Clarkson Avenue #2B,3000.0,Brooklyn,brownsville,11212,rental,3,1.0,40.659825,-73.922602,"['central_ac', 'fios_available', 'hardwood_flo...",1910.0,Newly Renovated – 3BR/1BA in Brownsville on Cl...,1,9,32091.0,35840.0,21502.0,32091.0,1150.0,565200.0,32091.0,2438.0,154.0,1196.0,67,41,0,1.893937,0.000443,0.424755,0.575245,0.066448,0.730543,0.007845,0.194808,0.324774,0.369616,0.061293,0.200164,0.079363,0.002393,0.188141,0.100302,0.026939


In [112]:
df.to_csv('final_cleaned_data.csv')