## Installations Required
<br>
!pip install geopandas
<br>
!pip3 install shapely==1.5.17.post1
<br>
!pip install geojsonio
<br>
!pip install langdetect
<br>
!pip install cufflinks
<br>
!pip3 install pickle5

## Import Statements

In [None]:
import pandas as pd 
import numpy as np
import os
import glob   
import gc
import time 
from collections import Counter
import seaborn as sns 
import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS
#https://towardsdatascience.com/sentimental-analysis-using-vader-a3415fef766
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from tqdm import tqdm, tqdm_pandas
from bs4 import BeautifulSoup
#Plotly Tools
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.offline as offline
offline.init_notebook_mode()
from plotly import tools
import plotly.tools as tls
init_notebook_mode(connected=True)
#https://stackoverflow.com/questions/55132071/series-object-has-no-attribute-iplot/55132247
import cufflinks as cf
cf.go_offline()
cf.set_config_file(offline=False, world_readable=True)
from sklearn.preprocessing import MinMaxScaler

from plotly.offline import iplot
import plotly.graph_objs as go
import pickle5 as pickle

## Global Variables

In [None]:
#File Location Variables
_DataFolderPath="raw_data_csv"
_LocationName="NY"
_ListingCSV="listings"
_ReviewsCSV="reviews"
_NeighbourhoodsCSV="neighbourhoods.csv"
_CalendarCSV="calendar"
_NeighbourhoodsJson="neighbourhoods.geojson"
_LocationPath=_DataFolderPath +"/" + _LocationName
_PickleFilesFolder="pickle_files"
_PickleFile_Merged_Listing_NY="Merged_Listing_NY"
scaler = MinMaxScaler()


# Helper Methods

In [None]:
def enable_plotly_in_cell():
    """Prepare the current notebook output cell for offline Plotly rendering.

    Emits a ``<script>`` tag that loads require.js from the notebook's static
    path and then calls ``init_notebook_mode(connected=False)``.
    """
    # Import ``display`` explicitly: it is only injected as a builtin inside
    # interactive IPython sessions, so relying on the implicit global breaks
    # when this helper runs from a plain Python process.
    from IPython.display import HTML, display
    from plotly.offline import init_notebook_mode

    display(HTML('''<script src="/static/components/requirejs/require.js"></script>'''))
    init_notebook_mode(connected=False)

In [None]:
#Method to clear Empty Spaces
def replaceSpaces(text):
    r"""Turn *text* into a single underscore-separated token.

    The value is converted with ``str()``, stripped of surrounding
    whitespace, cleared of the literal two-character sequences ``\r``,
    ``\"`` and ``\n`` (a backslash followed by the character, not the
    control characters), and every remaining space becomes ``_``.

    Args:
        text: Any value; non-strings are stringified first.

    Returns:
        The cleaned string.
    """
    # Strip the stringified value. The original stripped the raw argument,
    # which discarded the str() conversion and crashed on non-string input.
    cleaned = str(text).strip()
    for literal in ('\\r', '\\"', '\\n'):
        cleaned = cleaned.replace(literal, '')
    return cleaned.replace(' ', '_')

In [None]:
# https://stackoverflow.com/a/47091490/4084039
import re
def decontracted(phrase):
    """Expand common English contractions in *phrase* and return the result."""
    # Applied strictly in this order: the two irregular forms come first so
    # the generic "n't" rule cannot split them.
    substitutions = (
        (r"won't", "will not"),
        (r"can\'t", "can not"),
        (r"n\'t", " not"),
        (r"\'re", " are"),
        (r"\'s", " is"),
        (r"\'d", " would"),
        (r"\'ll", " will"),
        (r"\'t", " not"),
        (r"\'ve", " have"),
        (r"\'m", " am"),
    )
    expanded = phrase
    for pattern, replacement in substitutions:
        expanded = re.sub(pattern, replacement, expanded)
    return expanded

In [None]:
#https://stackoverflow.com/questions/60214194/error-in-reading-stock-data-datetimeproperties-object-has-no-attribute-week
#https://docs.python.org/3/library/time.html
def getWeekDayNumber(text):
    """Return the ``tm_wday`` index of a full weekday name (Monday is 0)."""
    parsed = time.strptime(text, '%A')
    return parsed.tm_wday

def getMonthNumber(text):
    """Return the ``tm_mon`` number of a full month name (January is 1)."""
    parsed = time.strptime(text, '%B')
    return parsed.tm_mon

In [None]:
nltk.download('stopwords')

In [None]:
# To get the results in 4 decimal points
SAFE_DIV = 0.0001 
STOP_WORDS = stopwords.words("english")

# Built once at cell execution instead of on every preprocess() call.
_NON_WORD_PATTERN = re.compile(r"\W")
_PORTER_STEMMER = PorterStemmer()

# Literal substitutions, applied in exactly this order on lower-cased text.
_TEXT_REPLACEMENTS = (
    (",000,000", "m"), (",000", "k"), ("′", "'"), ("’", "'"),
    ("won't", "will not"), ("cannot", "can not"), ("can't", "can not"),
    ("n't", " not"), ("what's", "what is"), ("it's", "it is"),
    ("'ve", " have"), ("i'm", "i am"), ("'re", " are"),
    ("he's", "he is"), ("she's", "she is"), ("'s", " own"),
    ("%", " percent "), ("₹", " rupee "), ("$", " dollar "),
    ("€", " euro "), ("'ll", " will"),
)


def preprocess(x):
    """Normalise one free-text value for the NLP features.

    Steps, in order:
      1. stringify and strip HTML markup,
      2. lower-case,
      3. expand contractions, currency/percent symbols and thousand/million
         suffixes,
      4. replace every non-word character with a space,
      5. run the Porter stemmer over the result.

    Args:
        x: Any value; it is converted with ``str()`` first.

    Returns:
        The cleaned string.
    """
    x = str(x)

    # HTML must be stripped while the angle brackets still exist. The original
    # ran BeautifulSoup after the \W substitution, when no tag was left to
    # remove. The parser is named explicitly so the result does not depend on
    # which optional parser libraries happen to be installed.
    x = BeautifulSoup(x, "html.parser").get_text()

    x = x.lower()
    for old, new in _TEXT_REPLACEMENTS:
        x = x.replace(old, new)
    x = re.sub(r"([0-9]+)000000", r"\1m", x)
    x = re.sub(r"([0-9]+)000", r"\1k", x)

    x = _NON_WORD_PATTERN.sub(' ', x)

    # NOTE(review): stem() receives the whole string as a single token, as in
    # the original code; per-word stemming would need tokenising first.
    x = _PORTER_STEMMER.stem(x)

    return x
    

In [None]:
nltk.download('vader_lexicon')

In [None]:
# Single analyser instance, created once and reused by every call below.
SENTIMENT_ANALYZER = SentimentIntensityAnalyzer()


def getSentimentScore_Compound(text):
    """Return the VADER ``compound`` polarity score for *text*.

    Args:
        text: The text passed to ``SENTIMENT_ANALYZER.polarity_scores``.

    Returns:
        The value stored under the ``'compound'`` key of the polarity scores.
    """
    # The function used to be defined twice with identical bodies; one
    # definition is kept.
    statement_polarity = SENTIMENT_ANALYZER.polarity_scores(text)
    return statement_polarity['compound']

In [None]:
# Load the pickled listings frame that the rest of the notebook works on.
# NOTE(review): unpickling can execute arbitrary code - only load pickle
# files that come from a trusted source.
with open(_PickleFilesFolder + "/" + _PickleFile_Merged_Listing_NY,'rb') as f:
    _DF_LISTING_EDA = pickle.load(f)

# EDA For Listings

In [None]:
#We don't need following columns:
'''
1. listing_url -" URL for Every Listing we dont need this"
2. scrape_id -: Related toData Collection
3. last_scraped =" Lasy date of data collected"
4. name of property
5. picture_url
6.host_url                                      
7..host_thumbnail_url
'''

In [None]:
# Drop the URL / scrape-metadata / name columns listed in the cell above.
# ('host_url' used to be listed twice; each label now appears once.)
_DF_LISTING_EDA = _DF_LISTING_EDA.drop(
    columns=[
        'listing_url',
        'scrape_id',
        'last_scraped',
        'name',
        'picture_url',
        'host_url',
        'host_thumbnail_url',
    ]
)
_DF_LISTING_EDA = _DF_LISTING_EDA.reset_index(drop=True)

In [None]:
#https://www.kaggle.com/mistrzuniu1/tutorial-eda-feature-selection-regression
# Missing-value report per column: absolute count and percentage of rows,
# both ordered from most to least affected.
total = _DF_LISTING_EDA.isnull().sum()
total = total.sort_values(ascending = False)
percent = _DF_LISTING_EDA.isnull().sum() / _DF_LISTING_EDA.isnull().count() * 100
percent = percent.sort_values(ascending = False)
# Raise the display limit so the report is not truncated.
pd.set_option('display.max_rows', 500)
missing_data = pd.concat(objs=[total, percent], keys=['Total', 'Percent'], axis=1)
missing_data


In [None]:
del missing_data

In [None]:
#Dropping Columns which are 100 % Null

In [None]:
# Remove the columns identified above as entirely null, then renumber rows.
_DF_LISTING_EDA = (
    _DF_LISTING_EDA
    .drop(columns=['bathrooms', 'calendar_updated', 'license'])
    .reset_index(drop=True)
)

In [None]:
#Text Preprocessing and NLP for host_about

In [None]:
_DF_LISTING_EDA.fillna({'host_about':'na'}, inplace=True)

In [None]:

_DF_LISTING_EDA["host_about"] = _DF_LISTING_EDA["host_about"].fillna("na").apply(preprocess)


In [None]:

_DF_LISTING_EDA['host_about_len']=_DF_LISTING_EDA.host_about.apply(len)

In [None]:

_DF_LISTING_EDA=_DF_LISTING_EDA.drop(columns=['host_about'])
_DF_LISTING_EDA=_DF_LISTING_EDA.reset_index(drop=True)

In [None]:
# NLP and Text Processing of  host_neighbourhood

In [None]:
%%time
# Store the cleaned host_neighbourhood text under a separate column.
# NOTE: the doubled "d" in "host_neighbourhoodd" is the name later cells look
# up (host frame selection and plots), so it must not be renamed here alone.
_DF_LISTING_EDA["host_neighbourhoodd"] = _DF_LISTING_EDA["host_neighbourhood"].fillna("na").apply(preprocess)


In [None]:
_DF_LISTING_EDA["host_neighbourhoodd"].unique()

In [None]:
#fill Null Value with 0 or na
# One in-place call with a per-column mapping of fill values.
_DF_LISTING_EDA.fillna(
    {
        'reviews_per_month': 0,
        'neighborhood_overview': 'na',
        'neighbourhood': 'na',
    },
    inplace=True,
)

In [None]:
#https://stackoverflow.com/questions/60102928/pandas-fillna-only-numeric-int-or-float-columns
numeric_columns = _DF_LISTING_EDA.select_dtypes(include=['number']).columns


In [None]:
# fill 0 to all NaN 
_DF_LISTING_EDA[numeric_columns] = _DF_LISTING_EDA[numeric_columns].fillna(0)

In [None]:
# NOTE(review): the filled frame is only displayed, not assigned back, so
# _DF_LISTING_EDA keeps its remaining NaN values after this cell. Later cells
# (e.g. the float cast of host_acceptance_rate) would fail on 'na' strings, so
# do not turn this into an assignment without revisiting them.
_DF_LISTING_EDA.fillna('na')

In [None]:
#https://www.kaggle.com/mistrzuniu1/tutorial-eda-feature-selection-regression
# Re-run the missing-value report (count and percentage of rows per column).
total = _DF_LISTING_EDA.isnull().sum()
total = total.sort_values(ascending = False)
percent = _DF_LISTING_EDA.isnull().sum() / _DF_LISTING_EDA.isnull().count() * 100
percent = percent.sort_values(ascending = False)
# Raise the display limit so the report is not truncated.
pd.set_option('display.max_rows', 500)
missing_data = pd.concat(objs=[total, percent], keys=['Total', 'Percent'], axis=1)
missing_data

In [None]:
del missing_data

In [None]:
_DF_LISTING_EDA[_DF_LISTING_EDA.id <=0]

In [None]:
#convert price to Float and Remove Special Characters

In [None]:
# Keep only digits, '.' and '-' in the price text, then cast to float.
# The cleaned Series is assigned back instead of calling
# replace(..., inplace=True) on the selected column: an in-place method on a
# column selection is not guaranteed to write through to the parent frame
# (and never does under pandas Copy-on-Write).
_DF_LISTING_EDA['price'] = (
    _DF_LISTING_EDA['price']
    .replace(to_replace=r'[^0-9.\-]', value=r'', regex=True)
    .astype(float)
)

In [None]:
#Find if Price <=0 we will remove if price is <=0

In [None]:
_DF_LISTING_EDA[_DF_LISTING_EDA.price <=0]

In [None]:
# Remove, in place, every row whose price is zero or negative. The index is
# not renumbered here.
_DF_LISTING_EDA.drop(_DF_LISTING_EDA[_DF_LISTING_EDA.price <=0].index, inplace = True) 

In [None]:
_DF_LISTING_EDA[_DF_LISTING_EDA.price <=0]

# Target Feature

## Price

In [None]:
_DF_LISTING_EDA['price'].describe()



In [None]:
_DF_LISTING_EDA['price'].median()


In [None]:
sns.distplot(_DF_LISTING_EDA['price'])

In [None]:
plt.figure(figsize=(8,6))
sns.distplot(_DF_LISTING_EDA['price'], kde=False);


In [None]:
'''
As a general rule of thumb: If skewness is less than -1 or greater than 1, 
the distribution is highly skewed. If skewness is between -1 and -0.5 or between 0.5 and 1, the 
distribution is moderately skewed. If skewness is between -0.5 and 0.5, the distribution is 
approximately symmetric.
'''

In [None]:
# skewness along the index axis
# numeric_only=True: the frame still contains text columns, and pandas >= 2.0
# raises on them instead of silently skipping them.
_DF_LISTING_EDA.skew(axis = 0, skipna = True, numeric_only = True)

# Univariate and Bivariate Analysis for Each Column

## Features-:

### 1.   Neighborhood

#### 1 .1  Neighborhood (Categories)

In [None]:
# remove special characters from list of strings python: https://stackoverflow.com/a/47301924/4084039
# https://www.geeksforgeeks.org/removing-stop-words-nltk-python/
# https://stackoverflow.com/questions/23669024/how-to-strip-a-specific-word-from-a-string
# https://stackoverflow.com/questions/8270092/remove-all-whitespace-in-a-string-in-python

# Literal substitutions applied, in this order, to each cleaned value.
_NEIGHBOURHOOD_REPLACEMENTS = (
    ('&', '_'),
    ('UnitedStates', 'USA'),
    ('NewJersey', 'NJ'),
    ('New Jersey', 'NJ'),
    ('United States', 'USA'),
    ('New York', 'NY'),
    ('NewYork', 'NY'),
)


def _clean_neighbourhood(raw_value):
    """Collapse one comma-separated neighbourhood string into space-separated tokens.

    "Jersey City, New Jersey, United States" becomes "JerseyCity NJ USA".
    A missing value (anything whose ``str()`` is 'nan') becomes "".
    """
    if str(raw_value) == 'nan':
        # Always return a placeholder so the output stays aligned with the
        # frame. The original loop appended nothing for NaN, which left the
        # list shorter than the DataFrame and broke the column assignment.
        return ""
    # Each comma-separated part loses its inner spaces, e.g. "New York" -> "NewYork".
    tokens = [part.replace(' ', '').strip() for part in raw_value.split(',')]
    cleaned = " ".join(tokens)
    # Replacements run once on the joined text instead of on every inner
    # iteration of the old loop; the result is the same.
    for old, new in _NEIGHBOURHOOD_REPLACEMENTS:
        cleaned = cleaned.replace(old, new)
    return cleaned.strip()


neighbourhood_values = list(_DF_LISTING_EDA['neighbourhood'].values)
neighbourhood_list = [_clean_neighbourhood(value) for value in neighbourhood_values]

_DF_LISTING_EDA['neighbourhood'] = neighbourhood_list

# count of all the words in corpus python: https://stackoverflow.com/a/22898595/4084039
my_counter = Counter()
for word in _DF_LISTING_EDA['neighbourhood'].values:
    my_counter.update(word.split())

neighbourhood_dict = dict(my_counter)
# Token frequencies ordered from least to most frequent.
sorted_neighbourhood_dict = dict(sorted(neighbourhood_dict.items(), key=lambda kv: kv[1]))

#### 1.2   Neighborhood_overview      

In [None]:
#Doing the following for neighborhood_overview
# 1. Text Preprocessing
# 2. Calculating Sentiment Scores
# 3. Calc Length of Review
# 4. Calc Word Count in Review

In [None]:
%%time
# Clean the overview text (missing values become the token "na").
_DF_LISTING_EDA['neighborhood_overview'] = _DF_LISTING_EDA["neighborhood_overview"].fillna("na").apply(preprocess)
# Derive three numeric features from the cleaned text: compound sentiment
# score, character length and whitespace-separated word count.
_DF_LISTING_EDA['neighborhood_overview_score']=_DF_LISTING_EDA.neighborhood_overview.apply(getSentimentScore_Compound)
_DF_LISTING_EDA['neighborhood_overview_len']=_DF_LISTING_EDA.neighborhood_overview.apply(len)
_DF_LISTING_EDA['neighborhood_overview_word_count'] = _DF_LISTING_EDA['neighborhood_overview'].apply(lambda x: len(str(x).split()))
# The raw text column is dropped once the features exist.
_DF_LISTING_EDA.drop(['neighborhood_overview'], axis=1, inplace=True)

In [None]:

# Work on an explicit copy: the scaled values below are written into this
# frame, and writing into a plain column selection of _DF_LISTING_EDA raises
# SettingWithCopyWarning and may not update the selection at all.
df_Listing_nhood = _DF_LISTING_EDA[[
    'neighbourhood', 'price', 'neighborhood_overview_score',
    'neighborhood_overview_len', 'neighborhood_overview_word_count',
    'neighbourhood_cleansed', 'longitude', 'latitude',
]].copy()

# Min-max scale price and the overview features so they share one axis in the plots.
df_Listing_nhood[['price','neighborhood_overview_score','neighborhood_overview_len','neighborhood_overview_word_count']] = scaler.fit_transform(df_Listing_nhood[['price','neighborhood_overview_score','neighborhood_overview_len','neighborhood_overview_word_count']])

In [None]:
# Analysis of Three features added for neighbourhood_overview
    # neighborhood_overview_score
    #neighborhood_overview_len
    #neighborhood_overview_word_count

In [None]:
_DF_LISTING_EDA.groupby('neighbourhood')['price'].mean().iplot(kind='bar',  xTitle='Neighbourhood', yTitle='Average Price')


In [None]:
df_Listing_nhood.groupby('neighbourhood_cleansed')['price'].mean().iplot(kind='bar',  xTitle='Neighbourhood', yTitle='Average Price')


In [None]:
# Mean scaled price and overview sentiment score per neighbourhood.
# Columns are selected with a list: tuple-style selection on a groupby was
# removed in pandas 2.0.
df_Listing_nhood.groupby('neighbourhood')[['price','neighborhood_overview_score']].mean().iplot()

In [None]:
# Mean scaled price and all three overview features per neighbourhood.
# Columns are selected with a list: tuple-style selection on a groupby was
# removed in pandas 2.0.
df_Listing_nhood.groupby('neighbourhood')[['price','neighborhood_overview_score',
                                           'neighborhood_overview_len','neighborhood_overview_word_count']].mean().iplot()

In [None]:
# Mean scaled price, overview length and overview word count per neighbourhood.
# Columns are selected with a list: tuple-style selection on a groupby was
# removed in pandas 2.0.
df_Listing_nhood.groupby('neighbourhood')[['price','neighborhood_overview_len','neighborhood_overview_word_count']].mean().iplot()

In [None]:
# Correlation between the per-neighbourhood means of price and the overview features.
# Columns are selected with a list: tuple-style selection on a groupby was
# removed in pandas 2.0.
df_Listing_nhood.groupby('neighbourhood')[['price','neighborhood_overview_score',
                                           'neighborhood_overview_len','neighborhood_overview_word_count']].mean().corr().iplot(kind='heatmap',colorscale="Blues",title="Feature Correlation Matrix")

In [None]:
# Correlate price with the location columns. 'neighbourhood' and
# 'neighbourhood_cleansed' hold text, so only the numeric columns are passed
# to corr(): pandas >= 2.0 raises on text columns instead of skipping them.
df_Listing_nhood[['price','neighbourhood','neighbourhood_cleansed','longitude','latitude']].select_dtypes(include=['number']).corr().iplot(kind='heatmap',colorscale="Blues",title="Feature Correlation Matrix")

In [None]:
# Drop the longitude column and renumber the rows.
# NOTE(review): 'longitude' is listed twice; 'latitude' may have been intended
# as the second label - confirm before changing, later steps may still use it.
_DF_LISTING_EDA=_DF_LISTING_EDA.drop(columns=['longitude','longitude'])
_DF_LISTING_EDA=_DF_LISTING_EDA.reset_index(drop=True)

### Observation:

1. Overview length and overview word count are directly related to the average price of a neighbourhood.
2. Overview score has no impact on price. Will remove this column.

In [None]:
del df_Listing_nhood

In [None]:
stepcount="1"

In [None]:
_DF_LISTING_EDA.to_pickle(_PickleFilesFolder + "/" + _PickleFile_Merged_Listing_NY + "_" + stepcount)

In [None]:
# "deep" is the keyword pandas recognises for full object-memory
# introspection; the previous value "deeper" is not that keyword, so no deep
# measurement was made.
_DF_LISTING_EDA.info(memory_usage="deep")

### 2. Review Scores of Listings :

Will do Price Analysis and Multivariate Analysis with the following features:

    # avg_review_score                              
 
    # total_reviews_count                             
 
    # avg_review_len                                
 
    # past_review_date                               
 
    # laste_review_date                              

In [None]:
_DF_LISTING_EDA['past_review_date'] = pd.to_datetime(_DF_LISTING_EDA['past_review_date'])
_DF_LISTING_EDA['laste_review_date'] = pd.to_datetime(_DF_LISTING_EDA['laste_review_date'])

In [None]:
#Getting Review Date Lenght in Days from First and Last Review

In [None]:
#https://stackoverflow.com/questions/37840812/pandas-subtracting-two-date-columns-and-the-result-being-an-integer/46966942

_DF_LISTING_EDA['ReviewDate_Diff'] = (_DF_LISTING_EDA['laste_review_date']-_DF_LISTING_EDA['past_review_date']).dt.days

In [None]:
_DF_LISTING_EDA.fillna({'ReviewDate_Diff':0}, inplace=True)

In [None]:

_DF_LISTING_EDA.drop(['laste_review_date','past_review_date'], axis = 1, inplace = True) 

In [None]:
# Min Max Scaler

In [None]:
# Explicit copy: the next cell overwrites these columns with min-max scaled
# values, which must not be written into a plain selection of _DF_LISTING_EDA
# (SettingWithCopyWarning).
df_nhood = _DF_LISTING_EDA[['neighbourhood','avg_review_score','total_reviews_count','avg_review_len','ReviewDate_Diff','host_id','price']].copy()

In [None]:

df_nhood[['avg_review_score','total_reviews_count','avg_review_len','ReviewDate_Diff','price']]= scaler.fit_transform(df_nhood[['avg_review_score','total_reviews_count','avg_review_len','ReviewDate_Diff','price']])


In [None]:
# Mean scaled price and review score per neighbourhood.
# Columns are selected with a list: tuple-style selection on a groupby was
# removed in pandas 2.0.
df_nhood.groupby('neighbourhood')[['price','avg_review_score']].mean().iplot()

### Listing Reviews Based on Neighbour Hoods

In [None]:
enable_plotly_in_cell()
# Mean scaled price and review count per neighbourhood.
# Columns are selected with a list: tuple-style selection on a groupby was
# removed in pandas 2.0.
df_nhood.groupby('neighbourhood')[['price','total_reviews_count']].mean().iplot()

In [None]:
enable_plotly_in_cell()
# Mean scaled price and average review length per neighbourhood.
# Columns are selected with a list: tuple-style selection on a groupby was
# removed in pandas 2.0.
df_nhood.groupby('neighbourhood')[['price','avg_review_len']].mean().iplot()

In [None]:
enable_plotly_in_cell()
# Mean scaled price and first-to-last review span per neighbourhood.
# Columns are selected with a list: tuple-style selection on a groupby was
# removed in pandas 2.0.
df_nhood.groupby('neighbourhood')[['price','ReviewDate_Diff']].mean().iplot()

In [None]:
enable_plotly_in_cell()
# Mean scaled price and all review features per neighbourhood.
# Columns are selected with a list: tuple-style selection on a groupby was
# removed in pandas 2.0.
df_nhood.groupby('neighbourhood')[['price','avg_review_score','total_reviews_count','avg_review_len','ReviewDate_Diff']].mean().iplot()

In [None]:
enable_plotly_in_cell()
# Correlation between the per-neighbourhood means of price and the review features.
# Columns are selected with a list: tuple-style selection on a groupby was
# removed in pandas 2.0.
df_nhood.groupby('neighbourhood')[['price','avg_review_score','total_reviews_count','avg_review_len','ReviewDate_Diff']].mean().corr().iplot(kind='heatmap',colorscale="Blues",title="Feature Correlation Matrix")

In [None]:
del  df_nhood

####  Observations:
1. Average listing scores have no impact on neighbourhood listings.
2. Review count and review date difference have a strong impact on the price of listings in a neighbourhood.

### 3.   HOST
      host_about_score                              
      host_about_len                                  
      host_neighbourhoodd  
      host_location                                  
      host_response_time                             
      host_response_rate                             
      host_acceptance_rate                           
      host_is_superhost                              
      host_picture_url                               
      host_neighbourhood                             
      host_listings_count                             
      host_total_listings_count                       
      host_verifications                             
      host_has_profile_pic                           
      host_identity_verified  
      host_since 

In [None]:
_DF_LISTING_EDA['host_location']=_DF_LISTING_EDA["host_location"].fillna('na').apply(preprocess)

In [None]:
_DF_LISTING_EDA['host_response_time']=_DF_LISTING_EDA["host_response_time"].fillna('na').apply(preprocess)

In [None]:
_DF_LISTING_EDA['host_response_rate']=_DF_LISTING_EDA["host_response_rate"].fillna('na').apply(preprocess)

In [None]:
_DF_LISTING_EDA['host_neighbourhood']=_DF_LISTING_EDA["host_neighbourhood"].fillna('na').apply(preprocess)

In [None]:
_DF_LISTING_EDA['host_acceptance_rate']= _DF_LISTING_EDA['host_acceptance_rate'].str.replace('%', '')
_DF_LISTING_EDA['host_acceptance_rate']=_DF_LISTING_EDA['host_acceptance_rate'].astype('float')


In [None]:
_DF_LISTING_EDA['host_acceptance_rate']=_DF_LISTING_EDA["host_acceptance_rate"].fillna(0)

In [None]:
 _DF_LISTING_EDA['host_response_rate']= _DF_LISTING_EDA['host_response_rate'].str.replace('na', '0')   
_DF_LISTING_EDA['host_response_rate']= _DF_LISTING_EDA['host_response_rate'].str.replace('percent', '')
_DF_LISTING_EDA['host_response_rate']=_DF_LISTING_EDA['host_response_rate'].astype('float')


In [None]:
_DF_LISTING_EDA['host_is_superhost'] = _DF_LISTING_EDA.host_is_superhost.map(lambda x: 1 if x == 't' else 0)

In [None]:
_DF_LISTING_EDA.host_picture_url

In [None]:
_DF_LISTING_EDA.drop(['host_picture_url'], axis = 1, inplace = True) 

In [None]:
_DF_LISTING_EDA['host_identity_verified'] = _DF_LISTING_EDA.host_identity_verified.map(lambda x: 1 if x == 't' else 0)

In [None]:
# Comma count of the host_verifications text (presumably a list-like string,
# so this is the number of entries minus one - TODO confirm).
# Missing values are treated as empty text: calling .count() on a float NaN
# raised AttributeError.
_DF_LISTING_EDA['host_verifications_types']=_DF_LISTING_EDA['host_verifications'].fillna('').apply(lambda x: x.count(','))

In [None]:
_DF_LISTING_EDA.drop(['host_verifications'], axis = 1, inplace = True) 


In [None]:
_DF_LISTING_EDA["host_Since"] = pd.to_datetime(_DF_LISTING_EDA["host_since"])

In [None]:
#https://stackoverflow.com/questions/57011334/how-to-find-number-of-days-between-today-and-future-date/57013179
_DF_LISTING_EDA['host_age'] = ( pd.Timestamp('now')-_DF_LISTING_EDA['host_Since']).dt.days

In [None]:
_DF_LISTING_EDA.drop(['host_Since'], axis = 1, inplace = True) 

In [None]:
# Explicit copy: later cells overwrite columns of this frame with scaled
# values and add a 'neighbourhood' column; doing that on a plain selection of
# _DF_LISTING_EDA raises SettingWithCopyWarning.
df_list_host=_DF_LISTING_EDA[['host_id','host_name','host_about_len',
                 'host_neighbourhoodd','host_location',
                 'host_response_time','host_response_rate','host_acceptance_rate','host_is_superhost',
                 'host_total_listings_count',
                 'host_has_profile_pic','host_verifications_types','host_age','price','id']].copy()


In [None]:
df_list_host[['host_about_len','host_response_rate','host_acceptance_rate','host_total_listings_count','host_verifications_types','host_age','price']] = scaler.fit_transform(df_list_host[['host_about_len','host_response_rate','host_acceptance_rate','host_total_listings_count','host_verifications_types','host_age','price']])

In [None]:
enable_plotly_in_cell()
df_list_host.groupby('host_neighbourhoodd')['price'].mean().iplot(
    kind='bar',
    xTitle='Host Neighbourhood',
    linecolor='black',
    yTitle='Avg Price',
    title='Host Neighbour Hood Vs Price')

In [None]:
enable_plotly_in_cell()
df_list_host.groupby('host_location')['price'].mean().iplot(
    kind='bar',
    xTitle='Host Location',
    linecolor='black',
    yTitle='Price',
    title='Host Location vs Price')

In [None]:
enable_plotly_in_cell()
# Mean host_about length and price per host name.
# Columns are selected with a list: tuple-style selection on a groupby was
# removed in pandas 2.0.
df_list_host.groupby('host_name')[['host_about_len','price']].mean().iplot(
    )

In [None]:
enable_plotly_in_cell()
# Mean price and response rate per host name.
# Columns are selected with a list: tuple-style selection on a groupby was
# removed in pandas 2.0.
df_list_host.groupby('host_name')[['price','host_response_rate']].mean().iplot(
    )

In [None]:
enable_plotly_in_cell()
# Mean price and acceptance rate per host name.
# Columns are selected with a list: tuple-style selection on a groupby was
# removed in pandas 2.0.
df_list_host.groupby('host_name')[['price','host_acceptance_rate']].mean().iplot(
    )

In [None]:

df_list_host['neighbourhood']= _DF_LISTING_EDA['neighbourhood']

In [None]:
enable_plotly_in_cell()
# Mean price and total listings count per host name.
# Columns are selected with a list: tuple-style selection on a groupby was
# removed in pandas 2.0.
df_list_host.groupby('host_name')[['price','host_total_listings_count']].mean().iplot(
    )

In [None]:
enable_plotly_in_cell()
# Mean price and host age (days since host_since) per host name.
# Columns are selected with a list: tuple-style selection on a groupby was
# removed in pandas 2.0.
df_list_host.groupby('host_name')[['price','host_age']].mean().iplot(
    )

In [None]:
enable_plotly_in_cell()
# Mean price and verification-type count per host name.
# Columns are selected with a list: tuple-style selection on a groupby was
# removed in pandas 2.0.
df_list_host.groupby('host_name')[['price','host_verifications_types']].mean().iplot(
    )

In [None]:
enable_plotly_in_cell()
df_list_host.groupby(['host_is_superhost','neighbourhood'])['price'].mean().iplot()

In [None]:
enable_plotly_in_cell()
df_list_host.groupby(['host_has_profile_pic','neighbourhood'])['price'].mean().iplot()

In [None]:
enable_plotly_in_cell()
# host_has_profile_pic is never encoded numerically in the cells above
# (presumably it still holds 't'/'f' text like host_is_superhost did). A text
# column makes corr() raise on pandas >= 2.0 and is silently left out on older
# versions, so encode it as 0/1 on a scratch copy before correlating.
_host_corr_frame = df_list_host[['host_about_len','host_response_rate','host_acceptance_rate','host_total_listings_count','host_verifications_types','host_age','host_has_profile_pic','host_is_superhost','price']].copy()
if _host_corr_frame['host_has_profile_pic'].dtype == object:
    _host_corr_frame['host_has_profile_pic'] = _host_corr_frame['host_has_profile_pic'].map(lambda x: 1 if x == 't' else 0)
_host_corr_frame.corr().iplot(kind='heatmap',colorscale="Blues",title="Feature Correlation Matrix")

# Observation:

All host features look strongly related to price.

In [None]:
del df_list_host

In [None]:
stepcount="2"

In [None]:
_DF_LISTING_EDA.to_pickle(_PickleFilesFolder + "/" + _PickleFile_Merged_Listing_NY + "_" + stepcount)