# Chicago Information Only
We are applying the exact same steps as our 'Analysis.ipynb'. 
The only difference is that early on, we get the Chicago information only and 
apply the analysis on this data.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
import statsmodels.formula.api as smf
import operator
from math import hypot
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

pd.set_option('display.max_columns', None)

In [2]:
df = pd.read_csv('train.csv')

In [3]:
properties = ['Apartment', 'House', 'Condomminium', 'Townhouse', 'Loft', 'Other']
def property_grouping(p):
    '''
    Given property type, if it is not an apartment, house, condo, townhouse, loft, or
    labeled as 'Other', set as 'Other'
    '''
    if p not in properties:
        p = 'Other'
    return p

def make_dummies(df):
    '''
    Take column of categories and make dummies in the dataset.
    Drop the original columns
    '''
    columns = list(df.columns)
    for column in columns:
        dummies = pd.get_dummies(df[column])
        df = df.join(dummies)
    df.columns = df.columns.str.replace(' ', '_')
    df.columns = df.columns.str.replace('/','_')
    df.columns = df.columns.str.replace('-', '_')
    df.drop(columns=columns,inplace=True)
    return df

In [4]:
lat_long = {'SF': {'lat': 37.7749, 'long': -122.4194},
            'Chicago': {'lat': 41.8781, 'long': -87.6298},
            'DC': {'lat': 38.9072, 'long': -77.0369},
            'LA': {'lat': 34.0522, 'long': -118.2437},
            'Boston': {'lat': 42.3601, 'long': -71.0589},
            'NYC': {'lat': 40.7128, 'long': -74.0060}}

def get_distance(row):
    '''
    Given a row of data, get the city, latitude, and longitude
    and calculate the distance from downtown.
    '''
    city = row['city']
    lat1 = lat_long[city]['lat']
    lat2 = row['latitude']
    long1 = lat_long[city]['long']
    long2 = row['longitude']
    lon = long1 - long2
    lat = lat1 - lat2
    a = (np.sin(lat/2))**2 + np.cos(lat1) * np.cos(lat2) * ((np.sin(lon/2))**2)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1-a))
    return 69.172*c

# Get only Chicago data here

In [5]:
#import data as dataframe
df = pd.read_csv('train.csv')
df = df.groupby('city').get_group('Chicago')

In [6]:
df.head()

Unnamed: 0,id,log_price,property_type,room_type,amenities,accommodates,bathrooms,bed_type,cancellation_policy,cleaning_fee,city,description,first_review,host_has_profile_pic,host_identity_verified,host_response_rate,host_since,instant_bookable,last_review,latitude,longitude,name,neighbourhood,number_of_reviews,review_scores_rating,thumbnail_url,zipcode,bedrooms,beds
15,6226658,4.094345,Apartment,Private room,"{Internet,""Wireless Internet"",""Air conditionin...",2,1.5,Real Bed,moderate,True,Chicago,Just west of vibrant Wicker Park and Ukranian ...,,t,t,100%,2012-09-10,f,,41.90824,-87.695242,Sweet Home Chicago,Humboldt Park,0,,https://a0.muscache.com/im/pictures/996e04cd-5...,60622,1.0,1.0
16,7499179,4.317488,Loft,Private room,"{TV,""Wireless Internet"",Kitchen,""Pets allowed""...",2,1.0,Real Bed,strict,False,Chicago,Our place IS Six Corners! Top floor of 4 story...,2016-08-14,t,f,,2016-07-29,f,2016-10-16,41.910308,-87.675261,Private room in Wicker Park loft at Six Corners,Wicker Park,15,91.0,https://a0.muscache.com/im/pictures/f382cb5a-1...,60622,1.0,1.0
40,19813606,5.521461,Condominium,Entire home/apt,"{TV,""Wireless Internet"",""Air conditioning"",Kit...",5,2.0,Real Bed,strict,True,Chicago,Modern two bedroom condo with street parking. ...,,t,f,100%,2016-07-17,t,,41.861057,-87.619303,Modern condo close to parks and museums,South Loop/Printers Row,0,,https://a0.muscache.com/im/pictures/f61997c8-7...,60605,2.0,2.0
50,13435559,4.158883,House,Private room,"{Internet,""Wireless Internet"",""Air conditionin...",2,1.0,Real Bed,strict,False,Chicago,Historic Pilsen storefront that now sells han...,2014-03-19,t,t,100%,2014-02-27,f,2017-04-30,41.851047,-87.679419,Art studio + house in one,Pilsen,85,96.0,https://a0.muscache.com/im/pictures/54826875/f...,60608,1.0,1.0
91,19692298,4.70048,House,Entire home/apt,"{TV,Internet,""Wireless Internet"",""Air conditio...",3,1.0,Real Bed,flexible,True,Chicago,Charming cozy one bedroom w a king size temper...,2015-11-23,t,f,100%,2015-03-15,f,2016-11-21,41.92136,-87.699416,Charming bohemian duplex w King bed,,18,97.0,https://a0.muscache.com/im/pictures/a8312755-d...,60647,1.0,1.0


In [7]:
#drop duplicates for Airbnb ID's and drop NA in specific columns
df.drop_duplicates(subset=['id'],inplace=True)
df.dropna(subset=['bathrooms', 'host_has_profile_pic',
                  'host_identity_verified', 'host_since',
                  'bedrooms', 'beds'],
                   inplace=True)

#create our distance column
df['distance'] = df.apply(lambda r: get_distance(r), axis=1)

#convert host response rates and review scores to floats between 0 and 1
df['host_response_rate'] = df['host_response_rate'].str.strip('%').astype('float')/100
df.review_scores_rating = df.review_scores_rating/100

#fill in empty host response rates and review scores as 0
df['host_response_rate'].fillna(value=0.0, inplace=True)
df['review_scores_rating'].fillna(value=0.0, inplace=True)

#Replace t,f and True,False to 1 and 0
df.replace({'t':1, 'f': 0}, inplace=True)
df.cleaning_fee = df.cleaning_fee.astype(int)

#Apply a sqrt transform on number_of_reviews
df.number_of_reviews = df.number_of_reviews.apply(lambda x: np.sqrt(x))

In [8]:
#make a new columns of accommodates per bedroom
df['ppl_per_room'] = df['accommodates'] / df['bedrooms']

#replace inf and -inf with 0.0
df.ppl_per_room = df.ppl_per_room.replace([np.inf, -np.inf], float(0.0))

In [9]:
#Drop unwanted columns
df_cleaned = df.drop(columns = ['id','amenities', 'description',
                               'first_review', 'host_since', 'last_review',
                               'name', 'neighbourhood', 'thumbnail_url',
                               'zipcode', 'city', 'latitude', 'longitude', 'beds'])

In [10]:
#apply property_grouping function to property_type
df_cleaned['property_type'] = df_cleaned['property_type'].apply(property_grouping)

#get rid of any super strict policies since there are relatively a few of them
df_cleaned = df_cleaned[df_cleaned['cancellation_policy'].str.contains('super')==False]

In [11]:
df_cleaned.shape

(3709, 17)

In [12]:
#Rearranging the dataframe
B = df_cleaned.select_dtypes(exclude='object')
A = df_cleaned[['cleaning_fee', 'host_has_profile_pic', 'host_identity_verified','instant_bookable']]
B = B.drop(columns=list(A.columns))
C = df_cleaned.select_dtypes(include='object')

In [13]:
final_df = A.join(C)
final_df = final_df.join(B)

In [14]:
final_df.head()

Unnamed: 0,cleaning_fee,host_has_profile_pic,host_identity_verified,instant_bookable,property_type,room_type,bed_type,cancellation_policy,log_price,accommodates,bathrooms,host_response_rate,number_of_reviews,review_scores_rating,bedrooms,distance,ppl_per_room
15,1,1,1,0,Apartment,Private room,Real Bed,moderate,4.094345,2,1.5,1.0,0.0,0.0,1.0,3.061211,2.0
16,0,1,0,0,Loft,Private room,Real Bed,strict,4.317488,2,1.0,0.0,3.872983,0.91,1.0,2.716529,2.0
40,1,1,0,1,Other,Entire home/apt,Real Bed,strict,5.521461,5,2.0,1.0,0.0,0.0,2.0,1.236941,2.5
50,0,1,1,0,House,Private room,Real Bed,strict,4.158883,2,1.0,1.0,9.219544,0.96,1.0,2.585655,2.0
91,1,1,0,0,House,Entire home/apt,Real Bed,flexible,4.70048,3,1.0,1.0,4.242641,0.97,1.0,3.808537,3.0


In [15]:
dummies = make_dummies(final_df.iloc[:,4:8])
final_df = final_df.join(dummies)
final_df.drop(columns=['property_type', 'room_type', 'bed_type', 'cancellation_policy'], inplace=True)

In [16]:
final_df.head()

Unnamed: 0,cleaning_fee,host_has_profile_pic,host_identity_verified,instant_bookable,log_price,accommodates,bathrooms,host_response_rate,number_of_reviews,review_scores_rating,bedrooms,distance,ppl_per_room,Apartment,House,Loft,Other,Townhouse,Entire_home_apt,Private_room,Shared_room,Airbed,Couch,Futon,Pull_out_Sofa,Real_Bed,flexible,moderate,strict
15,1,1,1,0,4.094345,2,1.5,1.0,0.0,0.0,1.0,3.061211,2.0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0
16,0,1,0,0,4.317488,2,1.0,0.0,3.872983,0.91,1.0,2.716529,2.0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,1
40,1,1,0,1,5.521461,5,2.0,1.0,0.0,0.0,2.0,1.236941,2.5,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,1
50,0,1,1,0,4.158883,2,1.0,1.0,9.219544,0.96,1.0,2.585655,2.0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1
91,1,1,0,0,4.70048,3,1.0,1.0,4.242641,0.97,1.0,3.808537,3.0,0,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0


In [17]:
y = final_df.log_price
final_df.drop(columns='log_price',inplace=True)

In [18]:
X_train, X_test, y_train, y_test = train_test_split(final_df, y, test_size=.3)

In [19]:
X_numeric_train = X_train.iloc[:,4:12]
X_categorical_train = X_train.iloc[:,12:]
X_extra_train = X_train.iloc[:,:4]

ss = StandardScaler()

X_num_train_col = X_numeric_train.columns
X_num_train_ind = X_numeric_train.index

X_numeric_train = pd.DataFrame(ss.fit_transform(X_numeric_train))

X_numeric_train.columns = X_num_train_col
X_numeric_train.index = X_num_train_ind

X_numeric_train = X_numeric_train[(np.abs(X_numeric_train) < 2.5).all(axis=1)]

X_categorical_train = X_categorical_train[X_categorical_train.index.isin(X_numeric_train.index)]
X_extra_train = X_extra_train[X_extra_train.index.isin(X_numeric_train.index)]
y_train = y_train[y_train.index.isin(X_numeric_train.index)]

X1 = X_numeric_train.join(X_categorical_train)
X1 = X1.join(X_extra_train)

X1.drop(columns=['Apartment', 'Entire_home_apt', 'Real_Bed', 'moderate'],inplace=True)
lr = LinearRegression()
lr.fit(X1,y_train)
lr.score(X1,y_train)

0.578027904056609

In [20]:
X_numeric_test = X_test.iloc[:,7:12]
X_categorical_test = X_test.iloc[:,12:]
X_extra_test = X_test.iloc[:,:3]

ss = StandardScaler()

X_num_test_col = X_numeric_test.columns
X_num_test_ind = X_numeric_test.index

X_numeric_test = pd.DataFrame(ss.fit_transform(X_numeric_test))

X_numeric_test.columns = X_num_test_col
X_numeric_test.index = X_num_test_ind

X_numeric_test = X_numeric_test[(np.abs(X_numeric_test) < 2.5).all(axis=1)]

X_categorical_test = X_categorical_test[X_categorical_test.index.isin(X_numeric_test.index)]
X_extra_test = X_extra_test[X_extra_test.index.isin(X_numeric_test.index)]
y_test = y_test[y_test.index.isin(X_numeric_test.index)]

X2 = X_numeric_test.join(X_categorical_test)
X2 = X2.join(X_extra_test)

X2.drop(columns=['Apartment', 'Entire_home_apt', 'Real_Bed', 'moderate'],inplace=True)

In [21]:
lr = LinearRegression()
lr.fit(X2,y_test)
lr.score(X2,y_test)

0.5132852115293944

In [22]:
f1 = 'log_price ~ ' + '+'.join(X1.columns)
mod = smf.ols(formula=f1, data = X1.join(y_train))
res = mod.fit()
res.summary()

0,1,2,3
Dep. Variable:,log_price,R-squared:,0.578
Model:,OLS,Adj. R-squared:,0.573
Method:,Least Squares,F-statistic:,108.1
Date:,"Fri, 06 Mar 2020",Prob (F-statistic):,0.0
Time:,09:47:12,Log-Likelihood:,-957.88
No. Observations:,1919,AIC:,1966.0
Df Residuals:,1894,BIC:,2105.0
Df Model:,24,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,4.2880,0.285,15.023,0.000,3.728,4.848
accommodates,0.1959,0.025,7.780,0.000,0.146,0.245
bathrooms,0.0835,0.015,5.600,0.000,0.054,0.113
host_response_rate,-0.0412,0.030,-1.369,0.171,-0.100,0.018
number_of_reviews,-0.0684,0.012,-5.830,0.000,-0.091,-0.045
review_scores_rating,0.3613,0.053,6.846,0.000,0.258,0.465
bedrooms,0.0561,0.018,3.145,0.002,0.021,0.091
distance,-0.0958,0.010,-9.464,0.000,-0.116,-0.076
ppl_per_room,0.0062,0.014,0.436,0.663,-0.022,0.034

0,1,2,3
Omnibus:,19.891,Durbin-Watson:,1.957
Prob(Omnibus):,0.0,Jarque-Bera (JB):,28.943
Skew:,-0.095,Prob(JB):,5.19e-07
Kurtosis:,3.571,Cond. No.,90.3


In [23]:
f2 = 'log_price ~ ' + '+'.join(X2.columns)
mod = smf.ols(formula=f2, data = X2.join(y_test))
res = mod.fit()
res.summary()

0,1,2,3
Dep. Variable:,log_price,R-squared:,0.513
Model:,OLS,Adj. R-squared:,0.504
Method:,Least Squares,F-statistic:,55.06
Date:,"Fri, 06 Mar 2020",Prob (F-statistic):,2.01e-140
Time:,09:47:15,Log-Likelihood:,-694.14
No. Observations:,1012,AIC:,1428.0
Df Residuals:,992,BIC:,1527.0
Df Model:,19,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,5.4829,0.347,15.787,0.000,4.801,6.164
number_of_reviews,-0.0927,0.020,-4.557,0.000,-0.133,-0.053
review_scores_rating,-0.0316,0.018,-1.758,0.079,-0.067,0.004
bedrooms,0.1938,0.023,8.423,0.000,0.149,0.239
distance,-0.1235,0.017,-7.400,0.000,-0.156,-0.091
ppl_per_room,0.0500,0.020,2.489,0.013,0.011,0.089
House,0.0915,0.050,1.843,0.066,-0.006,0.189
Loft,0.2136,0.104,2.050,0.041,0.009,0.418
Other,0.1934,0.043,4.468,0.000,0.108,0.278

0,1,2,3
Omnibus:,85.377,Durbin-Watson:,2.036
Prob(Omnibus):,0.0,Jarque-Bera (JB):,181.669
Skew:,0.517,Prob(JB):,3.56e-40
Kurtosis:,4.8,Cond. No.,1.51e+17


In [24]:
X_numeric = final_df.iloc[:,7:12]
X_categorical = final_df.iloc[:,12:]
X_extra = final_df.iloc[:,:3]

ss = StandardScaler()

X_num_col = X_numeric.columns
X_num_ind = X_numeric.index

X_numeric = pd.DataFrame(ss.fit_transform(X_numeric))

X_numeric.columns = X_num_col
X_numeric.index = X_num_ind

X_numeric = X_numeric[(np.abs(X_numeric) < 2.5).all(axis=1)]

X_categorical = X_categorical[X_categorical.index.isin(X_numeric.index)]
X_extra = X_extra[X_extra.index.isin(X_numeric.index)]
y = y[y.index.isin(X_numeric.index)]

X = X_numeric.join(X_categorical)
X = X.join(X_extra)

X.drop(columns=['Apartment', 'Entire_home_apt', 'Real_Bed', 'moderate'],inplace=True)

lr = LinearRegression()
lr.fit(X,y)
lr.score(X,y)

0.511299136764708

In [25]:
X.drop(columns=['Airbed', 'Couch', 'Futon','Pull_out_Sofa', 'flexible', 'host_identity_verified'],inplace=True)
f = 'log_price ~ ' + '+'.join(X.columns)
mod = smf.ols(formula=f, data = X.join(y))
res = mod.fit()
res.summary()

0,1,2,3
Dep. Variable:,log_price,R-squared:,0.51
Model:,OLS,Adj. R-squared:,0.508
Method:,Least Squares,F-statistic:,251.8
Date:,"Fri, 06 Mar 2020",Prob (F-statistic):,0.0
Time:,09:47:16,Log-Likelihood:,-2236.3
No. Observations:,3398,AIC:,4503.0
Df Residuals:,3383,BIC:,4595.0
Df Model:,14,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,4.5235,0.178,25.384,0.000,4.174,4.873
number_of_reviews,-0.0803,0.011,-7.606,0.000,-0.101,-0.060
review_scores_rating,-0.0195,0.009,-2.071,0.038,-0.038,-0.001
bedrooms,0.1942,0.011,17.184,0.000,0.172,0.216
distance,-0.1208,0.009,-13.871,0.000,-0.138,-0.104
ppl_per_room,0.0532,0.010,5.107,0.000,0.033,0.074
House,0.1117,0.025,4.445,0.000,0.062,0.161
Loft,0.2227,0.062,3.619,0.000,0.102,0.343
Other,0.2150,0.024,9.057,0.000,0.168,0.261

0,1,2,3
Omnibus:,252.107,Durbin-Watson:,2.02
Prob(Omnibus):,0.0,Jarque-Bera (JB):,708.606
Skew:,0.397,Prob(JB):,1.34e-154
Kurtosis:,5.091,Cond. No.,54.3


In [26]:
X.drop(columns=['strict'],inplace=True)
f = 'log_price ~ ' + '+'.join(X.columns)
mod = smf.ols(formula=f, data = X.join(y))
res = mod.fit()
res.summary()

0,1,2,3
Dep. Variable:,log_price,R-squared:,0.51
Model:,OLS,Adj. R-squared:,0.508
Method:,Least Squares,F-statistic:,271.3
Date:,"Fri, 06 Mar 2020",Prob (F-statistic):,0.0
Time:,09:47:16,Log-Likelihood:,-2236.3
No. Observations:,3398,AIC:,4501.0
Df Residuals:,3384,BIC:,4586.0
Df Model:,13,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,4.5233,0.178,25.387,0.000,4.174,4.873
number_of_reviews,-0.0805,0.011,-7.646,0.000,-0.101,-0.060
review_scores_rating,-0.0195,0.009,-2.076,0.038,-0.038,-0.001
bedrooms,0.1940,0.011,17.252,0.000,0.172,0.216
distance,-0.1208,0.009,-13.874,0.000,-0.138,-0.104
ppl_per_room,0.0531,0.010,5.105,0.000,0.033,0.074
House,0.1116,0.025,4.443,0.000,0.062,0.161
Loft,0.2222,0.061,3.615,0.000,0.102,0.343
Other,0.2149,0.024,9.056,0.000,0.168,0.261

0,1,2,3
Omnibus:,252.305,Durbin-Watson:,2.02
Prob(Omnibus):,0.0,Jarque-Bera (JB):,709.702
Skew:,0.397,Prob(JB):,7.77e-155
Kurtosis:,5.093,Cond. No.,52.6


In [27]:
coefficients = dict(zip(X.columns,lr.coef_))
coefficients = sorted(coefficients.items(), key=(lambda c:c[1]), reverse=True)

In [28]:
coefficients

[('Townhouse', 0.33370943849196866),
 ('Loft', 0.22514408728797755),
 ('Other', 0.21508179231143598),
 ('bedrooms', 0.19414242143045662),
 ('House', 0.11425938330335692),
 ('ppl_per_room', 0.052880195920144604),
 ('review_scores_rating', -0.021122375490606235),
 ('cleaning_fee', -0.04440517179566175),
 ('host_has_profile_pic', -0.07862921493120135),
 ('number_of_reviews', -0.08480258021264012),
 ('distance', -0.12090679432614981),
 ('Private_room', -0.6618431178521413),
 ('Shared_room', -1.1290906856468343)]

In [29]:
final_df
actual_prices = y.apply(lambda Y: np.exp(Y))
actual_prices

15        60.0
16        75.0
40       250.0
50        64.0
91       110.0
         ...  
74010    140.0
74033    140.0
74057     69.0
74092    110.0
74104     78.0
Name: log_price, Length: 3398, dtype: float64

In [30]:
df_vis['actual_prices'] = actual_prices

In [31]:
chi_num_people = df['accommodates'].unique()

In [32]:
chi_num_people = sorted(chi_num_people)

In [33]:
df['instant_bookable'].value_counts()

0    2656
1    1059
Name: instant_bookable, dtype: int64

In [34]:
df['accommodates'].value_counts()

2     1416
4      706
6      412
3      320
1      240
5      228
8      160
10      83
7       59
12      25
16      24
9       15
14      12
13       7
15       4
11       4
Name: accommodates, dtype: int64

In [37]:
chi_probability = []
chi_group_probabilities = []
chi_responses = []
chi_rooms = []
chi_distances = []
chi_per_room = []
chi_prices = []
chi_bathrooms = []
#verified = []
for n in chi_num_people:
    temp_df = df_vis[df_vis['accommodates'] == n]
    n_y = list(temp_df['instant_bookable'].value_counts())
    
    group_p = len(temp_df['accommodates'])/len(final_df['accommodates'])
    response_mean = np.mean(temp_df['host_response_rate'])
    room_mean = np.mean(temp_df['bedrooms'])
    distance_mean = np.mean(temp_df['distance'])
    ppl = np.mean(temp_df['ppl_per_room'])
    price = np.mean(temp_df['actual_prices'])
    bathroom = np.mean(temp_df['bathrooms'])
    
    chi_group_probabilities.append(group_p)
    chi_responses.append(response_mean)
    chi_probability.append(n_y[1]/sum(n_y))
    chi_rooms.append(room_mean)
    chi_distances.append(distance_mean)
    chi_per_room.append(ppl)
    chi_prices.append(price)
    chi_bathrooms.append(bathroom)

In [41]:
chi_distances

[4.535114338084559,
 4.152032033317386,
 3.6008852645078613,
 3.6241075139931658,
 3.380424440005781,
 3.7191416795433465,
 3.759474050859176,
 4.036074919146024,
 4.520354086707857,
 3.3944474563658797,
 3.139857108008428,
 3.858627542973955,
 3.669589482591857,
 4.1642599672180065,
 5.559858725178525,
 3.130400965979456]

In [38]:
chi_prices

[62.85652173913044,
 83.02415812591508,
 111.99675324675324,
 137.48458149779736,
 163.38425925925927,
 209.43304843304844,
 211.11320754716985,
 224.08130081300817,
 287.18181818181824,
 252.46153846153842,
 198.99999999999997,
 303.83333333333326,
 337.0000000000002,
 292.50000000000006,
 nan,
 481.1111111111108]

In [39]:
chi_group_probabilities

[0.06470746832030197,
 0.3817740630897816,
 0.08627662442706929,
 0.19034780264222162,
 0.06093286600161769,
 0.11081153949851712,
 0.0159072526287409,
 0.04259908331086546,
 0.004044216770018873,
 0.022377999460771097,
 0.0010784578053383662,
 0.0067403612833647885,
 0.0018873011593421407,
 0.0032353734160150984,
 0.0010784578053383662,
 0.006201132380695605]

In [40]:
np.mean(df_vis['log_price'])

124.82607416127134

In [None]:
np.mean(df_vis[])