In [1]:
%matplotlib inline
from __future__ import division, print_function

from scipy import stats as ss
from sklearn.preprocessing import StandardScaler
from statsmodels.stats import multitest
from statsmodels.regression.quantile_regression import QuantReg

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
df = pd.read_csv('pleasantness.dat', sep=' ')
features = ['mov_cars', 'park_cars', 'debris', 'landscape', 
            'mov_ciclyst', 'street_wid', 'build_ident', 'trees', 
            'diff_build', 'people', 'build_height', 'red', 'green', 'blue']
#for x in df.columns:
#    print(x)

In [3]:
rank_jovem = df['V3.Jovem'].argsort()
rank_adulto = df['V3.Adulto'].argsort()
response = (rank_jovem - rank_adulto)

In [4]:
orig = df[features].copy()
#import itertools
#for f1, f2 in itertools.combinations(orig.columns.copy(), 2):
#    prod = orig[f1].values * orig[f2].values
#    orig[f1 + '_times_' + f2] = prod

orig['is_catole'] = np.array(df['bairro'] == 'catole', dtype='d')
orig['is_centro'] = np.array(df['bairro'] == 'centro', dtype='d')
orig['is_liberdade'] = np.array(df['bairro'] == 'liberdade', dtype='d')

scaled = pd.DataFrame(StandardScaler().fit_transform(orig.copy().values),
                      columns=orig.columns)
print(orig.shape)
assert orig.shape == scaled.shape

(108, 17)


In [5]:
model = QuantReg(response, orig)

In [6]:
for q in np.linspace(0.05, 0.95, 10):
    print(q)
    print(model.fit(q=q).summary())
    print()
    print()



0.05
                         QuantReg Regression Results                          
Dep. Variable:                      y   Pseudo R-squared:               0.1185
Model:                       QuantReg   Bandwidth:                       54.69
Method:                 Least Squares   Sparsity:                        234.6
Date:                Tue, 17 May 2016   No. Observations:                  108
Time:                        20:17:17   Df Residuals:                       91
                                        Df Model:                           16
                   coef    std err          t      P>|t|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------
mov_cars         2.9962      7.244      0.414      0.680       -11.393    17.385
park_cars       -2.3368      3.386     -0.690      0.492        -9.062     4.388
debris          14.8123     16.597      0.892      0.374       -18.155    47.780
landscape        5.3612     15.137   






0.15
                         QuantReg Regression Results                          
Dep. Variable:                      y   Pseudo R-squared:               0.1507
Model:                       QuantReg   Bandwidth:                       41.50
Method:                 Least Squares   Sparsity:                        126.3
Date:                Tue, 17 May 2016   No. Observations:                  108
Time:                        20:17:18   Df Residuals:                       91
                                        Df Model:                           16
                   coef    std err          t      P>|t|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------
mov_cars         5.4528      4.596      1.186      0.239        -3.676    14.582
park_cars       -0.5649      2.259     -0.250      0.803        -5.053     3.923
debris          -7.6196      9.803     -0.777      0.439       -27.091    11.852
landscape        5.6660     10.352






0.25
                         QuantReg Regression Results                          
Dep. Variable:                      y   Pseudo R-squared:               0.1345
Model:                       QuantReg   Bandwidth:                       37.88
Method:                 Least Squares   Sparsity:                        111.2
Date:                Tue, 17 May 2016   No. Observations:                  108
Time:                        20:17:18   Df Residuals:                       91
                                        Df Model:                           16
                   coef    std err          t      P>|t|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------
mov_cars         0.5084      3.783      0.134      0.893        -7.007     8.023
park_cars       -1.5070      1.785     -0.844      0.401        -5.052     2.038
debris          -3.7593      8.777     -0.428      0.669       -21.194    13.675
landscape        5.7837      9.612






0.35
                         QuantReg Regression Results                          
Dep. Variable:                      y   Pseudo R-squared:               0.1194
Model:                       QuantReg   Bandwidth:                       34.83
Method:                 Least Squares   Sparsity:                        98.81
Date:                Tue, 17 May 2016   No. Observations:                  108
Time:                        20:17:19   Df Residuals:                       91
                                        Df Model:                           16
                   coef    std err          t      P>|t|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------
mov_cars         3.6561      3.479      1.051      0.296        -3.255    10.567
park_cars       -0.7977      1.628     -0.490      0.625        -4.031     2.436
debris         -11.0129      8.409     -1.310      0.194       -27.717     5.691
landscape        8.8026      9.316






0.45
                         QuantReg Regression Results                          
Dep. Variable:                      y   Pseudo R-squared:              0.09415
Model:                       QuantReg   Bandwidth:                       33.96
Method:                 Least Squares   Sparsity:                        93.13
Date:                Tue, 17 May 2016   No. Observations:                  108
Time:                        20:17:19   Df Residuals:                       91
                                        Df Model:                           16
                   coef    std err          t      P>|t|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------
mov_cars         1.5968      3.464      0.461      0.646        -5.284     8.478
park_cars       -1.5065      1.659     -0.908      0.366        -4.801     1.788
debris         -20.2672      8.605     -2.355      0.021       -37.360    -3.174
landscape       16.7970      9.216






0.55
                         QuantReg Regression Results                          
Dep. Variable:                      y   Pseudo R-squared:              0.08416
Model:                       QuantReg   Bandwidth:                       35.96
Method:                 Least Squares   Sparsity:                        95.68
Date:                Tue, 17 May 2016   No. Observations:                  108
Time:                        20:17:19   Df Residuals:                       91
                                        Df Model:                           16
                   coef    std err          t      P>|t|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------
mov_cars         0.8994      3.694      0.243      0.808        -6.439     8.238
park_cars       -1.9467      1.688     -1.154      0.252        -5.299     1.405
debris         -14.8812      9.246     -1.609      0.111       -33.247     3.485
landscape       11.2679      9.722

