In [6]:
# import libraries
%matplotlib inline
import numpy as np
#import csv
import matplotlib.pyplot as plt
import pandas as pd
import glob
import ulmo
import os
import scipy.spatial
import scipy.stats
import matplotlib
matplotlib.rcParams.update({'font.size': 18})

In [7]:
# plotting utilities
def lin_trend_plot(start_x, y,title, xlabel, ylabel) : 
# plots x,y (need to be np array) and calculates and prints their best fit line
    ind = ~np.isnan(y.values)# & ~np.isnan(x) # subset values that aren't NaNs
    x = np.arange(0,y.shape[0])
    m, b, r_value, p, std_err = scipy.stats.linregress(x[ind],y[ind])
    plt.scatter(x+start_x,y)
    plt.plot(x+start_x, m*x+b, color = 'black')
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
    # annotate the linear reqression, y = mx+b
    plt.annotate('y = %.2f x + %.2f'%(m,b), xy=(.5, .9), xycoords='axes fraction',  horizontalalignment='left', verticalalignment='bottom')
    plt.annotate('r = %.2f'%(r_value), xy=(.5, .85), xycoords='axes fraction',  horizontalalignment='left', verticalalignment='bottom')
    plt.annotate('p = %.2f'%(p), xy=(.5, .8), xycoords='axes fraction',  horizontalalignment='left', verticalalignment='bottom')
    plt.annotate('N = %i'%(ind.shape), xy=(.5, .75), xycoords='axes fraction',  horizontalalignment='left', verticalalignment='bottom')
    return m, b, r_value, p, std_err

def easy_scatter(x,y,title, xlabel, ylabel) : 
# plots x,y (need to be np array) and calculates and prints their best fit line
    ind = ~np.isnan(y) & ~np.isnan(x) # subset values that aren't NaNs
    m,b = np.polyfit(x[ind],y[ind],1)
    r, p = scipy.stats.pearsonr(x[ind], y[ind]) #np.corrcoef(x[ind],y[ind])[0,1]
    plt.scatter(x,y)
    plt.plot(x, m*x+b, color = 'black')
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
    # annotate the linear reqression, y = mx+b
    plt.annotate('y = %.2f x + %.2f'%(m,b), xy=(.5, .9), xycoords='axes fraction',  horizontalalignment='left', verticalalignment='bottom')
    plt.annotate('r = %.2f'%(r), xy=(.5, .85), xycoords='axes fraction',  horizontalalignment='left', verticalalignment='bottom')
    plt.annotate('p = %.2f'%(p), xy=(.5, .8), xycoords='axes fraction',  horizontalalignment='left', verticalalignment='bottom')
    plt.annotate('N = %i'%(ind.shape), xy=(.5, .75), xycoords='axes fraction',  horizontalalignment='left', verticalalignment='bottom')
    return m, r, p

def how_UHI_varies(mean_UHIs, closeststations_distance): 
    plt.figure(figsize= [12,4])
    plt.subplot(1,3,1)
    data = mean_UHIs[~np.isnan(mean_UHIs)]
    plt.hist(data)
    plt.xlabel('$\Delta T$ ($\Delta ^\circ$C)')
    plt.ylabel('Count')
    plt.annotate('min = %.2f'%(data.min()), xy=(0, .94), xycoords='axes fraction',  horizontalalignment='left', verticalalignment='bottom')
    plt.annotate('max = %.2f'%(data.max()), xy=(0, .89), xycoords='axes fraction',  horizontalalignment='left', verticalalignment='bottom')
    plt.annotate('$\mu$ = %.2f'%(data.mean()), xy=(0, .84), xycoords='axes fraction',  horizontalalignment='left', verticalalignment='bottom')
    plt.annotate('$\sigma$ = %.2f'%(data.std()), xy=(0, .79), xycoords='axes fraction',  horizontalalignment='left', verticalalignment='bottom')
    plt.title('Histogram of $\Delta T$')

    plt.subplot(1,3,2)
    m,r,p = easy_scatter(closeststations_distance, mean_UHIs, 
                 'Distance to rural station vs. $\Delta T$ ', 
             'Distance to rural station (degrees lat/lon)', '$\Delta T$ ($\Delta ^\circ$C)',
             )

    plt.subplot(1,3,3)
    m2,r2,p2 = easy_scatter(ghcn.loc[rural_station_list].Brightness.values, mean_UHIs, 
                 'Brightness versus $\Delta T$', 
                 'Satellite Brightness Index', '$\Delta T$ ($\Delta ^\circ$C)',
                 )
    return m,r,p

In [9]:
# readin in GHCN data 
ghcn = pd.read_fwf('data/ghcnd-stations.txt', colspecs = [(0,11), (12,19), (21,29), (31,36),(38,40), (41,70), (72,74),(76,78),(80,85)], header = None) 
colnames = ['GHCN ID', 'lat', 'lon', 'elevation', 'state', 'name', 'gsn flag', 'HCN/CRN FLAG', 'WMO ID']
ghcn.columns = colnames

# append the brightness index 
BI = np.load('data/brightnessGHCN.npy')
ghcn['Brightness'] = BI

currentstations = ulmo.ncdc.ghcn_daily.get_stations(start_year=1985, end_year = 2016, elements = ['TMIN', 'TMAX', 'AWND'], as_dataframe=True, update=False)
currentGHCNstations = np.intersect1d(currentstations.id, ghcn['GHCN ID'].values)
ghcnSubset = ghcn.set_index('GHCN ID').loc[currentstations.id.values]
# at this point, ghcn must have the station id set as the index 

ghcn_noairport = ghcn[~ghcn.name.str.contains('INTL')]
ghcn_noairport = ghcn_noairport[~ghcn_noairport.name.str.contains(' AP')]
ghcn_noairport = ghcn_noairport[~ghcn_noairport.name.str.contains('AIRPORT')]
ghcn = ghcn_noairport.set_index('GHCN ID').loc[currentstations.id.values]

# generate atlas of cities and k-d tree 
atlas = pd.read_csv('data/world_cities.csv')
atlas = atlas[(atlas['country'] == 'United States of America')]# & (atlas['pop']>100000)]
atlas = atlas.set_index('city')
tree = scipy.spatial.cKDTree(ghcn[['lon', 'lat']].values, leafsize=100)
#atlas = atlas[atlas['pop']> 500000]

paired_df = pd.read_csv('USpairs2005-2015.csv').set_index('City', drop = False)
paired_df = paired_df[paired_df['Urban distance'] < 0.25]

results = np.ones([paired_df.shape[0],11])*np.nan # save out min,max, mean, std, m,r,p
results_filepath = 'plots/version7/errorbars/'

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike


In [10]:
!mkdir plots/version7/errorbars
!mkdir plots/version7/errorbars/trend
!mkdir plots/version7/errorbars/slopes
!mkdir plots/version7/errorbars/values

In [5]:
city = 'Mesa'
### Set variables
print(city)
urbanID = paired_df.loc[city]['Urban station']
urbandata = ulmo.ncdc.ghcn_daily.get_data(urbanID,
                                     as_dataframe=True, update = False)
urban_tmin = pd.to_numeric(urbandata['TMIN']['2000-01-01':].value/10.) 
urban_tmin.loc[urbandata['TMIN']['2000-01-01':]['qflag'].dropna().index] = np.nan
urban_summer = urban_tmin[(urban_tmin.index.month >= 6) & (urban_tmin.index.month <= 8)]
# set lat/lon
try: # if  
    if atlas[atlas.index==city].shape[0]>1 : # if more than one hit for the city, eg, Kansas City (MO and KS)
            lat = atlas.loc[city]['lat'][0]
            lon = atlas.loc[city]['lng'] [1]
    else: 
        lat = atlas[atlas['pop']> 300000].loc[city]['lat']
        lon = atlas[atlas['pop']> 300000].loc[city]['lng']        
# try to fix if there are two such cities in the atlas; take the second one
except IndexError: 
    lat = atlas[atlas['pop']> 300000].loc[city]['lat'][1]
    lon = atlas[atlas['pop']> 300000].loc[city]['lng'][1]
# find the closest stations
closeststations = tree.query([lon,lat], k =35, distance_upper_bound=1.5) #used in pairing algorithm: .5
cols = ['Distance', 'Index', 'GHCNID', 'Brightness']

Mesa


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  axis=1)
.resample() is now a deferred operation
You called index(...) on this deferred object which materialized it into a dataframe
by implicitly taking the mean.  Use .resample(...).mean() instead
  daily_index = element_df.resample('D').index.copy()


In [8]:
lat, lon

(33.423914609999997, -111.73608440000001)

In [7]:
closeststations

(array([ 0.08292739,  0.09221545,  0.15282465,  0.17957532,  0.18491879,
         0.22573856,  0.24113618,  0.2579119 ,  0.3215709 ,  0.32743828,
         0.33666198,  0.34675309,  0.35318386,  0.35533968,  0.38165245,
         0.39480938,  0.42021496,  0.42535645,  0.42784842,  0.47419441,
         0.47860788,  0.52119142,  0.53750026,  0.59025405,  0.60893941,
         0.62185774,  0.63139606,  0.63681901,  0.64329358,  0.64584151,
         0.65098671,  0.67889616,  0.69537766,  0.83784176,  0.84701399]),
 array([11159, 11113, 11211, 11121, 11230, 11096, 11223, 11062, 11164,
        11187, 11221, 22627, 11196, 22404, 11108, 11069, 11149, 11090,
        11154, 11092, 22407, 11117, 11091, 11259, 11192, 11194, 11151,
        11195, 19291, 19368, 11227, 11115, 11064, 11094, 11184]))

In [11]:
from scipy.odr import Model, Data, ODR
from scipy.stats import linregress
def f(B, x):
    '''Linear function y = m*x + b'''
    # B is a vector of the parameters.
    # x is an array of the current x values.
    # x is in the same format as the x passed to Data or RealData.
    #
    # Return an array in the same format as y passed to Data or RealData.
    return B[0]*x + B[1]

def easy_scatter_tls(x,y,title='', xlabel='', ylabel='', text_y_loc = .9 , text_x_loc = .5) : 
        linear = Model(f)
        ind = ~np.isnan(y) & ~np.isnan(x)
        linreg = scipy.stats.linregress(x[ind], y[ind])
        mydata = Data(x[ind], y[ind])
        myodr = ODR(mydata, linear, beta0=linreg[0:2]) # beta0 are initial guesses for paremeter values, ie, intercept and slope
        myoutput = myodr.run()
        # t-test for tls regression coeff
        DF = y[ind].shape[0] -2 # for a linear model
        b1 = myoutput.beta[0] # slope
        H0 = 1 # null hypothesis
        SE = myoutput.sd_beta[0]
        t = (b1-H0)/SE
        p = scipy.stats.t.cdf(t,df=DF)
        # plot it  
        plt.plot(x, f(myoutput.beta, x), 'k')
        plt.plot(np.linspace(x.min(), x.max(), 20),np.linspace(x.min(), x.max(), 20), '--k' )
        plt.scatter(x,y,color = 'grey', alpha =.5)
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        plt.legend(['y = %.2f x + %.2f'%(myoutput.beta[0], myoutput.beta[1]),
                    '1-1 line', 'Data'], loc=2, frameon =False)
        plt.title(title)
        return myoutput.beta[0], myoutput.beta[1],linreg.rvalue, p

In [20]:
import ee
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
ee.Initialize()

nlcd = ee.Image('USGS/NLCD/NLCD2011').select('landcover')

In [26]:
lcc = 21
if (lcc >20) & (lcc <=24) : 
    print('developed')
else: 
    print('not developed')

developed


In [74]:
brightness_threshold = 25
#redo_cities = np.where(np.isnan(results[:,0]))[0]
for city in paired_df.index[26:]:
    ### Set variables
    print(city)
    ruralID = paired_df.loc[city]['Rural station']
    ruraldata = ulmo.ncdc.ghcn_daily.get_data(ruralID,
                                         as_dataframe=True, update = False)
    rural_tmin = pd.to_numeric(ruraldata['TMIN']['2000-01-01':].value/10.) 
    rural_tmin.loc[ruraldata['TMIN']['2000-01-01':]['qflag'].dropna().index] = np.nan
    rural_summer = rural_tmin[(rural_tmin.index.month >= 6) & (rural_tmin.index.month <= 8)]
    #urban_summer = urban_tmin[(urban_tmin.index.month >= 6) & (urban_tmin.index.month <= 8)]
            # set lat/lon
    try: # if  
        if city == 'Grand Prairie': 
                lat = atlas.loc['Arlington']['lat']
                lon = atlas.loc['Arlington']['lng']
        elif atlas[atlas.index==city].shape[0]>1 : # if more than one hit for the city, eg, Kansas City (MO and KS)
                lat = atlas.loc[city]['lat'][0]
                lon = atlas.loc[city]['lng'][1]
        else: 
            lat = atlas[atlas['pop']> 300000].loc[city]['lat']
            lon = atlas[atlas['pop']> 300000].loc[city]['lng']        
    # try to fix if there are two such cities in the atlas; take the second one
    except IndexError: 
        lat = atlas[atlas['pop']> 300000].loc[city]['lat'][1]
        lon = atlas[atlas['pop']> 300000].loc[city]['lng'][1]

    # find the closest stations
    closeststations = tree.query([lon,lat], k =35, distance_upper_bound=1.5) #used in pairing algorithm: .5
    cols = ['Distance', 'Index', 'GHCNID', 'Brightness']
    try: 
        # get station ids
        stations = pd.DataFrame(np.array([closeststations[0][~np.isinf(closeststations[0])].astype(float),
                                          closeststations[1][~np.isinf(closeststations[0])],
                                          ghcn.iloc[closeststations[1][~np.isinf(closeststations[0])]]['Brightness'].index,
                                          ghcn.iloc[closeststations[1][~np.isinf(closeststations[0])]]['Brightness'].values]).T, 
                    columns = cols).set_index('GHCNID').drop(ruralID)
        stations = stations[~np.isinf(stations['Distance'].values.astype(float))]
    except IndexError: 
        closeststations = closeststations[0]
        stations = pd.DataFrame(np.array([closeststations[0].astype(float),closeststations[1], ghcn.iloc[closeststations[1]]['Brightness'].index, ghcn.iloc[closeststations[1]]['Brightness'].values]).T, 
                columns = cols).set_index('GHCNID').drop(ruralID)
        stations = stations[~np.isinf(stations['Distance'].values.astype(float))]
    except ValueError: # got this when urban ID wasn't in the stations for Jacksonville, should maybe check
        stations = pd.DataFrame(np.array([closeststations[0].astype(float),closeststations[1], ghcn.iloc[closeststations[1]]['Brightness'].index, ghcn.iloc[closeststations[1]]['Brightness'].values]).T, 
            columns = cols).set_index('GHCNID')#.drop(urbanID)
        stations = stations[~np.isinf(stations['Distance'].values.astype(float))]
    #rural_stations = stations.index
    # select only dim stations
    urban_station_list = stations[stations.Brightness > 45].index
    urban_distance = stations[stations.Brightness > 45].Distance.values.astype(float)
    
    urban_station_list = urban_station_list[urban_distance < .2]
     # if there are rural stations available, do analysis
    if len(urban_station_list) > 0 :# & (ind.sum() > 0): 
        # preallocate
        mean_urban = np.ones(len(urban_station_list))*np.nan
        mean_ms = np.ones(len(urban_station_list))*np.nan
        mean_rs = np.ones(len(urban_station_list))*np.nan
        mean_ps = np.ones(len(urban_station_list))*np.nan
        mean_Trslopes =  np.ones(len(urban_station_list))*np.nan
        
        ii = 0 
        # loop over rural stations
        for urban_id in urban_station_list: #[2:] :
            point = ee.Geometry.Point([ghcn.loc[urban_id].lon,ghcn.loc[urban_id].lat])
            #point = ee.Geometry.Point([ghcn.loc[urban_station_list[0]].lon,ghcn.loc[urban_station_list[0]].lat])
            # sample the data of the NLCD at this location
            data = nlcd.sample(point,30).getInfo()
            try: 
                lcc = data['features'][0]['properties']['landcover']
            except IndexError: 
                lcc = np.nan
            if (lcc >20) & (lcc <=24) & (ghcn.loc[urban_id].Brightness-ghcn.loc[ruralID].Brightness >25): 
                # read in rural data
                urbandata = ulmo.ncdc.ghcn_daily.get_data(urban_id, as_dataframe=True, update=False)
                ###### Import data
                urban_tmin = pd.to_numeric(urbandata['TMIN']['2000-01-01':].value/10.) #rural tmin
                # drop data with flags here
                urban_tmin.loc[urbandata['TMIN']['2000-01-01':]['qflag'].dropna().index] = np.nan
                # extract summertime data 
                urban_summer = urban_tmin[(urban_tmin.index.month >= 6) & (urban_tmin.index.month <= 8)]            
                ###### calculate slope 

                ind = ~np.isnan(urban_summer) & ~np.isnan(urban_summer[rural_summer.index])
                if ind.sum() > 0 :
                    x = rural_summer
                    y = urban_summer
                    linear = Model(f)
                    ind = ~np.isnan(y) & ~np.isnan(x)
                    linreg = scipy.stats.linregress(x[ind], y[ind])
                    mydata = Data(x[ind], y[ind])
                    myodr = ODR(mydata, linear, beta0=linreg[0:2]) # beta0 are initial guesses for paremeter values, ie, intercept and slope
                    myoutput = myodr.run()
                    m = myoutput.beta[0]
                    mean_ms[ii] = m
                    print(m)
                else: 
                    print('no overlapping data')
            else:
                print('not urban')
            ii = ii+1
#       #  print(mean_ms)
        #np.save(results_filepath+'trend/'+ city.replace(" ", "")[0:5]+'Trtrend.npy', mean_Trslopes)
        np.save(results_filepath+'slopes/'+ city.replace(" ", "")[0:5]+'m_urban.npy', mean_ms)
        np.save(results_filepath+'slopes/'+ city.replace(" ", "")[0:5]+'urban_distance.npy', urban_distance[urban_distance <.2])
        #np.save(results_filepath+'slopes/'+ city.replace(" ", "")[0:5]+'r.npy', mean_ms)
        #np.save(results_filepath+'slopes/'+ city.replace(" ", "")[0:5]+'p.npy', mean_ps)
        #np.save(results_filepath+'values/'+ city.replace(" ", "")[0:5]+'rural.npy', mean_rural)
    else: 
        print( 'Not enough urban stations for %s'%city)
#     if np.mod(i,10) == 0 : 
#         np.savetxt(results_filepath+'results.csv', results, delimiter = ',')
        
#np.savetxt(results_filepath+'results.csv', results, delimiter = ',')

Salt Lake City
no overlapping data
1.07209303697
1.0286418353
no overlapping data
0.91902397808
no overlapping data
Omaha
1.02962054566
1.05058232862
0.940590236684
Raleigh
0.837685818352
0.942532138729
not urban
0.910775312377
Cleveland
1.03174082377
Cincinnati
1.07921222753
1.08408549517
Nashville
not urban
0.956849774323
not urban
not urban
1.0126921451
1.039792624
Memphis
0.930001945716
0.872208262003
0.957121671077
Norfolk
Not enough urban stations for Norfolk
Milwaukee
0.994939102871
1.00350547973
1.013637353
0.979195178111
0.952088456345
Buffalo
1.04714582509
0.982352284997
Pittsburgh
not urban
not urban
not urban
1.09682054418
Minneapolis
0.83312659481
0.9076743661
0.853595815987
0.93347569675
not urban
Honolulu
not urban
not urban
not urban
not urban
not urban
not urban
Seattle
0.875890714682
no overlapping data
0.837742284221
no overlapping data
0.891851648127
Phoenix
not urban
no overlapping data
0.901353308942
1.11743288262
1.1288163613
San Diego
0.338468796121
not urban
0.

In [73]:
paired_df.index[26:]

Index([u'Salt Lake City', u'Omaha', u'Raleigh', u'Cleveland', u'Cincinnati',
       u'Nashville', u'Memphis', u'Norfolk', u'Milwaukee', u'Buffalo',
       u'Pittsburgh', u'Minneapolis', u'Honolulu', u'Seattle', u'Phoenix',
       u'San Diego', u'St. Louis', u'New Orleans', u'Dallas', u'Philadelphia',
       u'Detroit', u'San Francisco', u'Denver', u'Miami', u'Chicago',
       u'Los Angeles', u'Washington, D.C.', u'New York'],
      dtype='object', name=u'City')

In [68]:
city

'Fort Lauderdale'

city

In [64]:
urban_station_list

Index([u'USC00414597', u'USW00093901', u'USC00410337', u'USW00013960'], dtype='object', name=u'GHCNID')

In [65]:
ghcn.loc[urban_station_list]

Unnamed: 0_level_0,lat,lon,elevation,state,name,gsn flag,HCN/CRN FLAG,WMO ID,Brightness
GHCNID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
USC00414597,32.64,-96.974,180.0,TX,JOE POOL LAKE,,,,59.0
USW00093901,32.733,-96.966,150.0,TX,DALLAS HENSLEY FLD NAS,,,,62.0
USC00410337,32.757,-97.073,163.0,TX,ARLINGTON SIX FLAGS,,,,63.0
USW00013960,32.851,-96.855,134.0,TX,DALLAS LOVE FLD,,,72258.0,63.0


In [66]:
urban_distance[urban_distance<.25]

array([ 0.06435467,  0.07258676,  0.08945549,  0.23439122])

In [61]:
arr = np.ones(len(urban_station_list))
i =0
for urban_id in urban_station_list:
    point = ee.Geometry.Point([ghcn.loc[urban_id].lon,ghcn.loc[urban_id].lat])
    # sample the data of the NLCD at this location
    data = nlcd.sample(point,30).getInfo()
    lcc = data['features'][0]['properties']['landcover']
    arr[i] = lcc
    i= i+1

In [62]:
arr

array([ 43.,  22.,  23.,  23.,  22.,  23.,  23.])

In [56]:
urban_distance

array([ 0.0301089 ,  0.07750495,  0.1551334 ,  0.15673238,  0.15697906,
        0.17598603,  0.17937529,  0.2806035 ,  0.29373218,  0.29445688,
        0.29560228,  0.31410249,  0.31556151,  0.3333158 ,  0.34675558,
        0.35311453,  0.35701827,  0.36246311,  0.37304836,  0.3731582 ,
        0.38039605,  0.40296671,  0.42942603,  0.43503905,  0.43923663,
        0.44684735,  0.46644918,  0.47666916,  0.48441935,  0.49346895,
        0.51333183,  0.52656896])

In [52]:
paired_df.index[8:]

Index([u'Oakland', u'West Palm Beach', u'Louisville', u'Columbus',
       u'San Bernardino', u'St. Paul', u'Tucson', u'Fresno', u'Albuquerque',
       u'Kansas City', u'Ft. Worth', u'Austin', u'Indianapolis', u'Baltimore',
       u'San Jose', u'Sacramento', u'Las Vegas', u'Portland',
       u'Salt Lake City', u'Omaha', u'Raleigh', u'Cleveland', u'Cincinnati',
       u'Nashville', u'Memphis', u'Norfolk', u'Milwaukee', u'Buffalo',
       u'Pittsburgh', u'Minneapolis', u'Honolulu', u'Seattle', u'Phoenix',
       u'San Diego', u'St. Louis', u'New Orleans', u'Dallas', u'Philadelphia',
       u'Detroit', u'San Francisco', u'Denver', u'Miami', u'Chicago',
       u'Los Angeles', u'Washington, D.C.', u'New York'],
      dtype='object', name=u'City')

'Oakland'

In [51]:
urban_distance[urban_distance < .25]

array([ 0.05182397,  0.05414334,  0.07897238,  0.09608563,  0.11204732,
        0.15641196,  0.20489944,  0.21639673])

In [46]:
urban_distance[urban_distance < .25]

array([ 0.0198254 ,  0.07309721,  0.07992144,  0.09302259,  0.09801534,
        0.11404239,  0.14398662,  0.18398444,  0.20992983])

In [21]:
if (lcc >20) & (lcc <=24) & (ghcn.loc[ruralID].Brightness - ghcn.loc[urban_id]) > 25: 
    

Index([u'USC00025467', u'USC00022782', u'USC00027661', u'USC00023190',
       u'USC00028499', u'USC00026603', u'USC00028112', u'USW00093140',
       u'USC00027370', u'USW00053156', u'USC00022462', u'USC00021282',
       u'USC00021314', u'USW00053162', u'USC00023027', u'USC00021306',
       u'USC00029634', u'USC00024977'],
      dtype='object', name=u'GHCNID')

In [34]:
(ghcn.loc[urban_id].Brightness-ghcn.loc[ruralID].Brightness >25)

50.0

In [29]:
(ghcn.loc[ruralID].Brightness - ghcn.loc[urbanID]) > 25

18.0

# 