# Do some initial testing and analysis  


In [4]:
import pickle
from geopy.distance import vincenty

Import all data (data was previously cleaning in other notebooks)

In [5]:
station_data = pickle.load( open( "station_data.p", "rb" ) )
bike_location = pickle.load( open( "bike_location.p", "rb" ) )


Now I have one dict with metro stations and one with bike stations

In [4]:
print(station_data['RD']['Bethesda'])
print(bike_location['Silver Spring Metro/Colesville Rd & Wayne Ave'])

[38.984282, -77.094431]
[38.99521, -77.02918]


### step 1) create list of bike stations along each line
Compare the distances of each bike station to each metro station of each line. If the bike station is "close" to a metro station, it should be added to a list.  
* Build test example with RD line

In [28]:
vincenty(station_data['RD']['Silver Spring'], bike_location['11th & O St NW']).miles
for key_bike in bike_location:
        dist = vincenty(station_data['RD']['Silver Spring'], bike_location[key_bike]).miles
        if dist <= 0.3:
            print([key_bike ,dist])

['Silver Spring Metro/Colesville Rd & Wayne Ave', 0.14900125289692967]
['East West Hwy & Blair Mill Rd', 0.269544825073598]
['Ripley & Bonifant St', 0.12842855448544993]


* Iterate through each metro line, calculating the distance between each station of that line to each bikeshare station.  
* Use `set` to drop duplicates

#### make the below code a function that can be used to generate lists of stations based on different distances

In [6]:
def close_stations(distance):
    """This fn will return a dict of bikeshare stations close 
    to each metro stop based on the suppled distance in miles"""
    lines = ['RD', 'YL', 'GR','BL', 'OR', 'SV']
    bikes_close = dict()
    for ii in range(len(lines)):
        bikes_temp = []
        for key_metro in station_data[lines[ii]]:
            for key_bike in bike_location:
                dist = vincenty(station_data[lines[ii]][key_metro], bike_location[key_bike]).miles
                if dist <= distance:
                    bikes_temp.append(key_bike)
                    print([lines[ii], key_metro, key_bike, dist])
        bikes_close[lines[ii]] = list(set(bikes_temp))
    return bikes_close

In [57]:
lines = ['RD', 'YL', 'GR','BL', 'OR', 'SV']
bikes_close = dict()
for ii in range(len(lines)):
    bikes_temp = []
    for key_metro in station_data[lines[ii]]:
        for key_bike in bike_location:
            dist = vincenty(station_data[lines[ii]][key_metro], bike_location[key_bike]).miles
            if dist <= 0.25:
                bikes_temp.append(key_bike)
                print([lines[ii], key_metro, key_bike, dist])
    bikes_close[lines[ii]] = list(set(bikes_temp))

['RD', 'Metro Center', '14th St & New York Ave NW', 0.21398470451775278]
['RD', 'Metro Center', '13th St & New York Ave NW', 0.16516781023326638]
['RD', 'Metro Center', 'Metro Center / 12th & G St NW', 0.01309254369908234]
['RD', 'Metro Center', '14th & G St NW', 0.2013871937335083]
['RD', 'Metro Center', '11th & F St NW', 0.06795192147122418]
['RD', 'Metro Center', '10th & E St NW', 0.19796628566735064]
['RD', 'Metro Center', 'MLK Library/9th & G St NW', 0.2059236753082303]
['RD', 'Farragut North', '17th & K St NW', 0.06809744971683841]
['RD', 'Farragut North', '18th & M St NW', 0.14583455959811434]
['RD', 'Farragut North', '19th & L St NW', 0.2097660688445131]
['RD', 'Farragut North', '17th & K St NW / Farragut Square', 0.11020264340930738]
['RD', 'Farragut North', '17th & Rhode Island Ave NW', 0.24109371721077677]
['RD', 'Farragut North', '19th & K St NW', 0.205866878789979]
['RD', 'Farragut North', '16th & K St NW', 0.22001877202091058]
['RD', 'Dupont Circle', 'Massachusetts Ave & 

In [59]:
print(len(bikes_close['GR']))
print(bikes_close['GR'][:5])

37
['8th & O St NW', '10th & U St NW', '3rd & Tingey St SE', '5th St & Massachusetts Ave NW', 'Anacostia Metro']


I now have a dictionary of a list of bike stations (values) within 0.25 miles of each metro line (key).  
Save it with pickle for use in other notebooks

In [60]:
pickle.dump( bikes_close, open( "bikes_close.p", "wb" ) )

### step 2) 
how many bike stations are considered close to each line? I need to make sure the numbers are appropriate for doing statistical analysis

In [63]:
for ii in bikes_close:
    print(ii, len(bikes_close[ii]))

RD 57
YL 42
GR 37
BL 57
OR 62
SV 66


About 10% to 15% of bike stations are within 0.25 miles of each metro line

test my function

In [8]:
fn_test = close_stations(0.1)

['RD', 'Metro Center', 'Metro Center / 12th & G St NW', 0.01309254369908234]
['RD', 'Metro Center', '11th & F St NW', 0.06795192147122418]
['RD', 'Farragut North', '17th & K St NW', 0.06809744971683841]
['RD', 'Dupont Circle', 'Massachusetts Ave & Dupont Circle NW', 0.05904192211308173]
['RD', 'Dupont Circle', '20th & O St NW / Dupont South', 0.07475003434079858]
['RD', 'Cleveland Park', 'Connecticut Ave & Newark St NW / Cleveland Park', 0.032888310887331594]
['RD', 'Van Ness-UDC', 'Van Ness Metro / UDC', 0.06748728170569013]
['RD', 'Tenleytown-AU', 'Tenleytown / Wisconsin Ave & Albemarle St NW', 0.01870215984191263]
['RD', 'Friendship Heights', 'Friendship Hts Metro/Wisconsin Ave & Wisconsin Cir', 0.07030935754340062]
['RD', 'Bethesda', 'Montgomery & East Ln', 0.07256963207918365]
['RD', 'Bethesda', 'Bethesda Metro', 0.028784887064349826]
['RD', 'Medical Center', 'Medical Center Metro ', 0.05183570243479291]
['RD', 'Rockville', 'Rockville Metro East', 0.08791680580313965]
['RD', 'Rock

In [11]:
for ii in fn_test:
    print(ii, len(fn_test[ii]))

RD 20
YL 13
GR 12
BL 18
OR 19
SV 21


In [12]:
fn_test

{'BL': ['Braddock Rd Metro',
  'Metro Center / 12th & G St NW',
  'Benning Rd & East Capitol St NE / Benning Rd Metro',
  'USDA / 12th & Independence Ave SW',
  'King St Metro South',
  '17th & K St NW / Farragut Square',
  'Crystal City Metro / 18th & Bell St',
  'Lynn & 19th St North',
  '11th & F St NW',
  'King St Metro North / Cameron St',
  '22nd & I St NW / Foggy Bottom',
  '1st & D St SE',
  '15th & K St NW',
  'Smithsonian-National Mall / Jefferson Dr & 12th St SW',
  'New Hampshire Ave & 24th St NW',
  'Eastern Market Metro / Pennsylvania Ave & 7th St SE',
  'Pentagon City Metro / 12th & S Hayes St',
  'Potomac & Pennsylvania Ave SE'],
 'GR': ['Anacostia Metro',
  '12th & U St NW',
  'Congress Heights Metro',
  '7th & F St NW/Portrait Gallery',
  '7th & R St NW / Shaw Library',
  'Convention Center / 7th & M St NW',
  'Georgia & New Hampshire Ave NW',
  '4th & M St SW',
  '8th & D St NW',
  '14th & Irving St NW',
  'M St & New Jersey Ave SE',
  'Fort Totten Metro'],
 'OR': ['