### Scrape All ISPs

In [1]:
import pandas as pd
import gzip
import json
from pandas_geojson import to_geojson
from hughes_utils import get_hughes_offer_data
from xfinity_utils import get_xfinity_offer_data
from viastat_utils import get_viastat_offer_data

In [2]:
# notebook-wide settings: the city being scraped and the path of its
# gzip-compressed address CSV
CITY = "Boston"
INPUT_CSV_PATH = "../data/open_address/processed/csv/city_of_boston-addresses-city.csv.gz"

In [3]:
# Read all address data for the specified city (every column is loaded as str)
# and clean it in a single chain, so re-running this cell always starts from
# the file on disk instead of mutating an existing frame in place.
city_df = (
    pd.read_csv(INPUT_CSV_PATH, dtype='str')
    # drop rows that have no postcode
    .dropna(subset=['postcode'], axis=0)
    .assign(
        # keep only the first house number when several are given ("10-12" -> "10");
        # the .str accessor passes missing numbers through as NaN instead of raising
        number=lambda df: df['number'].str.split('-').str[0],
        # cast lat and lon columns to float
        lat=lambda df: df['lat'].astype(float),
        lon=lambda df: df['lon'].astype(float),
    )
)
city_df.head(5)

Unnamed: 0,hash,number,street,unit,city,district,region,postcode,id,incorporated_place,state,lon,lat,block_group
0,0f869e17187b4140,10,Allston Street,3.0,Boston,Suffolk County,,2129,,Boston city,MA,-71.063955,42.38001,4
1,52fcca90d274bcf4,55,Waldeck Street,,Boston,Suffolk County,,2124,,Boston city,MA,-71.066813,42.296728,4
2,8191565c459d8e98,229,North Street,,Boston,Suffolk County,,2109,,Boston city,MA,-71.0526,42.36338,4
3,b38857f6f0870076,79,Evans Street,1.0,Boston,Suffolk County,,2124,,Boston city,MA,-71.08099,42.2824,4
4,2cde7c86278311a0,10,E Springfield Street,4.0,Boston,Suffolk County,,2118,,Boston city,MA,-71.07575,42.33673,1


Utility Functions

In [4]:
# builds GeoJSON data from an offer dataframe
def df_to_geojson(df):
    """Convert ``df`` to GeoJSON data with ``pandas_geojson.to_geojson``.

    The 'lat' and 'lon' columns are passed as the coordinate columns, and the
    columns listed in ``property_columns`` are passed as the properties.
    Returns whatever ``to_geojson`` returns.
    """
    property_columns = [
        'address_full',
        'incorporated_place',
        'state',
        'collection_datetime',
        'provider',
        'speed_down',
        'speed_up',
        'speed_unit',
        'price',
        'technology',
        'package',
        'fastest_speed_down',
        'fastest_speed_price',
    ]
    return to_geojson(df=df, lat='lat', lon='lon', properties=property_columns)

In [5]:
# writes geojson data to a gzip-compressed file
def compress_geojson(gejson_data, output_file):
    """Serialize ``gejson_data`` as JSON into the gzip file ``output_file``.

    The file is opened in text mode with UTF-8 encoding and non-ASCII
    characters are written as-is (``ensure_ascii=False``). Returns None.
    """
    with gzip.open(output_file, mode='wt', encoding='utf-8') as gz_handle:
        json.dump(gejson_data, gz_handle, ensure_ascii=False)

In [6]:
# Per-provider output directories for the selected city.
hughes_output_path = f"../data/intermediary/isp/hughes/{CITY}/"
xfinity_output_path = f"../data/intermediary/isp/xfinity/{CITY}/"
viastat_output_path = f"../data/intermediary/isp/viastat/{CITY}/"

# Query every provider for every address and save each offer as
# "<row position>.geojson.gz" in that provider's directory. `index` is the
# 1-based position of the row in city_df, so file names stay aligned with
# rows even when a row is skipped or a provider fails.
for index, (_, row) in enumerate(city_df.iterrows(), start=1):
    street = row['street']
    number = row['number']

    # A missing street is read as NaN (a float) and cannot be split; skip the
    # row instead of letting one bad address abort the whole run.
    if not isinstance(street, str) or not street.split():
        print(f'Skipping row {index}: no street name')
        print('\n\n')
        continue

    street_words = street.split()
    streetType = street_words[-1]
    # Compare whole words: a substring test against the street type would also
    # drop unrelated words that happen to be contained in it (e.g. "Ave" before
    # "Avenue").
    streetName = " ".join(word for word in street_words[:-1] if word != streetType)
    city = row['city']
    zipcode = row['postcode']
    state = row['state']
    lat = row['lat']
    lon = row['lon']

    # Each provider is attempted independently so one failure does not block
    # the others. Only Exception is caught (so the run can still be
    # interrupted), and the reason is printed rather than silently discarded.
    try:
        hughes_offer = get_hughes_offer_data(house_number=number, street_name=streetName, street_type=streetType, city=city, state=state, zip_code=str(zipcode), lat=lat, long=lon)
        hughes_df = pd.DataFrame(hughes_offer, index=[0])
        print('Hughes \n', hughes_df)
        hughes_geojson = df_to_geojson(hughes_df)
        compress_geojson(hughes_geojson, hughes_output_path + str(index) + '.geojson.gz')
    except Exception as exc:
        print(f'Hughes failed: {exc!r}')

    try:
        xfinity_offer = get_xfinity_offer_data(house_number=number, street_name=streetName, street_type=streetType, city=city, state=state, zip_code=zipcode, lat=lat, long=lon)
        xfinity_df = pd.DataFrame(data=xfinity_offer, index=[0])
        print(xfinity_df)
        xfinity_geojson = df_to_geojson(xfinity_df)
        compress_geojson(xfinity_geojson, xfinity_output_path + str(index) + '.geojson.gz')
    except Exception as exc:
        print(f'Xfinity failed: {exc!r}')

    try:
        viastat_offer = get_viastat_offer_data("0", f'{number} {streetName} {streetType}', city, state, int(zipcode), lat, lon)
        # Bind the frame to viastat_df: the statements below read that name, so
        # storing it under any other name raises NameError on every row.
        viastat_df = pd.DataFrame(data=viastat_offer, index=[0])
        print(viastat_df)
        viastat_geojson = df_to_geojson(viastat_df)
        compress_geojson(viastat_geojson, viastat_output_path + str(index) + '.geojson.gz')
    except Exception as exc:
        print(f'Viastat failed: {exc!r}')

    print('\n\n')
   


Hughes 
                             address_full incorporated_place state       lat  \
0  10, Allston Street, Boston, MA, 02129             Boston    MA  42.38001   

         lon  collection_datetime provider speed_down  speed_up speed_unit  \
0 -71.063955         1.701713e+09   hughes         15         3       Mbps   

   price technology package  fastest_speed_down fastest_speed_price  
0  49.99  Satellite   15 GB                  25               49.99  
Xfinity failed
Viastat failed



Hughes 
                             address_full incorporated_place state        lat  \
0  55, Waldeck Street, Boston, MA, 02124             Boston    MA  42.296728   

         lon  collection_datetime provider speed_down  speed_up speed_unit  \
0 -71.066813         1.701713e+09   hughes         15         3       Mbps   

   price technology package  fastest_speed_down fastest_speed_price  
0  49.99  Satellite   15 GB                  25               49.99  
                            address