### Scrape All ISP's

In [3]:
import pandas as pd
import gzip
import json
from pandas_geojson import to_geojson
from hughes_utils import get_hughes_offer_data
from xfinity_utils import get_xfinity_offer_data
from viastat_utils import get_viastat_offer_data

In [1]:
# set notebook variables
CITY = 'Boston'
INPUT_CSV_PATH = "../data/open_address/processed/csv/city_of_boston-addresses-city.csv.gz"

In [11]:
# read in all data address data for specified city
city_df = pd.read_csv(INPUT_CSV_PATH)

# for testing only
city_df = city_df[0:5]

# drop na values
city_df.dropna(subset=['postcode'], axis=0, inplace=True)
# take one number if multiple given
city_df['number'] = city_df.number.apply(lambda x: x.split('-')[0])
city_df.head(5)

  city_df = pd.read_csv(INPUT_CSV_PATH)


Unnamed: 0,hash,number,street,unit,city,district,region,postcode,id,incorporated_place,state,lon,lat
0,6db8d40a96a73ac7,6,A Street,,Boston,Suffolk County,,2136,,Boston,MA,-71.125036,42.250618
1,96b30bef0eef7d86,7,A Street,,Boston,Suffolk County,,2136,,Boston,MA,-71.1254,42.25046
2,a23223056b6593a0,10,A Street,,Boston,Suffolk County,,2127,,Boston,MA,-71.0568,42.34088
3,d878c704ce81bba1,172,A Street,,Boston,Suffolk County,,2210,,Boston,MA,-71.053148,42.344837
4,222c7a044af11e7a,176,A Street,,Boston,Suffolk County,,2210,,Boston,MA,-71.05306,42.344958


Utils Functions

In [12]:
# transforms dataframe to geojson file
def df_to_geojson(df):    
    geojson_data = to_geojson(df=df, lat = 'lat', lon = 'lon', properties=['address_full', 'incorporated_place', 'state','collection_datetime', 'provider', 'speed_down', 'speed_up', 'speed_unit', 'price', 'technology', 'package', 'fastest_speed_down', 'fastest_speed_price'])
    return geojson_data

In [13]:
# saves geojson data to geojson.gz file
def compress_geojson(gejson_data, output_file):
   with gzip.open(output_file, 'wt', encoding='utf-8') as f:
        json.dump(gejson_data, f, ensure_ascii=False)

In [14]:
hughes_output_path = "../data/intermediary/isp/hughes/"+CITY+'/'
xfinity_output_path = "../data/intermediary/isp/xfinity/"+CITY+'/'
viastat_output_path = "../data/intermediary/isp/viastat/"+CITY+'/'

index = 1
for _, row in city_df.iterrows():
    street = row['street']
    number = row['number']
    streetType = street.split()[-1]
    streetName = " ".join(word for word in street.split()[:-1] if word not in streetType)
    city = row['city']
    zipcode = row['postcode']
    state = row['state']
    lat = row['lat']
    lon = row['lon']
    
    try:
        hughes_offer = get_hughes_offer_data(house_number= number, street_name = streetName, street_type=streetType, city=city, state=state, zip_code= str(zipcode), lat = lat, long = lon)
        hughes_df = pd.DataFrame(hughes_offer, index=[0])
        print(hughes_df)
        hughes_geojson = df_to_geojson(hughes_df)
        compress_geojson(hughes_geojson, hughes_output_path+str(index)+'.geojson.gz')
    except:
        print('Hughes failed')
    
    try:
        xfinity_offer = get_xfinity_offer_data(house_number= number, street_name = streetName, street_type=streetType, city=city, state=state, zip_code= zipcode, lat = lat, long = lon)
        xfinity_df = pd.DataFrame(data=xfinity_offer, index=[0])
        print(xfinity_df)
        xfinity_geojson = df_to_geojson(xfinity_df)
        compress_geojson(xfinity_geojson, xfinity_output_path+str(index)+'.geojson.gz')
    except:
        print('Xfinity failed')
    
    try:
        viastat_offer = get_viastat_offer_data("0", f'{number} {streetName} {streetType}', city, state, zipcode, lat, lon)
        viastat_offer = pd.DataFrame(data=viastat_offer, index=[0])
        print(viastat_df)
        viastat_geojson = df_to_geojson(viastat_df)
        compress_geojson(viastat_geojson, viastat_output_path+str(index)+'.geojson.gz')
    except:
        print('Viastat failed')
    
    index += 1
   


                     address_full incorporated_place state        lat  \
0  6, A Street, Boston, MA, 02136             Boston    MA  42.250618   

         lon  collection_datetime provider speed_down  speed_up speed_unit  \
0 -71.125036         1.701698e+09   hughes         15         3       Mbps   

   price technology package  fastest_speed_down fastest_speed_price  
0  49.99  Satellite   15 GB                  25               49.99  
                     address_full incorporated_place state        lat  \
0  6, A Street, Boston, MA, 02136             Boston    MA  42.250618   

         lon  collection_datetime provider speed_down speed_up speed_unit  \
0 -71.125036         1.701698e+09  xfinity        400       10       Mbps   

    price technology package  fastest_speed_down fastest_speed_price  
0  102.00      Cable    Fast                1200              117.00  
Viastat failed
                     address_full incorporated_place state       lat      lon  \
0  7, A Street, 