### Scrape All ISPs

In [1]:
import gzip
import json
import os

import pandas as pd
from pandas_geojson import to_geojson

from hughes_utils import get_hughes_offer_data
from xfinity_utils import get_xfinity_offer_data
from viastat_utils import get_viastat_offer_data

In [2]:
# Notebook-wide settings.
# City slug; it is spliced into the per-provider output directories below.
CITY = "hartford"
# Gzip-compressed CSV holding the address records for the city.
INPUT_CSV_PATH = (
    "../data/open_address/processed/csv/"
    "city_of_hartford-addresses-city.csv.gz"
)

In [3]:
# Read the address data for the configured city.
# The path comes from INPUT_CSV_PATH so it is defined in one place only.
# 'postcode' is read as text so ZIP codes keep their leading zero
# (e.g. '06101' instead of 6101); missing values still load as NaN.
city_df = pd.read_csv(INPUT_CSV_PATH, dtype={'postcode': str})

# for testing only: keep just the first address and force a known postcode.
# .copy() gives an independent frame so the assignment below does not write
# into a view of the full dataset (avoids SettingWithCopyWarning).
city_df = city_df.iloc[0:1].copy()
city_df['postcode'] = '06101'

# Drop rows that have no postcode.
city_df = city_df.dropna(subset=['postcode'])

Utility Functions

In [4]:
# builds GeoJSON data from an offers dataframe
def df_to_geojson(df):
    """Convert ``df`` to GeoJSON data with ``pandas_geojson.to_geojson``.

    The 'lat' and 'lon' columns are passed as the coordinate columns and the
    columns listed below are passed as the feature properties.

    Returns whatever ``to_geojson`` returns for those arguments.
    """
    property_columns = [
        'address_full',
        'incorporated_place',
        'state',
        'collection_datetime',
        'provider',
        'speed_down',
        'speed_up',
        'speed_unit',
        'price',
        'technology',
        'package',
        'fastest_speed_down',
        'fastest_speed_price',
    ]
    return to_geojson(df=df, lat='lat', lon='lon', properties=property_columns)

In [5]:
# saves geojson data to geojson.gz file
def compress_geojson(gejson_data, output_file):
    """Write ``gejson_data`` as UTF-8 JSON into a gzip-compressed file.

    Args:
        gejson_data: JSON-serializable object to write. The parameter name
            is kept as-is so existing keyword callers keep working.
        output_file: Destination passed to ``gzip.open``. When it is a
            filesystem path, its parent directory is created if missing.

    Returns:
        None.
    """
    # Only path-like destinations have a parent directory to create;
    # anything else (e.g. an open file object) goes straight to gzip.open.
    if isinstance(output_file, (str, bytes, os.PathLike)):
        parent_dir = os.path.dirname(output_file)
        if parent_dir:
            # Without this the first write for a new city/provider fails
            # with FileNotFoundError.
            os.makedirs(parent_dir, exist_ok=True)

    with gzip.open(output_file, 'wt', encoding='utf-8') as f:
        # ensure_ascii=False keeps non-ASCII characters readable in the file.
        json.dump(gejson_data, f, ensure_ascii=False)

In [8]:
# Per-provider output directories for the configured city.
hughes_output_path = "../data/intermediary/isp/hughes/"+CITY+'/'
xfinity_output_path = "../data/intermediary/isp/xfinity/"+CITY+'/'
viastat_output_path = "../data/intermediary/isp/viastat/"+CITY+'/'

# Scrape each address and write one GeoJSON file per provider per address.
# Files are numbered from 1; enumerate advances the number on every row so
# later addresses no longer overwrite '1.geojson.gz'.
for index, (_, row) in enumerate(city_df.iterrows(), start=1):
    street = row['street']
    number = row['number']
    # The last word of the street is taken as its type (e.g. 'St', 'Ave');
    # everything before it is the street name. Every remaining word is kept:
    # filtering with `word not in streetType` was a substring test and
    # dropped legitimate name words such as 'A' in 'A Ave'.
    street_words = street.split()
    streetType = street_words[-1]
    streetName = " ".join(street_words[:-1])
    city = row['city']
    zipcode = row['postcode']
    state = row['state']
    lat = row['lat']
    lon = row['lon']

    # NOTE(review): hughes receives the zip code as str while xfinity gets
    # the raw column value — confirm which type each helper expects.
    hughes_offer = get_hughes_offer_data(
        house_number=number, street_name=streetName, street_type=streetType,
        city=city, state=state, zip_code=str(zipcode), lat=lat, long=lon)
    xfinity_offer = get_xfinity_offer_data(
        house_number=number, street_name=streetName, street_type=streetType,
        city=city, state=state, zip_code=zipcode, lat=lat, long=lon)
    # viastat scraping is currently disabled:
    # viastat_offer = get_viastat_offer_data("0", f'{number} {streetName} {streetType}', city, state, zipcode, lat, lon)

    # Each offer is wrapped in a single-row dataframe (index=[0]).
    hughes_df = pd.DataFrame(hughes_offer, index=[0])
    xfinity_df = pd.DataFrame(data=xfinity_offer, index=[0])
    # viastat_df = pd.DataFrame(data=viastat_offer, index=[0])

    hughes_geojson = df_to_geojson(hughes_df)
    xfinity_geojson = df_to_geojson(xfinity_df)
    # viastat_geojson = df_to_geojson(viastat_df)

    compress_geojson(hughes_geojson, hughes_output_path+str(index)+'.geojson.gz')
    compress_geojson(xfinity_geojson, xfinity_output_path+str(index)+'.geojson.gz')
    # compress_geojson(viastat_geojson, viastat_output_path+str(index)+'.geojson.gz')
   
