In [562]:
import json
import pandas as pd
import geopandas as gpd
import os
import statsmodels.api as sma
import altair as alt

In [710]:
# assign directory
directory = 'data/raw/gunbroker/'

In [711]:
data_list = []

In [712]:
# iterate over files in directory
for filename in os.listdir(directory):
    f = os.path.join(directory, filename)
    # checking if it is a file
    if os.path.isfile(f):
        file = open(f)
        data = json.load(file)
        data_list = data_list + data.get('results')

In [713]:
len(data_list)

37514

In [714]:
df = pd.DataFrame.from_records(data_list)

In [715]:
df.columns

Index(['fflID', 'name', 'company', 'address1', 'address2', 'city', 'state',
       'zip', 'phone', 'fax', 'cellPhone', 'otherPhone', 'hours', 'longGunFee',
       'longGunDescription', 'handGunFee', 'handGunDescription', 'nicsFee',
       'nicsDescription', 'otherFee', 'otherDescription', 'promotionalText',
       'website', 'latitude', 'longitude', 'licenseNumber', 'distance',
       'licenseOnFile', 'links'],
      dtype='object')

In [716]:
df.drop(['links','distance'], axis=1, inplace=True)

In [717]:
df.drop_duplicates(inplace=True)

In [718]:
df.head()

Unnamed: 0,fflID,name,company,address1,address2,city,state,zip,phone,fax,...,nicsFee,nicsDescription,otherFee,otherDescription,promotionalText,website,latitude,longitude,licenseNumber,licenseOnFile
0,57794,Craig Sirna,Tactical Assault Specialist,19009 RAVENNA RD,,Chagrin Falls,OH,44023,4408340696,,...,0.0,,0.0,,,,41.3504,-81.22385,4-34-XXX-XX-XX-07720,True
1,39791,,A&Z Sales and Service,302 west 7th St,,Evart,MI,49631,231-734-5070,,...,0.0,,0.0,,,,43.90047,-85.26265,4-38-XXX-XX-XX-08968,True
2,63430,"James E. Arens, Jr.","Arens Ballistics Company, Ltd.",1035 Gartner Dr.,,Obetz,OH,43207,614-321-1699,,...,5.0,,5.0,,,,39.874,-82.9707,4-31-XXX-XX-XX-08547,True
3,39281,Dean Williams,"Williams, Dean Dennis",2564 N. Aragon Ave,,Kettering,OH,45420,937-902-3731,,...,20.0,,0.0,,,,39.7135,-84.1172,4-31-XXX-XX-XX-04249,True
4,60812,Zac Hendrix,Vance Outdoors,4250 Alum Creek Drive,,Obetz,OH,43207,6144895025,,...,0.0,,0.0,,,,39.88289,-82.93063,4-31-XXX-XX-XX-06052,True


In [719]:
len(df)

29399

In [720]:
df.to_csv("data/processed/gunbroker.csv", index=False)

### Assign a county

In [734]:
counties = gpd.read_file("data/processed/counties.geojson")

In [735]:
counties

Unnamed: 0,statefp,countyfp,county,county_area,state,geoid,geometry
0,02,013,Aleutians East,15009.939752,Alaska,02013,"MULTIPOLYGON (((-3801432.512 3133472.767, -380..."
1,02,016,Aleutians West,14116.419570,Alaska,02016,"MULTIPOLYGON (((-4900590.329 3834004.986, -490..."
2,28,107,Panola,705.131639,Mississippi,28107,"MULTIPOLYGON (((513070.490 -662207.945, 513069..."
3,28,101,Newton,579.601941,Mississippi,28101,"MULTIPOLYGON (((614290.086 -847983.437, 614317..."
4,28,027,Coahoma,583.152412,Mississippi,28027,"MULTIPOLYGON (((474830.268 -692448.051, 474470..."
...,...,...,...,...,...,...,...
3216,37,077,Granville,536.498459,North Carolina,37077,"MULTIPOLYGON (((1455833.489 -317345.762, 14557..."
3217,37,111,McDowell,445.994701,North Carolina,37111,"MULTIPOLYGON (((1207528.929 -416252.481, 12075..."
3218,27,021,Cass,2413.993603,Minnesota,27021,"MULTIPOLYGON (((96243.198 814680.164, 96242.80..."
3219,27,057,Hubbard,999.559340,Minnesota,27057,"MULTIPOLYGON (((59375.994 845151.916, 59373.36..."


In [736]:
# Set to NAD 1983 Albers North America - https://epsg.io/102008
counties = counties.to_crs("EPSG:4269")

In [737]:
# Create geodataframe
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(ffl_merge_trim.longitude, ffl_merge_trim.latitude))

In [738]:
# Set to NAD 1983 Albers North America - https://epsg.io/102008
gdf = gdf.set_crs("EPSG:4269")

In [739]:
# Merge with county
gdf_county = gdf.sjoin(counties, how="left", predicate='intersects')
gdf_county.drop('index_right', axis=1, inplace=True)

In [740]:
gdf_county.head()

Unnamed: 0,lic_regn,lic_dist,lic_cnty,lic_type,lic_xprdte,lic_seqn,license_name,business_name,premise_street,premise_city,...,promotionalText,website,licenseOnFile,geometry,statefp_right,countyfp_right,county_right,county_area_right,state,geoid
3,6,4,13.0,7.0,4D,12422,GUN VALLEY ARMS LLC,,81 RAMAH CIRCLE SOUTH SUITE 5,AGAWAM,...,,,True,POINT (-72.63132 42.08435),25,13,Hampden,634.251128,Massachusetts,25013
12,6,4,27.0,1.0,4F,14926,"JJT ENTERPRISES, LLC",DOWN RANGE SPORTS,590 SUMMER STREET,BARRE,...,,,True,POINT (-72.09790 42.40992),25,27,Worcester,1579.19392,Massachusetts,25027
14,6,4,15.0,1.0,3E,36592,"EVERETT, DOUGLAS FORDE",SWIFT RIVER GUNWORKS,450 STATE ST,BELCHERTOWN,...,,,False,POINT (-72.43958 42.26582),25,15,Hampshire,545.20927,Massachusetts,25015
21,6,4,15.0,7.0,3D,14383,KC SMALL ARMS LLC,,412 MAIN STREET,EASTHAMPTON,...,,,True,POINT (-72.69247 42.25332),25,15,Hampshire,545.20927,Massachusetts,25015
24,6,4,13.0,1.0,3G,12049,"YACOVONE, STEPHEN ALAN",INSIGHT SALES,143E SHAKER RD SUITE 200E,EAST LONGMEADOW,...,,,True,POINT (-72.51679 42.05884),25,13,Hampden,634.251128,Massachusetts,25013


In [741]:
gdf_county = gdf_county.sort_values(["company","state"])

In [742]:
gdf_county.to_file("data/processed/gunbroker_locations.geojson", driver='GeoJSON')

### Group by county

In [743]:
gdf_county_merge = gdf_county.groupby(["geoid"]).size().reset_index(name="dealers")

In [744]:
gdf_county_merge.head()

Unnamed: 0,geoid,dealers
0,1001,12
1,1003,35
2,1005,5
3,1007,1
4,1009,11


In [770]:
counties_merge_full = counties[["geoid","state","county","county_area"]].merge(gdf_county_merge, on="geoid", how="left")

In [771]:
counties_merge_full.head()

Unnamed: 0,geoid,state,county,county_area,dealers
0,2013,Alaska,Aleutians East,15009.939752,
1,2016,Alaska,Aleutians West,14116.41957,1.0
2,28107,Mississippi,Panola,705.131639,8.0
3,28101,Mississippi,Newton,579.601941,6.0
4,28027,Mississippi,Coahoma,583.152412,3.0


### Create dealer rate

In [776]:
final_df["dealer_rate"] = final_df["dealers"] / final_df["county_area"]

### Add neighbor dealer rate

In [777]:
neighbor_file = open("neighbor-counties.json")
neighbors = json.load(neighbor_file)

In [778]:
neighbor_dealer_sum = []
neighbor_area_sum = []
# neighbor_area_population = []

for row in final_df.itertuples():
    neighbor_filter = final_df[final_df["geoid"].isin(neighbors.get(row.geoid))]
    neighbor_dealer_sum.append(neighbor_filter["dealers"].sum())
    neighbor_area_sum.append(neighbor_filter["county_area"].sum())
    # neighbor_area_population.append(neighbor_filter["population"].sum())

final_df["neighbor_dealers"] = neighbor_dealer_sum
final_df["neighbor_area"] = neighbor_area_sum
# final_df["neighbor_population"] = neighbor_area_population

In [779]:
final_df["neighbor_dealers_rate"] = (final_df["dealers"] + final_df["neighbor_dealers"]) / (final_df["county_area"] + final_df["neighbor_area"])

In [780]:
final_df['neighbor_dealers_rate_adj'] = final_df.apply(lambda x: x["dealer_rate"] if x["county_area"] >= 50 else x["neighbor_dealers_rate"], axis = 1)

In [781]:
final_df.sort_values("neighbor_dealers_rate_adj", ascending=False).head(10)

Unnamed: 0,geoid,state,county,county_area,dealers,population,poverty_pct,dealer_rate,neighbor_dealers,neighbor_area,neighbor_dealers_rate,neighbor_dealers_rate_adj
1223,48439,Texas,Tarrant,902.304892,273.0,2050487.0,0.113635,0.302558,485.0,5380.480972,0.120647,0.302558
216,48201,Texas,Harris,1777.48255,397.0,4634207.0,0.156065,0.22335,331.0,7009.966841,0.082845,0.22335
10,48113,Texas,Dallas,908.613868,177.0,2592698.0,0.145804,0.194802,657.0,4649.510694,0.150051,0.194802
628,22055,Louisiana,Lafayette,269.208664,48.0,238082.0,0.16685,0.1783,56.0,4985.524363,0.019792,0.1783
2264,12103,Florida,Pinellas,608.126655,107.0,955568.0,0.115877,0.17595,156.0,2134.181766,0.095905,0.17595
1257,40143,Oklahoma,Tulsa,587.018072,99.0,640621.0,0.14342,0.168649,101.0,6297.340636,0.029051,0.168649
2711,48085,Texas,Collin,886.103501,148.0,1000193.0,0.062556,0.167023,395.0,4770.391728,0.095996,0.167023
128,42045,Pennsylvania,Delaware,190.603645,31.0,544692.0,0.099096,0.162641,129.0,2220.316862,0.066365,0.162641
828,13067,Georgia,Cobb,344.517747,56.0,744737.0,0.086042,0.162546,136.0,1954.210882,0.083524,0.162546
1079,49035,Utah,Salt Lake,807.368563,130.0,1130965.0,0.085993,0.161017,182.0,13763.079168,0.021413,0.161017


### Compare gunbroker.com list to ATF FFL list

In [827]:
# Create geodataframe
gb_gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.longitude, df.latitude))

In [828]:
# Set to NAD 1983 Albers North America - https://epsg.io/102008
gb_gdf = gb_gdf.set_crs("EPSG:4269")

In [829]:
# Merge with county
gb_gdf_county = gb_gdf.sjoin(counties, how="left", predicate='intersects')
gb_gdf_county.drop('index_right', axis=1, inplace=True)

In [830]:
gb_gdf_county_merge = gb_gdf_county.groupby(["geoid"]).size().reset_index(name="dealers")

In [831]:
gb_counties_merge_full = counties[["geoid","state","county","county_area"]].merge(gb_gdf_county_merge, on="geoid", how="left")

In [832]:
gb_counties_merge_full["dealer_rate"] = gb_counties_merge_full["dealers"] / gb_counties_merge_full["county_area"]

In [833]:
gb_counties_merge_full.head()

Unnamed: 0,geoid,state,county,county_area,dealers,dealer_rate
0,2013,Alaska,Aleutians East,15009.939752,,
1,2016,Alaska,Aleutians West,14116.41957,1.0,7.1e-05
2,28107,Mississippi,Panola,705.131639,5.0,0.007091
3,28101,Mississippi,Newton,579.601941,4.0,0.006901
4,28027,Mississippi,Coahoma,583.152412,1.0,0.001715


In [837]:
ffl_counties = pd.read_csv("data/processed/dealers-list/2023/0123-ffl-list-county-summary.csv", dtype={"lic_regn":str,"lic_dist":str,"lic_seqn":str,"geoid":str})

In [836]:
ffl_counties.columns

Index(['statefp', 'state', 'countyfp', 'geoid', 'county', 'county_area',
       'county_population', 'county_white', 'county_black', 'county_asian',
       'county_latino', 'county_median_income', 'county_poverty', 'id',
       'business', 'pawn', 'sport', 'defense', 'big_box', 'small_business',
       'commercial', 'residential', 'all_density_land', 'all_density_pop',
       'business__pct', 'pawn__pct', 'sport_pct', 'defense_pct', 'big_box_pct',
       'small_business_pct', 'residential_pct', 'commercial_pct',
       'neighbor_id', 'neighbor_pawn', 'neighbor_sm_business', 'neighbor_area',
       'neighbor_population'],
      dtype='object')

In [838]:
compare = gb_counties_merge_full[["geoid","dealer_rate"]].merge(ffl_counties[["geoid","all_density_land"]], on="geoid", how="outer")

In [839]:
compare

Unnamed: 0,geoid,dealer_rate,all_density_land
0,02013,,0.019987
1,02016,0.000071,0.028336
2,28107,0.007091,1.985445
3,28101,0.006901,1.552790
4,28027,0.001715,0.514445
...,...,...,...
3216,37077,0.016775,4.473452
3217,37111,0.024664,7.399191
3218,27021,0.003728,1.077053
3219,27057,0.004002,1.600705


In [843]:
compare["all_density_land"] = compare["all_density_land"] / 100

In [844]:
compare[["dealer_rate","all_density_land"]].corr()

Unnamed: 0,dealer_rate,all_density_land
dealer_rate,1.0,0.845759
all_density_land,0.845759,1.0


In [878]:
fig = alt.Chart(compare).mark_point().encode(
    x=alt.X('dealer_rate:Q', title="Gunbroker.com dealers per mile"),
    y=alt.Y('all_density_land:Q', title="All FFL dealers per mile"),
).properties(
    width=300,
    height=250
)
# making the regression line using transform_regression function and add with the scatter plot
final_plot = fig + fig.transform_regression('dealer_rate','all_density_land').mark_line()

In [879]:
final_plot