In [1]:
import pandas as pd 
import altair as alt 
from vega_datasets import data


``` json
{
  "$schema": "https://vega.github.io/schema/vega/v5.json",
  "width": 300,
  "height": 150,
  "padding": {"top": 0, "left": 0, "right": 0, "bottom": 0},
  "signals": [
    {
      "name": "hover",
      "value": null,
      "on": [
        {"events": "symbol:mouseover", "update": "datum"},
        {"events": "symbol:mouseout", "update": "null"}
      ]
    },
    {
      "name": "title",
      "value": "Internet Archive BoL 2011-2023",
      "update": "hover ? hover.name + ' (' + hover.address + ')' : 'Internet Archive BoL 2011-2023'"
    }
  ],
  "data": [
    {
      "name": "world",
      "url": "data/world-110m.json",
      "format": {"type": "topojson", "feature": "countries"},
      "transform": [{"type": "geopath", "projection": "projection"}]
    },
    {
      "name": "locations",
      "url": "https://raw.githubusercontent.com/ers6/ia_bols/main/location_key.csv",
      "format": {"type": "csv", "parse": "auto", "delimiter": ","},
      "transform": [
        {
          "type": "geopoint",
          "projection": "projection",
          "fields": ["lon", "lat"]
        },
        {"type": "filter", "expr": "datum.x != null && datum.y != null"}
      ]
    },
    {
      "name": "shipments",
      "url": "https://raw.githubusercontent.com/ers6/ia_bols/main/shipments.csv",
      "format": {"type": "csv", "parse": "auto", "delimiter": ","},
      "transform": [
        {
          "type": "lookup",
          "from": "locations",
          "key": "id",
          "fields": ["supplier_location_id", "company_location_id"],
          "as": ["source", "target"]
        },
        {"type": "filter", "expr": "datum.source && datum.target"},
        {"type": "linkpath", "shape": "line"}
      ]
    }
  ],
  "scales": [],
  "projections": [
    {
      "name": "projection",
      "type": "equalEarth",
      "scale": 200,
      "translate": [{"signal": "width / 2"}, {"signal": "height / 2"}]
    }
  ],
  "marks": [
    {
      "type": "path",
      "from": {"data": "world"},
      "encode": {
        "enter": {"fill": {"value": "#dedede"}, "stroke": {"value": "white"}},
        "update": {"path": {"field": "path"}}
      }
    },
    {
      "type": "symbol",
      "from": {"data": "locations"},
      "encode": {
        "enter": {
          "size": {"value": 16},
          "fill": {"value": "steelblue"},
          "fillOpacity": {"value": 0.8},
          "stroke": {"value": "black"},
          "strokeWidth": {"value": 1.5}
        },
        "update": {"x": {"field": "x"}, "y": {"field": "y"}}
      }
    },
    {
      "type": "text",
      "interactive": false,
      "encode": {
        "enter": {
          "x": {"signal": "width", "offset": -500},
          "y": {"value": 0},
          "fill": {"value": "black"},
          "fontSize": {"value": 20},
          "align": {"value": "right"}
        },
        "update": {"text": {"signal": "title"}}
      }
    },
    {
      "type": "path",
      "interactive": false,
      "from": {"data": "shipments"},
      "encode": {
        "enter": {
          "path": {"field": "path"},
          "stroke": {"value": "black"},
          "strokeOpacity": {"value": 0.15}
        }
      }
    }
  ],
  "config": {}
}
```

In [6]:

# Since these data are each more than 5,000 rows we'll import from the URLs
airports = data.airports.url
locations = "https://raw.githubusercontent.com/ers6/ia_bols/main/location_key.csv"
flights_airport = data.flights_airport.url
shipments = "https://raw.githubusercontent.com/ers6/ia_bols/main/shipments.csv"
world = alt.topo_feature(data.world_110m.url, 'countries')

# Create mouseover selection
select_city = alt.selection_single(
  
    on="mouseover", nearest=True, fields=["company_location_id"], empty="none"
)


selector=alt.selection_single(fields=['long', 'lat'], on='click', nearest=True)


# Define which attributes to lookup from airports.csv
lookup_data = alt.LookupData(
    locations, key="id", fields=["name", "lat", "lon"]
)

background = alt.Chart(world).mark_geoshape(
    fill='lightgray',
    stroke='white'
).properties(
    width=800,
    height=600
).project('equalEarth')


connections = alt.Chart(shipments).mark_rule(opacity=0.2
).encode(
    latitude="lat:Q",
    longitude="lon:Q",
    latitude2="lat2:Q",
    longitude2="lon2:Q"
    
#     size = alt.Size("weight_kg:Q", scale=alt.Scale(range=[2,10]), legend=None), 
#     order=alt.Order("weight_kg:Q", sort="descending")
).transform_lookup(
    lookup="supplier_location_id",
    from_=lookup_data
).transform_lookup(
    lookup="company_location_id",
    from_=lookup_data,
    as_=["name", "lat2","lon2"]
).transform_filter(
    select_city
)
# alt.Chart(locations).mark_circle().encode(
points = alt.Chart(shipments).mark_circle().encode(
    latitude="lat:Q",
    longitude="lon:Q"
).transform_aggregate(
    routes="count()",
    groupby=["company_location_id"]
).transform_lookup(
lookup="company_location_id",
from_=lookup_data
).add_selection(
    select_city
)

suppliers = alt.Chart(locations).mark_circle().encode(
latitude="lat:Q",
longitude="lon:Q", 
tooltip = ["company_location_id:N"]
).transform_filter(
alt.datum.name != "Internet Archive"
)


(background + connections + suppliers + points).configure_view(stroke=None)


# would be more interesting and helpful to make the circles of the scan centers based on weight of goods shipped 
# instead of the lines because it's very f


In [None]:
pd.read_csv(airports)

In [78]:
pd.read_csv(flights_airport)

Unnamed: 0,origin,destination,count
0,ABE,ATL,853
1,ABE,BHM,1
2,ABE,CLE,805
3,ABE,CLT,465
4,ABE,CVG,247
...,...,...,...
5361,YUM,IPL,326
5362,YUM,LAS,99
5363,YUM,LAX,1044
5364,YUM,PHX,1961


In [3]:
locations = pd.read_csv("https://raw.githubusercontent.com/ers6/ia_bols/main/location_key.csv")
locations

Unnamed: 0.1,Unnamed: 0,id,name,address,lat,lon
0,0,18excellence,"Tuling Info&Tech Co., Ltd.",2ND FLOOR EXCELLENCE LOGISTICS BUIL DING NO.18...,22.508424,114.044487
1,1,wanli_industrial,"Datum Data Co., Ltd.",WANLI INDUSTRIAL PARK NO.2 FENGHUANG RD FUTIAN...,22.506351,114.050162
2,2,wenzhou,Zhejiang Gold Ox Industries & Trade,"Wenzhou, Zhejiang, China",28.070069,120.588637
3,3,veristrong_industrial,Internet Archive,Room 9-10 7 F Block A Veristrong Industrial Ce...,22.399645,114.193167
4,4,castle_industrial,Better World Books Ltd,Unit 10 Castle Industrial Estate Queensferry Road,56.055298,-3.439736
5,5,hvg_mandaue,Innoddata Knowledge Services Inc,Hvg It Park Subangdaku Mandaue City 6014 Cebu ...,10.318033,123.920545
6,6,meilin_ge,Datum Data Co Ltd,3Rd Fl Meilin Ge Meilin Road,22.568126,114.054168
7,7,gang_de_lu,Shenzhen Shenghangda International,"Gang De Lu, Long Gang Qu, Shen Zhen Shi, Guang...",,
8,8,300funston,Internet Archive,"300 Funston Avenue, San Francisco, CA 94118, USA",37.782455,-122.471569
9,9,2512florida,Internet Archive,2512 Florida Ave Richmond Ca94804 Usa,37.929504,-122.345987


In [4]:
shipments = pd.read_csv("https://raw.githubusercontent.com/ers6/ia_bols/main/shipments.csv")
shipments

Unnamed: 0.1,Unnamed: 0,supplier_location_id,company_location_id,weight_kg
0,0,18excellence,300funston,724
1,1,wanli_industrial,300funston,8200
2,2,wanli_industrial,300funston,16597
3,3,wanli_industrial,300funston,15471
4,4,wanli_industrial,300funston,44961
...,...,...,...,...
78,78,hvg_mandaue,298cherry,54162
79,79,hvg_mandaue,298cherry,57912
80,80,hvg_mandaue,298cherry,60576
81,81,hvg_mandaue,298cherry,96457


In [125]:
# Since these data are each more than 5,000 rows we'll import from the URLs
# Since these data are each more than 5,000 rows we'll import from the URLs

locations = "https://raw.githubusercontent.com/ers6/ia_bols/main/location_key.csv"

world = alt.topo_feature(data.world_110m.url, 'countries')

# sum of the total weights of goods shipped between locations
shipments = "https://raw.githubusercontent.com/ers6/ia_bols/main/shipments.csv"
ship = pd.read_csv(shipments).groupby(['supplier_location_id', 'company_location_id']).agg({'weight_kg':['sum']}).reset_index()
ship.columns = ['supplier_location_id', 'company_location_id', 'total_weight_shipped']
shipments = ship


# Create mouseover selection
select_city = alt.selection_single(
    on="mouseover", nearest=True, fields=["supplier_location_id"], empty="none"
)

# Define which attributes to lookup from airports.csv
lookup_data = alt.LookupData(
    locations, key="id", fields=["name", "lat", "lon"]
)
background = alt.Chart(world).mark_geoshape(
    fill='lightgray',
    stroke='white'
).properties(
    width=800,
    height=600
).project('equalEarth')


connections = alt.Chart(shipments).mark_rule(opacity=0.35).encode(
    latitude="lat:Q",
    longitude="lon:Q",
    latitude2="lat2:Q",
    longitude2="lon2:Q"
).transform_lookup(
    lookup="supplier_location_id",
    from_=lookup_data
).transform_lookup(
    lookup="company_location_id",
    from_=lookup_data,
    as_=["name", "lat2", "lon2"]
).transform_filter(
    select_city
)

points = alt.Chart(shipments).mark_circle().encode(
    latitude="lat:Q",
    longitude="lon:Q",
    size=alt.Size("total_weight_shipped:Q", scale=alt.Scale(range=[0, 1000]), legend=None),
    tooltip=["name:N", "total_weight_shipped:Q"]
).transform_lookup(
    lookup="supplier_location_id",
    from_=lookup_data
).add_selection(
    select_city
)

ia = alt.Chart(locations).mark_circle().encode(
    latitude="lat:Q",
    longitude="lon:Q"
).transform_filter(
    alt.datum.name=="Internet Archive"
)

this_chart = (background + connections + points+ ia)

this_chart.save("/Users/elizabethschwartz/ers6.github.io/assets/json/suppliers_interactive_viz.json")

this_chart

  for col_name, dtype in df.dtypes.iteritems():


In [22]:
# sum of the total weights of goods shipped between locations
shipments = "https://raw.githubusercontent.com/ers6/ia_bols/main/shipments.csv"
ship = pd.read_csv(shipments).groupby(['supplier_location_id', 'company_location_id']).agg({'weight_kg':['sum']}).reset_index()
ship.columns = ['supplier_location_id', 'company_location_id', 'total_weight_shipped']


  ship.melt(id_vars=('supplier_location_id'), var_name='supplier', value_name='company_location_id')


Unnamed: 0,supplier_location_id,supplier,company_location_id
0,18excellence,Unnamed: 0,0
1,wanli_industrial,Unnamed: 0,1
2,wanli_industrial,Unnamed: 0,2
3,wanli_industrial,Unnamed: 0,3
4,wanli_industrial,Unnamed: 0,4
...,...,...,...
244,hvg_mandaue,weight_kg,54162
245,hvg_mandaue,weight_kg,57912
246,hvg_mandaue,weight_kg,60576
247,hvg_mandaue,weight_kg,96457


In [77]:
shipments

flights_airports

NameError: name 'flights_airports' is not defined

In [48]:
# Since these data are each more than 5,000 rows we'll import from the URLs
airports = data.airports.url
locations = "https://raw.githubusercontent.com/ers6/ia_bols/main/location_key.csv"
flights_airport = data.flights_airport.url


# sum of the total weights of goods shipped between locations
shipments = "https://raw.githubusercontent.com/ers6/ia_bols/main/shipments.csv"
ship = pd.read_csv(shipments).groupby(['supplier_location_id', 'company_location_id']).agg({'weight_kg':['sum']}).reset_index()
ship.columns = ['supplier_location_id', 'company_location_id', 'total_weight_shipped']
shipments = ship


world = alt.topo_feature(data.world_110m.url, 'countries')

# Create mouseover selection
select_city = alt.selection_single(
  
    on="mouseover", nearest=True, fields=["company_location_id"], empty="none"
)


selector=alt.selection_single(fields=['long', 'lat'], on='click', nearest=True)


# Define which attributes to lookup from airports.csv
lookup_data = alt.LookupData(
    locations, key="id", fields=["name", "lat", "lon"]
)

background = alt.Chart(world).mark_geoshape(
    fill='lightgray',
    stroke='white'
).properties(
    width=800,
    height=600
).project('equalEarth')


connections = alt.Chart(shipments).mark_rule(color='green',opacity=0.75
).encode(
    latitude="lat:Q",
    longitude="lon:Q",
    latitude2="lat2:Q",
    longitude2="lon2:Q",
    size=alt.Size("total_weight_shipped:Q", scale=alt.Scale(range=[2, 8]), legend=None)
    
#     size = alt.Size("weight_kg:Q", scale=alt.Scale(range=[2,10]), legend=None), 
#     order=alt.Order("weight_kg:Q", sort="descending")
).transform_lookup(
    lookup="supplier_location_id",
    from_=lookup_data
).transform_lookup(
    lookup="company_location_id",
    from_=lookup_data,
    as_=["name", "lat2","lon2"]
).transform_filter(
    select_city
)
# alt.Chart(locations).mark_circle().encode(
points = alt.Chart(shipments).mark_circle().encode(
    latitude="lat:Q",
    longitude="lon:Q"
).transform_aggregate(
    routes="count()",
    groupby=["company_location_id"]
).transform_lookup(
lookup="company_location_id",
from_=lookup_data
).add_selection(
    select_city
)

suppliers = alt.Chart(locations).mark_circle(size=70, color='red', opacity=0.5).encode(
latitude="lat:Q",
longitude="lon:Q", 
tooltip = ["company_location_id:N"],
).transform_filter(
alt.datum.name != "Internet Archive"
)



(background + connections + suppliers + points).configure_view(stroke=None)


# w

  for col_name, dtype in df.dtypes.iteritems():


In [26]:
pd.read_csv(locations)

Unnamed: 0.1,Unnamed: 0,id,name,address,lat,lon
0,0,18excellence,"Tuling Info&Tech Co., Ltd.",2ND FLOOR EXCELLENCE LOGISTICS BUIL DING NO.18...,22.508424,114.044487
1,1,wanli_industrial,"Datum Data Co., Ltd.",WANLI INDUSTRIAL PARK NO.2 FENGHUANG RD FUTIAN...,22.506351,114.050162
2,2,wenzhou,Zhejiang Gold Ox Industries & Trade,"Wenzhou, Zhejiang, China",28.070069,120.588637
3,3,veristrong_industrial,Internet Archive,Room 9-10 7 F Block A Veristrong Industrial Ce...,22.399645,114.193167
4,4,castle_industrial,Better World Books Ltd,Unit 10 Castle Industrial Estate Queensferry Road,56.055298,-3.439736
5,5,hvg_mandaue,Innoddata Knowledge Services Inc,Hvg It Park Subangdaku Mandaue City 6014 Cebu ...,10.318033,123.920545
6,6,meilin_ge,Datum Data Co Ltd,3Rd Fl Meilin Ge Meilin Road,22.568126,114.054168
7,7,gang_de_lu,Shenzhen Shenghangda International,"Gang De Lu, Long Gang Qu, Shen Zhen Shi, Guang...",,
8,8,300funston,Internet Archive,"300 Funston Avenue, San Francisco, CA 94118, USA",37.782455,-122.471569
9,9,2512florida,Internet Archive,2512 Florida Ave Richmond Ca94804 Usa,37.929504,-122.345987


In [27]:
shipments

Unnamed: 0,supplier_location_id,company_location_id,total_weight_shipped
0,18excellence,2512florida,4131
1,18excellence,300funston,724
2,castle_industrial,2512florida,3142
3,castle_industrial,298cherry,15865
4,gang_de_lu,298cherry,3589
5,hvg_mandaue,298cherry,985603
6,hvg_mandaue,380carlson,330569
7,meilin_ge,298cherry,31790
8,veristrong_industrial,380carlson,202417
9,wanli_industrial,2512florida,39419


In [49]:
# version of the dashboard using full dataset and scan counts (not integrated with shipments yet)
from vega_datasets import data

selector=alt.selection_single(fields=['long', 'lat'], on='click', nearest=True)

# import point data
scan_center_url = "https://raw.githubusercontent.com/scanninglabor/IAScanningLabor/main/code/scans_per_center_per_year.csv"
# globe background

world = alt.topo_feature(data.world_110m.url, 'countries')

# US states background
background = alt.Chart(world).mark_geoshape(
    fill='lightgray',
    stroke='white'
).properties(
    width=800,
    height=600
).project('equalEarth')


# scan center locations

points = alt.Chart(scan_center_url).mark_circle(
    size=20,
    color='steelblue'
).encode(
    longitude='long:Q',
    latitude='lat:Q', 
    tooltip=['name:N']
).add_selection(selector)

this_map = background + points


# making a bar chart for scans over time 

scans = alt.Chart(scan_center_url).mark_area(
    color='lightblue', 
    interpolate='step-after', 
    line=True).encode(
    x=alt.X('date:T', axis=alt.Axis(title="Months")),
    y=alt.Y('count:Q', axis=alt.Axis(title="Books Scanned"))
).transform_filter(selector).interactive()

draft_dash = this_map | scans

In [66]:
# Since these data are each more than 5,000 rows we'll import from the URLs
airports = data.airports.url
locations = "https://raw.githubusercontent.com/ers6/ia_bols/main/location_key.csv"
flights_airport = data.flights_airport.url


# sum of the total weights of goods shipped between locations
shipments = "https://raw.githubusercontent.com/ers6/ia_bols/main/shipments.csv"
ship = pd.read_csv(shipments).groupby(['supplier_location_id', 'company_location_id']).agg({'weight_kg':['sum']}).reset_index()
ship.columns = ['supplier_location_id', 'company_location_id', 'total_weight_shipped']
shipments = ship


world = alt.topo_feature(data.world_110m.url, 'countries')

# Create mouseover selection
select_city = alt.selection_single(
  
    on="mouseover", nearest=True, fields=["company_location_id"], empty="none"
)


selector=alt.selection_single(fields=['long', 'lat'], on='click', nearest=True)


# Define which attributes to lookup from airports.csv
lookup_data = alt.LookupData(
    locations, key="id", fields=["name", "lat", "lon"]
)

background = alt.Chart(world).mark_geoshape(
    fill='lightgray',
    stroke='white'
).properties(
    width=800,
    height=600
).project('equalEarth')


connections = alt.Chart(shipments).mark_rule(color='green',opacity=0.75
).encode(
    latitude="lat:Q",
    longitude="lon:Q",
    latitude2="lat2:Q",
    longitude2="lon2:Q",
    size=alt.Size("total_weight_shipped:Q", scale=alt.Scale(range=[2, 8]), legend=None)
    
#     size = alt.Size("weight_kg:Q", scale=alt.Scale(range=[2,10]), legend=None), 
#     order=alt.Order("weight_kg:Q", sort="descending")
).transform_lookup(
    lookup="supplier_location_id",
    from_=lookup_data
).transform_lookup(
    lookup="company_location_id",
    from_=lookup_data,
    as_=["name", "lat2","lon2"]
).transform_filter(
    select_city
)
# alt.Chart(locations).mark_circle().encode(
points = alt.Chart(shipments).mark_circle().encode(
    latitude="lat:Q",
    longitude="lon:Q"
).transform_lookup(
    lookup="supplier_location_id",
    from_=lookup_data
).transform_lookup(
    lookup="company_location_id",
    from_=lookup_data

).add_selection(
    select_city
)

suppliers = alt.Chart(locations).mark_circle(size=70, color='red', opacity=0.5).encode(
latitude="lat:Q",
longitude="lon:Q", 
tooltip = ["company_location_id:N"],
).transform_filter(
alt.datum.name != "Internet Archive"
)



(background + connections + suppliers + points).configure_view(stroke=None)


# w

  for col_name, dtype in df.dtypes.iteritems():


In [126]:
# trying to make shit with a legend

# Since these data are each more than 5,000 rows we'll import from the URLs
# Since these data are each more than 5,000 rows we'll import from the URLs

locations = "https://raw.githubusercontent.com/ers6/ia_bols/main/location_key.csv"

world = alt.topo_feature(data.world_110m.url, 'countries')

# sum of the total weights of goods shipped between locations
shipments = "https://raw.githubusercontent.com/ers6/ia_bols/main/shipments.csv"
ship = pd.read_csv(shipments).groupby(['supplier_location_id', 'company_location_id']).agg({'weight_kg':['sum']}).reset_index()
ship.columns = ['supplier_location_id', 'company_location_id', 'total_weight_shipped']
shipments = ship

leg_select = alt.selection_multi(fields=['supplier_location_id', 'company_location_id'], bind='legend')

# Define which attributes to lookup from airports.csv
lookup_data = alt.LookupData(
    locations, key="id", fields=["name", "lat", "lon"]
)
background = alt.Chart(world).mark_geoshape(
    fill='lightgray',
    stroke='white'
).properties(
    width=800,
    height=600
).project('equalEarth')


connections = alt.Chart(shipments).mark_rule(opacity=0.35).encode(
    latitude="lat:Q",
    longitude="lon:Q",
    latitude2="lat2:Q",
    longitude2="lon2:Q"
).transform_lookup(
    lookup="supplier_location_id",
    from_=lookup_data
).transform_lookup(
    lookup="company_location_id",
    from_=lookup_data,
    as_=["name", "lat2", "lon2"]
).transform_filter(
    leg_select
)

points = alt.Chart(shipments).mark_circle().encode(
    latitude="lat:Q",
    longitude="lon:Q",
    size=alt.Size("total_weight_shipped:Q", scale=alt.Scale(range=[0, 1000]), legend=None),
    tooltip=["name:N", "total_weight_shipped:Q"]
).transform_lookup(
    lookup="supplier_location_id",
    from_=lookup_data
).add_selection(
    leg_select
)

ia = alt.Chart(locations).mark_circle().encode(
    latitude="lat:Q",
    longitude="lon:Q"
).transform_filter(
    alt.datum.name=="Internet Archive"
)

this_chart = (background + connections + points+ ia)

this_chart

  for col_name, dtype in df.dtypes.iteritems():


In [123]:
pd.read_csv(locations)

Unnamed: 0.1,Unnamed: 0,id,name,address,lat,lon
0,0,18excellence,"Tuling Info&Tech Co., Ltd.",2ND FLOOR EXCELLENCE LOGISTICS BUIL DING NO.18...,22.508424,114.044487
1,1,wanli_industrial,"Datum Data Co., Ltd.",WANLI INDUSTRIAL PARK NO.2 FENGHUANG RD FUTIAN...,22.506351,114.050162
2,2,wenzhou,Zhejiang Gold Ox Industries & Trade,"Wenzhou, Zhejiang, China",28.070069,120.588637
3,3,veristrong_industrial,Internet Archive,Room 9-10 7 F Block A Veristrong Industrial Ce...,22.399645,114.193167
4,4,castle_industrial,Better World Books Ltd,Unit 10 Castle Industrial Estate Queensferry Road,56.055298,-3.439736
5,5,hvg_mandaue,Innoddata Knowledge Services Inc,Hvg It Park Subangdaku Mandaue City 6014 Cebu ...,10.318033,123.920545
6,6,meilin_ge,Datum Data Co Ltd,3Rd Fl Meilin Ge Meilin Road,22.568126,114.054168
7,7,gang_de_lu,Shenzhen Shenghangda International,"Gang De Lu, Long Gang Qu, Shen Zhen Shi, Guang...",,
8,8,300funston,Internet Archive,"300 Funston Avenue, San Francisco, CA 94118, USA",37.782455,-122.471569
9,9,2512florida,Internet Archive,2512 Florida Ave Richmond Ca94804 Usa,37.929504,-122.345987
