# **Coursework 1: Information Visualisation**

# Phase C

Sam Tudberry - 1907632

Yong Kien Lin - 2014480

Joseph Henry - 2035032

# Imports and Data

In [35]:
import altair as alt
import pandas as pd
from vega_datasets import data


In [36]:
#OpenFlights airport data.
# Both files are read with explicit column names supplied via `names=`.
AIRPORT_COLUMNS = [
    'AirportID', 'name', 'city', 'Country', 'IATA', 'ICAO',
    'latitude', 'longitude', 'Altitude', 'Timezone', 'DST',
    'Tz database timezone', 'Type', 'Source',
]
ROUTE_COLUMNS = [
    'airline', 'airlineID', 'source airport', 'source airport ID',
    'destination airport', 'destination airport ID',
    'codeshare', 'stops', 'equipment',
]
airports = pd.read_csv("airports.csv", names=AIRPORT_COLUMNS)
routes = pd.read_csv("routes.csv", names=ROUTE_COLUMNS)

#HDX US airport data by OurAirports.
# The first row is discarded after loading
# (presumably a non-data row such as a tag line -- TODO confirm against the file).
us_airports_raw = pd.read_csv("us-airports.csv")
US_Airport_Data = us_airports_raw.drop(us_airports_raw.index[0])


#Kaggle COVID-19 in USA data by SRK.
covid_counties = pd.read_csv("us_counties_covid19_daily.csv")

# TopoJSON state outlines from vega_datasets, used as the base map geometry.
states = alt.topo_feature(data.us_10m.url, feature='states')


In [37]:
#Working with datasets

# Attach a state to each OpenFlights airport by matching on the airport name.
# The outer join also produces rows that matched on one side only; the Country
# filter and dropna() below remove those, along with any row that has a missing
# value in any column.
USData_airports = pd.merge(US_Airport_Data[['name','region_name']], airports, on='name', how='outer')
USData_airports = USData_airports[USData_airports.Country == 'United States']
USData_airports = USData_airports.dropna()
USData_airports = USData_airports.rename(columns={'region_name': 'state'})

# Number of routes leaving / arriving at each airport ID.
outgoing = routes.groupby(['source airport ID']).size()
outgoing = pd.DataFrame({'AirportID':outgoing.index, 'Outgoing Flights':outgoing.values})
incoming = routes.groupby(['destination airport ID']).size()
incoming = pd.DataFrame({'AirportID':incoming.index, 'Incoming Flights':incoming.values})

# AirportID is cast to an integer and then to a string before joining on it
# (presumably so it matches the ID representation in routes.csv -- TODO confirm).
USData_airports = USData_airports.astype({'AirportID': 'int32'})
USData_airports = USData_airports.astype({'AirportID': 'string'})
USData_airports = pd.merge(USData_airports, outgoing, on='AirportID', how='outer')
USData_airports = pd.merge(USData_airports, incoming, on='AirportID', how='outer')

# Sum the two directions. A plain `+` yields NaN as soon as either count is
# missing, which silently drops airports that only appear as a source or only
# as a destination. With fill_value=0 the missing side counts as zero, while an
# airport with no routes at all still gets NaN.
USData_airports['Total Flights'] = USData_airports['Outgoing Flights'].add(
    USData_airports['Incoming Flights'], fill_value=0
)

# The outer joins above also add route-only IDs with no airport record; drop them.
USData_airports = USData_airports[USData_airports['name'].notna()]

# Per-state route totals, joined onto the COVID table by state name.
TotalFlights = USData_airports.groupby(by='state')['Total Flights'].sum()
TotalFlights = pd.DataFrame({'state':TotalFlights.index, 'Total Flights':TotalFlights.values})
covid_flights = pd.merge(covid_counties, TotalFlights, on='state', how='outer')

# View 1 & 2

In [38]:
#VIEW 1 & 2.

#Click interactions.
# State selection: initialised to New York; an empty selection matches nothing.
click = alt.selection_single(
    fields=['state'],
    init={'state': 'New York'},
    empty='none',
)
# Airport selection, keyed on the airport name.
click2 = alt.selection_single(
    fields=['name'],
)

#Airport markers (View 1).
# NOTE(review): this assignment rebinds `airports`, which the data-loading cell
# uses for the OpenFlights DataFrame. Re-running the wrangling cell after this
# one will therefore see a chart instead of the DataFrame.

# Tooltip fields shown for each airport marker.
airport_tooltip = [
    alt.Tooltip('name:N', title="Airport Name"),
    alt.Tooltip('city:N', title="City"),
    alt.Tooltip('state:N', title="State"),
]

# Legend for the marker size, positioned manually at the layer's origin.
airport_size_legend = alt.Legend(
    orient='none',
    legendX=0,
    legendY=0,
    direction='vertical',
    titleAnchor='start',
)

# Marker area scales with the airport's total number of routes.
airport_size = alt.Size(
    "Total Flights:Q",
    scale=alt.Scale(range=[0, 1000]),
    legend=airport_size_legend,
)

# Markers matched by the airport selection are red, the rest grey.
airport_color = alt.condition(click2, alt.value('red'), alt.value('grey'))

airports = (
    alt.Chart(USData_airports)
    .mark_circle(color='red')
    .encode(
        latitude="latitude:Q",
        longitude="longitude:Q",
        tooltip=airport_tooltip,
        size=airport_size,
        color=airport_color,
    )
    .transform_filter("datum.Country == 'United States'")
)

#USA Map (View 2).

# Legend for the grey case-count ramp, positioned manually via legendX.
cases_legend = alt.Legend(
    tickCount=8,
    orient='none',
    direction='vertical',
    titleAnchor='start',
    legendX=805,
)

# Unselected states are shaded by case count on a fixed 0 - 1,400,000 domain.
cases_shade = alt.Color(
    'cases:Q',
    scale=alt.Scale(scheme='greys', domain=[0,1400000]),
    legend=cases_legend,
    title='Number of Cases',
)

# The selected state is drawn in blue; every other state uses the grey ramp.
state_fill = alt.condition(click, alt.value('#1f77b4'), cases_shade)

state_tooltip = [
    alt.Tooltip('state:N', title = 'State'),
    alt.Tooltip('cases:Q', title = 'Cases'),
]

# Every column of covid_counties is pulled onto the map shapes, matched on 'id'.
covid_lookup = alt.LookupData(covid_counties, 'id', list(covid_counties.columns))

geo_map = (
    alt.Chart(states)
    .mark_geoshape()
    .encode(color=state_fill, tooltip=state_tooltip)
    .transform_lookup(lookup='id', from_=covid_lookup)
    .transform_filter("datum.county == 'All'")
    .project(type='albersUsa')
    .properties(
        title='Airports of the US and Total COVID-19 Cases by State As of December 5th 2020',
        width=800,
        height=500,
    )
    .add_selection(click)
)

# Additional text
# Static captions for the map view, placed at fixed pixel positions.
# All three share the same alignment, baseline and weight; only the font size,
# position and wording differ.
view1_caption_style = dict(align="left", baseline="bottom", fontWeight=600)

view1Text1 = alt.Chart().mark_text(fontSize=14, **view1_caption_style).encode(
    text=alt.value(["Individual airports represented by a red circle"]),
    x=alt.value(250),
    y=alt.value(20),
)

view1Text2 = alt.Chart().mark_text(fontSize=14, **view1_caption_style).encode(
    text=alt.value(["Mouse over items for more detail"]),
    x=alt.value(285),
    y=alt.value(500),
)

view1Text3 = alt.Chart().mark_text(fontSize=12, **view1_caption_style).encode(
    text=alt.value(["Click to select a state"]),
    x=alt.value(340),
    y=alt.value(35),
)

# View 3

In [39]:
#VIEW 3.

# Interactive Scatter Plot with selection.
# View showing flights vs cases via scatterplot.
# One point per row of covid_flights whose county is 'All'; x is the case count
# and y the state's total route count. The point for the state picked by
# `click` is drawn blue and opaque, all others grey and half-transparent.
scatter = alt.Chart(covid_flights).mark_point().encode(
    x=alt.X("cases:Q", title='Total COVID-19 cases of state'),
    y=alt.Y('Total Flights:Q', title='Total routes of state'),
    # `cases` is declared quantitative here as well, matching the x encoding
    # and the map tooltip (it was previously declared nominal).
    tooltip=[alt.Tooltip('state:N', title = 'State'), alt.Tooltip('cases:Q', title = 'Cases')],
    color=alt.condition(
        click,
        alt.value('#1f77b4'),
        alt.value('gray')
    ),
    opacity=alt.condition(click, alt.value(1), alt.value(0.5))
).transform_filter(
    "datum.county == 'All'"
).properties(
    width=600,
)

# Additional text
# Heading and usage hint for the scatter plot, placed at fixed pixel positions.
view2_caption_style = dict(align="left", baseline="bottom", fontWeight=600)

view2Text1 = alt.Chart().mark_text(fontSize=14, **view2_caption_style).encode(
    text=alt.value(["COVID-19 Cases Against Total Flight Routes per State"]),
    x=alt.value(120),
    y=alt.value(15),
)

view2Text2 = alt.Chart().mark_text(fontSize=12, **view2_caption_style).encode(
    text=alt.value(["Zoom, pan, select and mouse over points for more information"]),
    x=alt.value(121),
    y=alt.value(30),
)

# Selection text
# Re-uses the scatter chart's data and encodings with a text mark, so the label
# sits at the point's position (shifted up by 10px via dy).
selected_state_label = scatter.mark_text(
    align='center',
    baseline='middle',
    fontSize=14,
    dy=-10,
)
selected_state_label = selected_state_label.encode(text='state:N')

# Only the state matched by `click` keeps its label; .interactive() enables
# zooming and panning.
view2Text3 = (
    selected_state_label
    .add_selection(click)
    .transform_filter(click)
    .interactive()
)


# View 4

In [40]:
#VIEW 4.

# Ordered Bar Chart with selection.
# View displaying airports in a state + their outgoing + incoming flights.
# Bars are sorted by descending route count. The airport picked by `click2` is
# drawn red and opaque, all others grey and half-transparent.
stateAirports = alt.Chart(USData_airports,width=200,height=299).mark_bar().encode(
    y=alt.Y('name:N', sort='-x', title='Airport'),
    # Axis title no longer starts with a stray tab character.
    x=alt.X("Total Flights:Q", title='Outbound and inbound routes'),
    color=alt.condition(
        click2,
        alt.value('red'),
        alt.value('gray')
    ),
    opacity=alt.condition(click2, alt.value(1), alt.value(0.5)),
    # Tooltip title typo fixed ("Aiport" -> "Airport").
    tooltip=[alt.Tooltip('name:N', title = 'Airport Name'), alt.Tooltip('Total Flights:Q', title = 'Total Routes')],
).transform_window(
    # Rank airports within each state by total routes, busiest first.
    rank='rank(Total Flights)',
    groupby=['state'],
    sort = [alt.SortField('Total Flights', order = 'descending')]
).transform_filter(
    # Keep the ten top-ranked airports of each state.
    alt.datum.rank <= 10,
).transform_filter(
    # Drop airports without a valid route total.
    'isValid(datum["Total Flights"])'
).transform_filter(
    # Show only the state currently selected on the map / scatter plot.
    click
)

# Additional text
# Heading and usage hint for the bar chart, placed at fixed pixel positions.
view3_caption_style = dict(align="left", baseline="bottom", fontWeight=600)

view3Text1 = alt.Chart().mark_text(fontSize=14, **view3_caption_style).encode(
    text=alt.value(["Busiest Airports of Selected State"]),
    x=alt.value(0),
    y=alt.value(-15),
)

view3Text2 = alt.Chart().mark_text(fontSize=12, **view3_caption_style).encode(
    text=alt.value(["Click a bar to highlight the airport"]),
    x=alt.value(0),
    y=alt.value(0),
)

# Value labels for the bars: re-uses the bar chart's data, encodings and
# transforms with a text mark, nudged 3px to the right via dx.
bar_value_label = stateAirports.mark_text(
    align='left',
    baseline='middle',
    fontSize=16,
    dx=3,
)
bar_value_label = bar_value_label.encode(text='Total Flights:Q')

# The airport selection is registered on this layer.
view3Text3 = bar_value_label.add_selection(click2)

# Final Visualisation

In [41]:
#Draw visualisation.

# Each view is a layered chart: the base chart plus its text annotations.
map_view = geo_map + airports + view1Text1 + view1Text2 + view1Text3
scatter_view = scatter + view2Text1 + view2Text2 + view2Text3
bar_view = stateAirports + view3Text1 + view3Text2 + view3Text3

# Map on top; scatter plot and bar chart side by side underneath.
dashboard = (map_view & (scatter_view | bar_view)).properties()

# Last expression of the cell, so the configured chart is rendered as output.
dashboard.configure_title(
    fontSize=20,
    anchor='middle',
    color='black',
)
