# Temporal and Spatial Data Visualization
This is a personal practice project in data visualization using various Python libraries and open-access data.

@ Shunan Feng

## 1. Global Annual Temperature Anomaly Interactive scatter plots with lowess trendlines

Reference:
https://plotly.com/python/linear-fits/ 

Data source: NASA's Goddard Institute for Space Studies (GISS). Credit: NASA/GISS
https://climate.nasa.gov/vital-signs/global-temperature/

In [1]:
import plotly.express as px
import pandas as pd

# Load the GISS global mean temperature table (whitespace-delimited text).
# skiprows keeps row 3 as the header line; rows 0-2 and row 4 are presumably
# preamble/separator lines -- verify against the file if its layout changes.
df = pd.read_table(
    'https://data.giss.nasa.gov/gistemp/graphs/graph_data/Global_Mean_Estimates_based_on_Land_and_Ocean_Data/graph.txt',
    skiprows=[0, 1, 2, 4],
    sep=r'\s+',  # raw string: '\s' in a plain literal is an invalid escape sequence
)
df = df.rename(columns={"No_Smoothing": "temperature anomaly", "Lowess(5)": "lowess"})

# Scatter of the unsmoothed anomaly; plotly fits and draws its own LOWESS trendline.
fig = px.scatter(df, x="Year", y="temperature anomaly", trendline="lowess")
fig.update_layout(
    xaxis_title="Year",
    yaxis_title="Temperature Anomaly (\u2103)",
)
# Range slider under the x axis for interactive zooming in time.
fig.update_xaxes(rangeslider_visible=True)
fig.show()

## 2. COVID-19 
### 2.1 Total tests for COVID 19
Spatial bubble map of the total number of COVID-19 tests per thousand people.

- Map with plotly: https://plotly.com/python/bubble-maps/;
- Testing for COVID-19: this data is collected by the Our World in Data team from official reports.
https://github.com/owid/covid-19-data/tree/master/public/data
- Note: the time slider is temporarily disabled because the timestamps in the dataset do not match.
 

In [2]:
import pandas as pd
import plotly.express as px

# Load the Our World in Data COVID-19 table and keep rows that report testing.
df = pd.read_excel('https://covid.ourworldindata.org/data/owid-covid-data.xlsx')
df = df.dropna(subset=['total_tests_per_thousand'])

# The table holds one row per country per date. Without an animation frame,
# plotting every row stacks all of a country's dated bubbles on one location,
# so keep only the most recent testing report for each country.
df = df.sort_values('date').drop_duplicates(subset='iso_code', keep='last')

# Bubble size encodes tests per thousand people; one colour per country.
# (No animation_frame: only the latest report per country is plotted.)
fig = px.scatter_geo(
    df,
    locations="iso_code",
    color="location",
    hover_name="location",
    size="total_tests_per_thousand",
    projection="natural earth",
)
fig.update_layout(title_text="Total Tests per Thousand People")
fig.show()


### 2.2 Global Total confirmed cases
- Plot with altair: https://altair-viz.github.io/gallery/airports_count.html
- Data: COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University https://github.com/CSSEGISandData/COVID-19
- Note: the coordinates are calculated by averaging the coordinates of each country's entries, so the marker for some countries may lie outside their main continental area.

In [4]:
import altair as alt
import pandas as pd
from vega_datasets import data

urlcovid = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
raw = pd.read_csv(urlcovid)
by_country = raw.groupby('Country/Region')

# Average only the coordinate columns: taking the mean of the whole frame
# would include the text 'Province/State' column and fail on pandas >= 2.0.
dfcoor = by_country[['Lat', 'Long']].mean()

# Build a slim per-country table instead of passing every date column to the
# chart. The last column of the time series is used as the confirmed-case
# total, as before. The country name is moved from the index into a
# 'countries' column so that the tooltip field below actually exists.
df = pd.DataFrame({
    'sum': by_country[raw.columns[-1]].sum(),
    'latitude': dfcoor['Lat'],
    'longitude': dfcoor['Long'],
}).rename_axis('countries').reset_index()

# prepare background map
source = alt.topo_feature(data.world_110m.url, 'countries')
background = alt.Chart(source).mark_geoshape(
    fill='lightgray',
    stroke='white'
).properties(
    width=600, height=400
).project('naturalEarth1')

# plot point data: one circle per country, sized by confirmed cases
points = alt.Chart(df).mark_circle().encode(
    longitude='longitude:Q',
    latitude='latitude:Q',
    size=alt.Size('sum:Q', title='Number of Confirmed Cases'),
    color=alt.value('steelblue'),
    tooltip=['countries:N', 'sum:Q']
).properties(
    title='Global Confirmed COVID-19 Cases'
)
background + points

# 3. Time Series Plots
## 3.1 Time series bubble plots
- Plots with altair: https://altair-viz.github.io/gallery/natural_disasters.html; here we also added the total number of deaths.
- Natural disasters from 1900 to 2019 - EMDAT (2020): https://github.com/owid/owid-datasets/tree/master/datasets/Natural%20disasters%20from%201900%20to%202019%20-%20EMDAT%20(2020)
- Hannah Ritchie and Max Roser (2020) - "Causes of Death". Published online at OurWorldInData.org. Retrieved from: 'https://ourworldindata.org/causes-of-death' [Online Resource]

In [15]:
from vega_datasets import data
import altair as alt
import pandas as pd

# Global death rates from disasters, read straight from the OWID datasets repo.
df = pd.read_csv(r'https://raw.githubusercontent.com/owid/owid-datasets/master/datasets/Global%20death%20rates%20from%20disasters%20(EMDAT%3B%20UN%20%26%20HYDE)/Global%20death%20rates%20from%20disasters%20(EMDAT%3B%20UN%20%26%20HYDE).csv')

# Bubble area encodes the death rate; the legend is titled explicitly.
rate_size = alt.Size(
    'Global death rates from natural disasters:Q',
    scale=alt.Scale(range=[0, 4000]),
    legend=alt.Legend(title='Global death rates from natural disasters'),
)

# One row of bubbles per entity, one column per year.
rate_bubbles = alt.Chart(df).mark_circle(opacity=0.8, stroke='black', strokeWidth=1)
rate_bubbles = rate_bubbles.encode(
    x=alt.X('Year:O', axis=alt.Axis(labelAngle=0)),
    y=alt.Y('Entity:N'),
    size=rate_size,
    color=alt.Color('Entity:N', legend=None),
)
rate_bubbles = rate_bubbles.properties(width=450, height=320)

# Drop rows whose Entity equals the given label before drawing.
rate_bubbles = rate_bubbles.transform_filter(
    alt.datum.Entity != 'Global death rates from natural disasters'
)
rate_bubbles

In [14]:
from vega_datasets import data
import altair as alt
import pandas as pd

# Cause-of-death records from the local workbook.
df = pd.read_excel('dataVisual.xlsx', sheet_name='cause of death 1990-2017')

# Bubble area encodes the 'Death' column.
death_size = alt.Size(
    'Death:Q',
    scale=alt.Scale(range=[0, 4000]),
    legend=alt.Legend(title='World Cause of Death 1990-2017'),
)

# One row of bubbles per cause, one column per year.
cause_bubbles = alt.Chart(df).mark_circle(opacity=0.8, stroke='black', strokeWidth=1)
cause_bubbles = cause_bubbles.encode(
    x=alt.X('Year:O', axis=alt.Axis(labelAngle=0)),
    y=alt.Y('Cause:N'),
    size=death_size,
    color=alt.Color('Cause:N', legend=None),
)
cause_bubbles = cause_bubbles.properties(width=600, height=800)

# NOTE(review): this filter tests an 'Entity' field, while every encoding
# above uses 'Year', 'Cause' and 'Death'. If the sheet has no 'Entity' column
# the filter removes nothing -- confirm whether it is needed at all.
cause_bubbles = cause_bubbles.transform_filter(
    alt.datum.Entity != 'Death'
)
cause_bubbles

## 3.2 Drought heatmap application
This example shows a heatmap of drought conditions in Ethiopia based on the Standardised Precipitation-Evapotranspiration Index (SPEI).
- SPEI: http://spei.csic.es/map/maps.html
- plots with altair: https://altair-viz.github.io/gallery/weather_heatmap.html
- color scheme: https://vega.github.io/vega/docs/schemes/

In [14]:
import altair as alt
import pandas as pd

# SPEI records from the local workbook; the 'time' column is used as a date.
df = pd.read_excel('dataVisual.xlsx', sheet_name='SPEI')

# Heatmap with years on the x axis and months on the y axis, coloured by SPEI_6.
alt.Chart(
    df,
    title="Drought condition in Ethiopia 1950-2020"
).mark_rect().encode(
    x='year(time):O',
    y='month(time):O',
    color=alt.Color('SPEI_6:Q', scale=alt.Scale(scheme="redyellowgreen")),
    tooltip=[
        # 'yearmonth' is a valid Vega-Lite time unit and matches the
        # year-by-month grid; 'yeardate' is not a time unit, so the old
        # shorthand resolved to a non-existent field.
        alt.Tooltip('yearmonth(time):T', title='Date'),
        # Show the same column that drives the colour (and that the
        # 'SPEI6m' title describes) rather than SPEI_11.
        alt.Tooltip('SPEI_6:Q', title='SPEI6m')
    ]
).properties(width=550, height=150)

## 3.3 Storglaciären mass balance Record
- glacier mass balance data: https://bolin.su.se/data/tarfala/
- Below are two examples that serve the same purpose.
    * https://altair-viz.github.io/gallery/bar_chart_with_negatives.html
    * https://plotly.com/python/bar-charts/

In [8]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd

# Storglaciären mass balance series: whitespace-delimited text; the first
# 7 lines are skipped before the header row.
df = pd.read_table(
    'https://bolin.su.se/data/tarfala/data/glaciers/storglaciaren/storglaciaren_mass_balance_series.txt',
    skiprows=7,
    sep=r'\s+',  # raw string: '\s' in a plain literal is an invalid escape sequence
)

x = df.Year
# Flip the sign of the summer balance before plotting
# (presumably so that it is drawn as a loss -- confirm against the data file).
df['Bs'] = df['Bs'] * -1

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# The three mass balance components share the primary y axis as bars.
bar_series = (
    ('Bn', 'Net mass balance'),
    ('Bs', 'Summer mass balance'),
    ('Bw', 'Winter mass balance'),
)
for column, label in bar_series:
    fig.add_trace(
        go.Bar(x=x, y=df[column], name=label),
        secondary_y=False)

# Equilibrium line altitude is drawn as a line on the secondary y axis.
fig.add_trace(
    go.Scatter(x=x, y=df.ELA, name="Equilibrium line altitude"),
    secondary_y=True
)

fig.update_layout(
    barmode='relative',  # positive and negative bars stack on either side of zero
    yaxis_title="mass balance (m w.e.)",
    title_text='Storglaciären mass balance',
    width=800,
    height=400,
    legend_orientation="h")
fig.update_yaxes(title_text=" Equilibrium line altitude (m a.s.l.)", secondary_y=True)
fig.show()

In [26]:
import altair as alt
import pandas as pd

# Storglaciären mass balance series: whitespace-delimited text; the first
# 7 lines are skipped before the header row.
df = pd.read_table(
    'https://bolin.su.se/data/tarfala/data/glaciers/storglaciaren/storglaciaren_mass_balance_series.txt',
    skiprows=7,
    sep=r'\s+',  # raw string: '\s' in a plain literal is an invalid escape sequence
)
# Flip the sign of the summer balance before plotting
# (presumably so that it is drawn as a loss -- confirm against the data file).
df['Bs'] = df['Bs'] * -1

# Net balance: opaque bars, coloured by sign.
bnchart = alt.Chart(df).mark_bar().encode(
    x="Year:O",
    y="Bn:Q",
    color=alt.condition(
        alt.datum.Bn > 0,
        alt.value("steelblue"),  # The positive color
        alt.value("red")  # The negative color
    )
).properties(width=600)

# Winter balance: translucent blue bars layered over the net balance.
bwchart = alt.Chart(df).mark_bar(
    opacity=0.3, color='blue'
).encode(
    x="Year:O",
    y="Bw:Q"
).properties(width=600)

# Summer balance (sign-flipped above): translucent orange bars.
bschart = alt.Chart(df).mark_bar(
    opacity=0.3, color='orange'
).encode(
    x="Year:O",
    y="Bs:Q"
).properties(width=600)

# Layer the three charts on shared axes.
bnchart + bwchart + bschart