# College News Keyword Visualizations

## Import libraries & load data
- Reads keyword/context csv and creates data frame


In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import altair as alt

# Reading a csv into a data frame

csvpath = "/Users/amcgrath1/learning/altair/cn-context-civil-rights.csv"
df = pd.read_csv(csvpath) # df variable represents a dataframe from the csv file
df.head(20) # preview the first 10 rows of your data frame

Unnamed: 0,filename,date,year,month,keyword,context
0,cn1962-03-14.txt,1962-03-14,1962,3,civil rights,"dom devéloped i oa whereas, in Europe, civil r..."
1,cn1950-05-10.txt,1950-05-10,1950,5,integration,This confederation aims at econ- omic integrat...
2,cn1960-02-17.txt,1960-02-17,1960,2,integration,be the best method of dealing with the need fo...
3,cn1960-02-17.txt,1960-02-17,1960,2,integration,omic integration of Western Eur-
4,cn1960-02-17.txt,1960-02-17,1960,2,civil rights,area of concern was in the field of civil righ...
5,cn1960-02-17.txt,1960-02-17,1960,2,civil rights,civil rights. ‘He said that strong civil right...
6,cn1956-02-22.txt,1956-02-22,1956,2,integration,Texans have yielded to the pres- sures for int...
7,cn1956-02-22.txt,1956-02-22,1956,2,segregation,"In May. of 1954, the Supreme Court of the Unit..."
8,cn1956-02-22.txt,1956-02-22,1956,2,segregation,Just as Miss Lucy will be readmitted to the Un...
9,cn1956-02-22.txt,1956-02-22,1956,2,segregation,taken by most Texans in regard to the segregat...


## Strip Plot: Keywords and Context
[See Altair documentation](https://altair-viz.github.io/gallery/stripplot.html)

In [10]:
alt.Chart(df, width=100).mark_circle(size=8).encode( #create the chart
    y=alt.Y('date:T', title="Date"), # sets x axis to 'date' column, expressed with year and month
    x=alt.X(
        'jitter:Q',
        title=None,
        axis=alt.Axis(values=[0], ticks=True, grid=False, labels=False),
        scale=alt.Scale(),
        ), #for y axis
    color=alt.Color('keyword:N', legend=None),
    column=alt.Column(
        'keyword:N',
        header=alt.Header(
            titleOrient='top',
            labelOrient='bottom',
            labelAlign='right',
            labelPadding=3,
        )
    ),
    tooltip = [alt.Tooltip('date:T', title="Issue date"), # add a tooltip to show
            alt.Tooltip('keyword'),
            alt.Tooltip('context', title="Context")
    ]
).transform_calculate(
    jitter='sqrt(-2*log(random()))*cos(2*PI*random())'
).configure_facet(spacing=0).configure_view(stroke=None)

#chart1

#chart1.save('table-bubble-wordcount-volume.html')


## Regular scatterplot with tooltips

In [4]:
alt.Chart(df).mark_circle(size=8).encode( #create the chart
    x=alt.X('date:T', title="Date"), # sets x axis to 'date' column, expressed with year and month
    y=alt.Y(
        'jitter:Q',
        title=None,
        axis=alt.Axis(values=[0], ticks=True, grid=False, labels=False),
        scale=alt.Scale(),
        ), #for y axis
    color=alt.Color('keyword:N'),
    tooltip = [alt.Tooltip('date:T', title="Issue date"), # add a tooltip to show
            alt.Tooltip('keyword'),
            alt.Tooltip('context', title="Context")
    ]
).transform_calculate(
    jitter='sqrt(-2*log(random()))*cos(2*PI*random())'
).configure_facet(spacing=0).configure_view(stroke=None)

## Steam Graph
NB: would work better if data were more continuous

In [5]:
alt.Chart(df).mark_area().encode(
    alt.X('year(date):T',
        axis=alt.Axis(format='%Y', domain=False, tickSize=0)
    ),
    alt.Y('count(keyword):Q', stack='center', axis=None),
    alt.Color('keyword:N',
        scale=alt.Scale()
    )
).interactive()


## Punchcard Scatterplot


In [7]:

alt.Chart(df).mark_circle().encode( #create the chart
    alt.X('date:T', title="Year"), # sets x axis to 'date' column, expressed with year and month
    alt.Y('month(date):N'
        ),
    alt.Color('keyword'),
    alt.Size('count(keyword):Q'), #size is dependent on # of pages
    tooltip = [alt.Tooltip('date:T', title="Issue date"), # add a tooltip to show
            alt.Tooltip('keyword'),
            alt.Tooltip('context', title="Context")
    ]
).transform_calculate(
    jitter='sqrt(-2*log(random()))*cos(2*PI*random())'
).configure_facet(spacing=0).configure_view(stroke=None
).configure_scale(maxSize = 100.0, pointPadding = 1.0)
