In [1]:
import altair as alt
import pandas as pd

In [2]:
ufo_data = "ufo-scrubbed-geocoded-time-standardized-00.csv"

In [3]:
df = pd.read_csv(ufo_data) 

columns = [
    'Datetime', 'City', 'State', 'Country', 'Shape', 'Duration_Seconds',
    'Duration', 'Description', 'Report_Date', 'Latitude', 'Longitude'
]

df.columns = columns

print(df)

               Datetime                  City State Country   Shape  \
0      10/10/1949 21:00          lackland afb    tx     NaN   light   
1      10/10/1955 17:00  chester (uk/england)   NaN      gb  circle   
2      10/10/1956 21:00                  edna    tx      us  circle   
3      10/10/1960 20:00               kaneohe    hi      us   light   
4      10/10/1961 19:00               bristol    tn      us  sphere   
...                 ...                   ...   ...     ...     ...   
80326    9/9/2013 21:15             nashville    tn      us   light   
80327    9/9/2013 22:00                 boise    id      us  circle   
80328    9/9/2013 22:00                  napa    ca      us   other   
80329    9/9/2013 22:20                vienna    va      us  circle   
80330    9/9/2013 23:00                edmond    ok      us   cigar   

       Duration_Seconds    Duration  \
0                7200.0     1-2 hrs   
1                  20.0  20 seconds   
2                  20.0    1/2

In [4]:
df.shape

(80331, 11)

In [5]:
print(df.dtypes)


Datetime             object
City                 object
State                object
Country              object
Shape                object
Duration_Seconds    float64
Duration             object
Description          object
Report_Date          object
Latitude            float64
Longitude           float64
dtype: object


In [6]:
df.head()

Unnamed: 0,Datetime,City,State,Country,Shape,Duration_Seconds,Duration,Description,Report_Date,Latitude,Longitude
0,10/10/1949 21:00,lackland afb,tx,,light,7200.0,1-2 hrs,1949 Lackland AFB&#44 TX. Lights racing acros...,12/16/2005,29.38421,-98.581082
1,10/10/1955 17:00,chester (uk/england),,gb,circle,20.0,20 seconds,Green/Orange circular disc over Chester&#44 En...,1/21/2008,53.2,-2.916667
2,10/10/1956 21:00,edna,tx,us,circle,20.0,1/2 hour,My older brother and twin sister were leaving ...,1/17/2004,28.978333,-96.645833
3,10/10/1960 20:00,kaneohe,hi,us,light,900.0,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,1/22/2004,21.418056,-157.803611
4,10/10/1961 19:00,bristol,tn,us,sphere,300.0,5 minutes,My father is now 89 my brother 52 the girl wit...,4/27/2007,36.595,-82.188889


In [7]:
subset_df = df.sample(n=4900, random_state=42)

print(subset_df.shape)
#used a random subset because I was getting number of rows larger than maximum error

(4900, 11)


In [8]:
scatter_chart = alt.Chart(subset_df).mark_circle(size=60).encode(
    x=alt.X('Duration_Seconds:Q', title='Duration in Seconds', scale=alt.Scale(domain=[0, 1000])),  
    # Limit x-axis to 1000 due to most of the data in duration_seconds being small numbers
    y=alt.Y('Latitude:Q', title='Latitude'),
    color=alt.Color('Shape:N', title='UFO Shape'),
).properties(
    title="UFO Sightings by Shape, Duration, and Location",
    width=800,
    height=500
).interactive()

scatter_chart

In the chart above I am visualizing UFO sightings based on latitude and duration_seconds(this is how long the UFO was
sighted for in seconds). I did have to limit the graph to only go up to 1000 seconds because most of the responses were 
between the range of 0 to 1000 so this way it gives a much clearer picture of the relationship between the two. For the X-axis was quantitative as the duration_seconds is numeric. The Y-axis was similiar. The color coding I used was based on the shape of the UFO becuase this allows us see differences and similarities in this chart based on UFO shape. This data was Nominal as shapes are categorical data. There were two encoding types used, position encoding for the x and y variables and color encoding for the shape variable.

In [9]:
myJekyllDir = './'

In [10]:
scatter_chart.save(myJekyllDir + 'ufo_data_scatter.json')

In [15]:
state_counts = subset_df['State'].value_counts().reset_index()
state_counts.columns = ['State', 'Sightings']
hover = alt.selection_single(fields=['State'],  on='mouseover', empty='none')

bar_chart = alt.Chart(state_counts).mark_bar().encode(
    x=alt.X('State:N', title='State', sort='-y'),
    y=alt.Y('Sightings:Q', title='Number of Sightings'),
    color=alt.condition(
        hover, alt.value('orange'), alt.value('lightblue') 
    ),
    tooltip=['State', 'Sightings']
).add_selection(
    hover  
).properties(
    title="Number of UFO Sightings by State",
    width=800,
    height=500
)
bar_chart

This visualization was a bit different.It visualized UFO sightings based on the number of UFO sightings per state. Of course it still uses the subset so it is only a representation of the overall dataset. The states were put on the X axis and this data was nominal as it was categorical data. On the Y axis there was the number of UFOS sightings and this was numerical so it used Q. For color coding I made everything lightblue which probably would not have been a good idea had it not been for the added interactive feature. I used the hover feature so when the mouse hovers over a certain bar in the bar chart it will change the color of that bar from light blue to orange. On its own this isn't very helpful but combined with the tooltip feature, which shows the exact number of sightings when the mouse hovers over it, it helps improve the visualization. When presenting this data or showing it to others it makes it much more clear and efficient when the specific state changes color and shows the exact number of sightings instead of having to approximate with the y-axis.

In [16]:
bar_chart.save(myJekyllDir + 'ufo_data_bar.json')

In [19]:
subset_df

Unnamed: 0,Datetime,City,State,Country,Shape,Duration_Seconds,Duration,Description,Report_Date,Latitude,Longitude
51411,6/24/1996 00:30,aurora,co,us,changed,3600.0,1 hour,Obj. hovered 100 ft above car. Red&#44 blue l...,11/2/1999,39.729444,-104.831389
25771,2/11/2011 13:43,lakewood,ca,us,circle,180.0,2-3 minutes,Circular UFO sighted above Lakewood/Cypress area,2/18/2011,33.853611,-118.133056
64176,8/10/1996 22:45,seattle,wa,us,triangle,600.0,10 minutes,Orange triangular object&#44 very distant&#44 ...,12/12/2011,47.606389,-122.330833
2623,10/17/2010 04:00,santa ana,ca,us,light,120.0,2 min,Green and Blue lights in the sky followed by t...,11/21/2010,33.745556,-117.866944
59591,7/25/1998 22:00,cascade range (central&#44 near north sister),or,,light,5.0,5seconds,Unusual large bright light lingering in sky fo...,1/29/2002,45.465509,-121.915016
...,...,...,...,...,...,...,...,...,...,...,...
56671,7/15/1979 01:30,cumberland,ky,us,other,120.0,2 minutes,Something raceing accross the sky up high with...,4/16/2005,36.978056,-82.988611
52658,6/30/1965 19:00,elkins,ar,us,disk,600.0,10 minutes,On a clear night you could see the saucer,8/5/2009,36.001389,-94.008056
69763,8/26/2003 22:00,north myrtle beach/cherry groove,sc,,light,10.0,10 seconds,We saw three large horizonal lights in a row o...,9/28/2003,33.827395,-78.642792
38607,4/23/1996 17:10,richmond,va,us,,360.0,6 min.,Woman&#44 other commuters on Ridgefield Rd. wi...,11/2/1999,37.553611,-77.460556
