# Workbook 07 Solution: UFOs by country and shape

In [1]:
from typing import List
import pathlib
import csv
import plotly.express as px
from cs103 import *

## Question

Do different countries show different trends for UFO sightings of different shapes? It would be interesting to compare histograms of sightings filtered by country and shape.

# Functions

In [2]:
def read_data(csv_file_path: pathlib.Path) -> List[List[str]]:
    """
    Returns a list of lists, one per data row of the CSV file at 'csv_file_path'.

    The first row of the file is treated as a header and is not returned.
    Every field is returned as a str, exactly as the csv module parsed it.
    Returns an empty list if the file is empty or contains only the header.
    """
    # newline="" hands line-ending handling to the csv module, as its
    # documentation requires; without it, a line break inside a quoted field
    # is rewritten by the file object before the parser sees it.
    with open(csv_file_path, encoding="utf-8", newline="") as file:
        reader = csv.reader(file, delimiter=",")
        # Skip the header row; the default stops an empty file from raising.
        next(reader, None)
        return list(reader)

# Expected rows used to spot-check read_data: the row at index 0 and the row
# at index 500 of its result (the header row is not part of that result).
record_00 =['10/10/1949 20:30','san marcos','tx','us','cylinder','2700','This event took place in early fall around 1949-50. It occurred after a Boy Scout meeting in the Baptist Church. The Baptist Church sit','4/27/2004','29.8830556','-97.9411111']
record_500 = ['10/1/1966 23:00','corvallis','or','us','disk','120','Saucer with no sound flew 100 feet above ground&#44 changed direction then left atmosphere in a split second.','12/16/1999','44.5647222','-123.2608333']

# Location of the dataset, resolved relative to the current working directory
ufo_data = pathlib.Path.cwd() / "Data" / "ufos.csv"

# Each expect() below re-reads the file and compares one row with its fixture
start_testing()
expect(read_data(ufo_data)[0], record_00)
expect(read_data(ufo_data)[500], record_500)
summary()

[92m2 of 2 tests passed[0m


In [3]:
from typing import NamedTuple, Optional

class UFOSighting(NamedTuple):
    """A single UFO sighting: the year it was seen, the country, and the perceived shape."""
    year: int
    country: Optional[str] # Optional because the source data sometimes leaves this field empty
    shape: str


# Interp. A single UFO sighting
# year: The year of the sighting
# country: The country where the observer saw the UFO as a two-character string, e.g. 'us', 'ca', 'jp', 'mx'
# shape: The perceived shape of the object

# Examples (these match the year, country and shape of record_00 and record_500)
UFOS1 = UFOSighting(1949, "us", "cylinder")
UFOS2 = UFOSighting(1966, "us", "disk")

In [4]:
def csv_record_to_ufo_sighting(record: List[str]) -> UFOSighting:
    """
    Returns a UFOSighting object representing the data in 'record'.

    'record' is one row of the CSV file as a list of strings. Three fields
    are used:
      record[0] - the date/time of the sighting, reduced to its year
      record[3] - the country code
      record[4] - the shape
    An empty country field is stored as None, matching the Optional[str]
    declared on UFOSighting.country.
    Raises ValueError if no year can be read from record[0].
    """
    year = convert_datetime_to_year(record[0])
    # '' means the country was not recorded; represent that as None
    country = record[3] or None
    shape = record[4]
    return UFOSighting(year, country, shape)


# Here is where I now write my helper function
def convert_datetime_to_year(date_string: str) -> int:
    """
    Returns an int representing the four-digit year contained in 'date_string'.

    Assumes that 'date_string' starts with a date that ends in a four-digit
    year, optionally followed by whitespace and a time, e.g.
    "10/10/1949 20:30" or "10/10/1949". The day and month parts are not
    inspected, so their order and zero-padding do not matter.
    Raises ValueError if 'date_string' is blank or if the last four
    characters of the date cannot be converted to an int.
    """
    # split() with no argument tolerates repeated or surrounding whitespace
    # and a missing time part, unlike split(" ") with two-target unpacking.
    parts = date_string.split()
    if not parts:
        raise ValueError(f"no date found in {date_string!r}")
    date = parts[0]
    return int(date[-4:])  # the year is the last four characters of the date

## Testing section

# From looking at the data a little bit, I saw that there were these date formats possible
# (day/month parts with and without a leading zero)...
date_0 = "04/5/1975 10:00"
date_1 = "04/05/2015 13:00"

start_testing()
# Test convert_datetime_to_year...
expect(convert_datetime_to_year(date_0), 1975)
expect(convert_datetime_to_year(date_1), 2015)

# Test csv_record_to_ufo_sighting...
expect(csv_record_to_ufo_sighting(record_00), UFOS1)
expect(csv_record_to_ufo_sighting(record_500), UFOS2)
summary()

[92m4 of 4 tests passed[0m


In [5]:
def csv_data_to_ufo_sightings(csv_data: List[List[str]]) -> List[UFOSighting]:
    """
    Converts every record in 'csv_data' into a UFOSighting and returns them
    as a list, in the same order as the records.
    Returns an empty list if 'csv_data' is empty.
    """
    return [csv_record_to_ufo_sighting(row) for row in csv_data]

# Using the data from my previous tests, above, I will write a quick test for this function

# LOR*: lists of raw CSV records (inputs)
LOR0 = []
LOR1 = [record_00]
LOR2 = [record_00, record_500]


# LOUS*: lists of UFOSighting (expected outputs, also reused as inputs further down)
LOUS0 = []
LOUS1 = [UFOS1]
LOUS2 = [UFOS1, UFOS2]
LOUS3 = [UFOS1, UFOS2, UFOS1]  # not exercised by the tests in this cell

# Cover the empty, one-record and two-record cases
start_testing()
expect(csv_data_to_ufo_sightings(LOR0), LOUS0)
expect(csv_data_to_ufo_sightings(LOR1), LOUS1)
expect(csv_data_to_ufo_sightings(LOR2), LOUS2)
summary()

[92m3 of 3 tests passed[0m


In [6]:
def filter_ufo_sightings_by_country(lous: List[UFOSighting], country: str) -> List[UFOSighting]:
    """
    Returns the UFOSighting objects in 'lous' whose .country equals 'country'.

    'country' is a two character country code, e.g. 'us', 'ca', 'jp'. It is
    lower-cased before comparing; the stored .country values are compared
    as they are. The order of 'lous' is preserved.
    Returns an empty list if no sighting matches.
    """
    # country.lower() stays inside the condition so it is only evaluated
    # when there is at least one sighting to compare against.
    return [sighting for sighting in lous if sighting.country == country.lower()]

# Tests

start_testing()
# Empty input: pass a real country code (a str) rather than a list
expect(filter_ufo_sightings_by_country(LOUS0, "us"), [])
expect(filter_ufo_sightings_by_country(LOUS1, "us"), LOUS1)
expect(filter_ufo_sightings_by_country(LOUS1, "jp"), []) # Test the "unhappy path"
summary()

def filter_ufo_sightings_by_shape(lous: List[UFOSighting], shape: str) -> List[UFOSighting]:
    """
    Returns the UFOSighting objects in 'lous' whose .shape equals 'shape'.

    The comparison is exact (case-sensitive), e.g. 'disk', 'cylinder',
    'triangle'. The order of 'lous' is preserved.
    Returns an empty list if no sighting matches.
    """
    return [sighting for sighting in lous if sighting.shape == shape]

start_testing()
# Empty input, a single match, one match out of two, and no match at all
expect(filter_ufo_sightings_by_shape(LOUS0, "cylinder"), [])
expect(filter_ufo_sightings_by_shape(LOUS1, "cylinder"), [UFOS1])
expect(filter_ufo_sightings_by_shape(LOUS2, "disk"), [UFOS2])
expect(filter_ufo_sightings_by_shape(LOUS2, "triangle"), [])
summary()

[92m3 of 3 tests passed[0m
[92m4 of 4 tests passed[0m


In [13]:
def plot_ufo_sightings(lous: List[UFOSighting]) -> None:
    """
    Returns None. Plots the list of UFOSighting as a histogram with the following axes:
    x = UFOSighting.year
    y = A count of the number of UFOSighting in each bin of years

    The chart title names the shape(s) and country(ies) found in 'lous'.
    If 'lous' is empty there is nothing to count or to name in the title, so
    a short message is printed and no chart is drawn.
    """
    if not lous:
        print("No UFO sightings to plot.")
        return

    x_data = [us.year for us in lous]

    # Sort the distinct values so the title does not depend on set ordering
    # when 'lous' mixes several countries or shapes. With a single country
    # and shape the title reads exactly as before.
    countries = ", ".join(sorted({str(us.country) for us in lous}))
    shapes = ", ".join(sorted({str(us.shape) for us in lous}))
    title = f"Sightings by year of {shapes} shapes seen in the country {countries}"

    # px.histogram automatically counts the frequency of years
    # This saves us from having to create separate y-axis data
    # by manually counting up the number of occurrences of the years in the list
    plot = px.histogram(x=x_data, title=title)

    # display() is a special function that only works in Jupyter
    # It's just like print() except it allows for rendering of rich media
    display(plot)

In [26]:
def analyze_sightings(ufo_csv_file: pathlib.Path, country: str, shape: str) -> None:
    """
    Reads the sightings in 'ufo_csv_file', keeps those that match both
    'country' and 'shape', and plots what remains as a histogram.
    'country' - A two-letter country code, e.g. "us", "au", "gb", "ca", "de"
    'shape' - A shape e.g. "disk", "cylinder", "triangle"
    """
    all_sightings = csv_data_to_ufo_sightings(read_data(ufo_csv_file))
    # Narrow by country first, then by shape
    matching = filter_ufo_sightings_by_shape(
        filter_ufo_sightings_by_country(all_sightings, country),
        shape,
    )
    plot_ufo_sightings(matching)

# Analyze Data

In [29]:
ufo_data = pathlib.Path.cwd() / "Data" / "ufos.csv"

# One chart per (country, shape) pair, in the order:
# us/disk, us/cylinder, gb/disk, gb/cylinder
for country_code in ("us", "gb"):
    for shape_name in ("disk", "cylinder"):
        analyze_sightings(ufo_data, country_code, shape_name)

## Reflections

This does not seem to be a very good representation of the data for comparison. Perhaps a different chart would be more appropriate.