# Plot Camera Spec Summary

Finally, it's time to plot all of the data! I'll plot four key pieces of information:

1. The distribution of all focal lengths in the data.
2. The distribution of all f-stops in the data.
3. The distribution of all ISO settings in the data.
4. A summary figure plotting camera popularity against cost.

In [1]:
import pandas as pd
import altair as alt

In [2]:
# Load the two result tables produced earlier in the pipeline:
#   * camera_metadata   - supplies the focal length / f-stop / iso columns
#                         used for the settings histograms below.
#   * camera_statistics - supplies the per-camera columns (cost, count, brand, ...)
#                         used for the summary scatter plot.
camera_metadata = pd.read_csv(filepath_or_buffer='../results/camera_metadata.csv')
camera_statistics = pd.read_csv(filepath_or_buffer='../results/camera_statistics.csv')

## What are the most common settings used to take photos?

In [3]:
# Plot the distribution of focal lengths.
# `focal_length` is text (it is read through the `.str` accessor), so pull out the
# first number it contains. The pattern keeps an optional decimal part; the
# previous `(\d+)` silently truncated values such as "4.5" to 4.
# Rows without any number become NaN and are ignored by quantile().
camera_metadata['focal_length_numeric'] = (
    camera_metadata['focal_length']
    .str.extract(r'(\d+(?:\.\d+)?)', expand=False)
    .astype(float)
)
# The 10th and 90th percentiles bound the central 80% of the data.
lower_bound = camera_metadata['focal_length_numeric'].quantile(0.1)
upper_bound = camera_metadata['focal_length_numeric'].quantile(0.9)
# `:g` keeps a fractional percentile (int() truncated it) and drops a trailing ".0".
focal_length_title = f"80% of the photos are taken between {lower_bound:g}mm and {upper_bound:g}mm zoom."
# One bin definition shared by the x axis and the tooltip. Encoding the raw
# (unbinned) field in the tooltip adds it to the aggregation's group-by, so
# count() would be computed per distinct value instead of per bar.
focal_length_bin = alt.Bin(maxbins=20)
focal_length_hist = alt.Chart(camera_metadata).mark_bar(color="rebeccapurple").encode(
    x=alt.X('focal_length_numeric:Q',
            bin=focal_length_bin,
            title='Focal Length (mm)'
           ),
    y=alt.Y('count()', title='Count of Photos'),
    tooltip=[alt.Tooltip('focal_length_numeric:Q', bin=focal_length_bin, title='Focal Length (mm)'),
             alt.Tooltip('count()', title='Count of Photos')]
).properties(
    title=focal_length_title,
    width=500,
    height=250
).configure_axis(
    labelFontSize=12,
    titleFontSize=14
).configure_title(
    fontSize=16
)
# Bare last expression renders the chart as the cell output.
focal_length_hist

In [4]:
# Export the focal-length histogram as a standalone HTML page.
focal_length_hist.save(
    fp='../results/plots/focal_length_hist.html'
)

In [5]:
# Plot the distribution of f-stops.
# `f-stop` is text (it is read through the `.str` accessor), so pull out the first
# number it contains. Apertures are routinely fractional (1.8, 2.8, 5.6, ...);
# the previous `(\d+)` pattern kept only the integer part, collapsing e.g. 1.4
# and 1.8 into 1 and distorting the whole distribution.
# Rows without any number become NaN and are ignored by quantile().
camera_metadata['f_stop_numeric'] = (
    camera_metadata['f-stop']
    .str.extract(r'(\d+(?:\.\d+)?)', expand=False)
    .astype(float)
)
# The 10th and 90th percentiles bound the central 80% of the data.
lower_bound = camera_metadata['f_stop_numeric'].quantile(0.1)
upper_bound = camera_metadata['f_stop_numeric'].quantile(0.9)
# `:g` preserves the fractional part (int() turned f/2.8 into f/2) and drops a
# trailing ".0" for whole-number apertures.
aperture_title = f"80% of the photos are taken between f/{lower_bound:g} and f/{upper_bound:g}."
# One bin definition shared by the x axis and the tooltip. Encoding the raw
# (unbinned) field in the tooltip adds it to the aggregation's group-by, so
# count() would be computed per distinct value instead of per bar.
aperture_bin = alt.Bin(maxbins=20)
aperture_hist = alt.Chart(camera_metadata).mark_bar(color="forestgreen").encode(
    x=alt.X('f_stop_numeric:Q',
            bin=aperture_bin,
            title='Aperture (f/number)'
           ),
    y=alt.Y('count()', title='Count of Photos'),
    tooltip=[alt.Tooltip('f_stop_numeric:Q', bin=aperture_bin, title='Aperture (f/number)'),
             alt.Tooltip('count()', title='Count of Photos')]
).properties(
    title=aperture_title,
    width=500,
    height=250
).configure_axis(
    labelFontSize=12,
    titleFontSize=14
).configure_title(
    fontSize=16
)
# Bare last expression renders the chart as the cell output.
aperture_hist

In [6]:
# Export the aperture histogram as a standalone HTML page.
aperture_hist.save(
    fp='../results/plots/aperture_hist.html'
)

In [7]:
# Plot the distribution of iso settings.
# The 10th and 90th percentiles bound the central 80% of the data.
lower_bound = camera_metadata['iso'].quantile(0.1)
upper_bound = camera_metadata['iso'].quantile(0.9)
iso_title = f"80% of the photos are taken between iso {int(lower_bound)} and iso {int(upper_bound)}."
# One bin definition shared by the x axis and the tooltip. Encoding the raw
# (unbinned) field in the tooltip adds it to the aggregation's group-by, so
# count() would be computed per distinct value instead of per bar.
iso_bin = alt.Bin(maxbins=50)
iso_hist = alt.Chart(camera_metadata).mark_bar(color="darkgoldenrod").encode(
    x=alt.X('iso:Q',
            bin=iso_bin,
            title='iso'
           ),
    y=alt.Y('count()', title='Count of Photos'),
    tooltip=[alt.Tooltip('iso:Q', bin=iso_bin, title='iso'),
             alt.Tooltip('count()', title='Count of Photos')]
).properties(
    title=iso_title,
    width=500,
    height=250
).configure_axis(
    labelFontSize=12,
    titleFontSize=14
).configure_title(
    fontSize=16
)
# Bare last expression renders the chart as the cell output.
iso_hist

In [8]:
# Export the iso histogram as a standalone HTML page.
iso_hist.save(
    fp='../results/plots/iso_hist.html'
)

## Summary figure of all cameras

In [9]:
# Build the human-readable label columns shown in the scatter plot's tooltips.
#
# `megapixels` is overwritten in place, so this cell must be safe to re-run:
# only append the "px" suffix to values that do not already carry it
# (unconditionally appending produced "...pxpx" on a second execution).
# NOTE(review): "px" is kept as the suffix to preserve the existing output;
# confirm whether "MP" was intended for a megapixel count.
megapixel_labels = camera_statistics["megapixels"].astype(str)
camera_statistics["megapixels"] = megapixel_labels.where(
    megapixel_labels.str.endswith("px"),
    megapixel_labels + "px",
)
# `usd_cost` is derived from the untouched numeric `cost` column, so it is
# already idempotent. Cost is truncated to a whole number before labelling.
camera_statistics["usd_cost"] = camera_statistics["cost"].astype(int).astype(str) + "$"

In [12]:
# Legend-bound selection: clicking a brand in the legend selects that brand.
brand_selection = alt.selection_point(fields=['brand'], bind='legend')

# Both axes start at 0 and extend a little past the largest observed value.
popularity_axis = alt.Y(
    'count:Q',
    title='Popularity (Count)',
    scale=alt.Scale(domainMin=0, domainMax=camera_statistics["count"].max() + 10),
)
cost_axis = alt.X(
    'cost:Q',
    title='Cost (USD)',
    scale=alt.Scale(domainMin=0, domainMax=camera_statistics["cost"].max() + 100),
)

# One colour per brand; the legend doubles as the brand selector.
brand_color = alt.Color(
    'brand:N',
    scale=alt.Scale(scheme='category20'),
    legend=alt.Legend(title='Brand'),
)
# Points outside the current selection are made fully transparent.
brand_opacity = alt.when(brand_selection).then(alt.value(1)).otherwise(alt.value(0))

# Fields listed when hovering over a point.
hover_fields = [
    alt.Tooltip('id:N', title='Camera:'),
    alt.Tooltip('usd_cost:N', title='Cost (USD):'),
    alt.Tooltip('popular_lens:N', title='Most Popular Lens:'),
    alt.Tooltip('megapixels:N', title='Megapixels:'),
]

summary_scatter = (
    alt.Chart(camera_statistics)
    .mark_point(filled=True, size=100)
    .encode(
        x=cost_axis,
        y=popularity_axis,
        color=brand_color,
        opacity=brand_opacity,
        tooltip=hover_fields,
        # Each point links to the URL stored in `best_photo_link`.
        href='best_photo_link:N',
    )
    .add_params(brand_selection)
    .properties(
        title='Cost vs. Popularity of All Cameras',
        width=500,
        height=250,
    )
    .configure_axis(labelFontSize=12, titleFontSize=14)
    .configure_title(fontSize=16)
    .interactive()
)

# Embed option so that following a point's link opens the image in a new tab.
summary_scatter['usermeta'] = {
    "embedOptions": {
        'loader': {'target': '_blank'}
    }
}
# Bare last expression renders the chart as the cell output.
summary_scatter

In [11]:
# Export the summary scatter plot as a standalone HTML page.
summary_scatter.save(
    fp='../results/plots/summary_scatter.html'
)