In [None]:
from datetime import datetime
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
%matplotlib inline

In [None]:
def reduce_identical_vals_list(l, array=False):
    """Collapse a sequence whose entries are all identical into that one value.

    Parameters
    ----------
    l : sequence
        Non-empty, indexable collection of values to compare.
    array : bool, optional
        If True, entries are compared with ``np.array_equal`` (suitable for
        NumPy arrays); otherwise they are compared with ``==``.

    Returns
    -------
    The first element of ``l``.

    Raises
    ------
    ValueError
        If ``l`` is empty, or if any entry differs from the first one.
    """
    if len(l) == 0:
        # Without this guard an empty input failed with an opaque IndexError
        # from the l[0] lookup below.
        raise ValueError("list is empty")
    first = l[0]
    if array:
        all_same = all(np.array_equal(x, first) for x in l)
    else:
        all_same = all(first == x for x in l)
    if not all_same:
        raise ValueError("list has disparate values")
    return first


In [None]:
# Read the tab-separated input table into the notebook-wide DataFrame `df`.
filepath = "ACW_rounds.tsv"
df = pd.read_csv(filepath, delimiter="\t")

In [None]:
# Print each column name next to its positional index.
for position, column_name in enumerate(df.columns):
    print(position, column_name)

In [None]:
# Columns that are tallied as 'yes'/'no' answers further down.
keys_yes_no = [
    'Discernible solar azimuth?',
    'Discernible shadow direction?',
    'Rift(s)?',
    'Discernible Google car/blur?',
    'Discernible camera generation?',
    'Other motor vehicle(s)?',
    'License plate(s)?',
    'Discernible driving side?',
    'Road direction?',
    'Curb(s)?',
    'Visible road markings?',
    'Utility poles?',
    'Bollards / delineator posts?',
    'Chevron sign(s)?',
    'Object marker sign(s)?',
    'Stop sign front?',
    'Sign fronts?',
    'Sign backs?',
    'Guardrail(s)?',
    'Fire hydrant?',
    'Fence(s)?',
    'Readable domain name(s)?',
    'Area code(s)?',
    'Flag(s)?',
    'Trees/ grass/ vegetation?',
    'Hills/ mountains?',
    'Dirt/ soil?',
    'Water?',
    'Snow?',
    'Buildings/ roofs?',
    'Wall(s)?',
    'Person(s)?',
    'Animal(s)?',
    'Writing?',
    'Copyright watermark?',
]

In [None]:
# Tally the answers in every yes/no column, warning about any column that
# contains something other than 'yes' or 'no'.
vals_yes_no = {}
allowed_answers = {'yes', 'no'}
for key in keys_yes_no:
    column = df[key]
    observed = set(column)
    if not observed <= allowed_answers:
        print("Warning: '{}' has vals '{}'".format(key, observed))
    vals_yes_no[key] = column.value_counts()

In [None]:
# Show the per-column answer counts collected above.
vals_yes_no

In [None]:
# Columns that are neither yes/no features nor listed in `other_keys`.
other_keys = [
    'Driving side',
    'Copyright year',
    'Outer road line(s)',
    'Center road line (s)',
    'Camera',
]
leftover_keys = set(df.keys()).difference(keys_yes_no, other_keys)
print(leftover_keys)

In [None]:
# Fraction of 'yes' answers per feature. Despite the "Percent" name the stored
# values are fractions in [0, 1]; they are multiplied by 100 only when plotted.
percent_yes_dict = {}
n_rounds_list = []
for key, val in vals_yes_no.items():
    observed = set(df[key])
    if not observed.issubset({'yes', 'no'}):
        # Columns with other/missing answers would skew the fraction.
        print("Skipping: '{}', has vals '{}'".format(key, observed))
        continue
    n_answers = val.sum()
    # value_counts() omits labels that never occur, so a feature answered 'no'
    # in every row has no 'yes' entry; treat that as zero instead of KeyError.
    n_yes = val.get('yes', 0)
    n_rounds_list.append(n_answers)
    percent_yes_dict[key] = n_yes/n_answers
percent_yes = pd.DataFrame({
    'Feature': list(percent_yes_dict.keys()),
    "Percent": list(percent_yes_dict.values()),
})

In [None]:
# Collapse the per-column answer totals into a single count; the helper raises
# ValueError if the columns disagree.
n_rounds = reduce_identical_vals_list(n_rounds_list)

In [None]:
# Display the features ordered from the highest to the lowest 'yes' fraction.
percent_yes.sort_values('Percent', ascending=False)

In [None]:
# Build rank-labelled feature names and their 'yes' fractions for the bar chart.
features_list_sort = []
percent_list_sort = []
skip_list = ['Object marker sign(s)?']
for val in skip_list:
    # Guard against a typo or renamed column silently disabling the skip.
    if val not in percent_yes_dict:
        raise ValueError("skip_list entry not found in percent_yes_dict: {!r}".format(val))
# Drop skipped features BEFORE ranking so the rank numbers stay consecutive;
# enumerating first left a gap wherever a skipped feature would have ranked.
ranked_features = sorted(
    ((key, val) for key, val in percent_yes_dict.items() if key not in skip_list),
    key=lambda x: x[1],
    reverse=True,
)
for i, (key, val) in enumerate(ranked_features):
    print("{}\t{}\t{:2.0%}".format(i, key, val))
    features_list_sort.append("{}. {}".format(i+1, key))
    percent_list_sort.append(val)
# barh draws the first entry at the bottom, so reverse to put rank 1 on top.
features_list_sort = list(reversed(features_list_sort))
percent_list_sort = list(reversed(percent_list_sort))
percent_sort = np.array(percent_list_sort)

In [None]:
# Horizontal bar chart of the 'yes' percentage per feature, with the value
# printed at the end of each bar and all x-axis decoration removed.
fig, ax = plt.subplots(figsize=(6.4, 2*4.8), constrained_layout=True)
bars = ax.barh(y=features_list_sort, width=100*percent_sort, color='black')
ax.bar_label(bars, fmt='%.0f%%', label_type='edge', color='black', fontsize=14)
ax.set_title(
    "Features in {} rounds of A Community World".format(n_rounds),
    fontsize=20,
    y=1.0,
    pad=-20,
    x=-0.12,
)
plt.setp(ax.get_yticklabels(), fontsize=14)
ax.set_xticklabels([])
ax.set_xticks([])
ax.set_frame_on(False);

In [None]:
# Explicitly render the current figure on its canvas.
fig.canvas.draw();

In [None]:
# Write the feature bar chart to disk as both PNG and PDF.
for output_path in (
    "features_percentage_barchart.png",
    "features_percentage_barchart.pdf",
):
    fig.savefig(
        output_path,
        bbox_inches='tight',
        dpi=200,
        facecolor="w",  # white background
    )

In [None]:
# Close the figure and remove the notebook-level `fig`/`ax` names.
plt.close(fig); del fig, ax;

In [None]:
# Parse the "lat,lon" strings into float arrays and cross-check the sign of
# each latitude against the recorded hemisphere.
latitude_l = []
longitude_l = []
for coord_str, hemisphere in zip(df['Coordinates'], df['Hemisphere']):
    try:
        latitude_str, longitude_str = coord_str.split(',')
        this_latitude = float(latitude_str)
        this_longitude = float(longitude_str)
    except Exception:
        # Show the offending cell (wrong field count, non-numeric text, or a
        # non-string value) before propagating the original error.
        print("Coordinates: '{}'".format(coord_str))
        raise
    latitude_l.append(this_latitude)
    longitude_l.append(this_longitude)
    if hemisphere == 'Northern':
        assert this_latitude > 0, \
            "'Northern' row has non-positive latitude: '{}'".format(coord_str)
    elif hemisphere == 'Southern':
        assert this_latitude < 0, \
            "'Southern' row has non-negative latitude: '{}'".format(coord_str)
    else:
        raise ValueError(
            "Unexpected hemisphere '{}' for coordinates '{}'".format(hemisphere, coord_str))
latitude = np.array(latitude_l)
longitude = np.array(longitude_l)

In [None]:
# Tally how often the recorded solar azimuth agrees with the recorded
# hemisphere. A row counts as a match when the sun is 'south' in the
# 'Northern' hemisphere or 'north' in the 'Southern' hemisphere.
azimuth_count = {
    'match': 0,
    'mismatch': 0,
    'indeterminate': 0,
    'total': 0,
}
# Azimuth that counts as a match for each hemisphere.
expected_azimuth = {'Northern': 'south', 'Southern': 'north'}
# Per-row status code: 1 = match, -1 = mismatch, 0 = indeterminate.
match_status = np.full(len(df['Coordinates']), np.nan)
for i, (hemisphere, azimuth) in enumerate(zip(df['Hemisphere'], df['Solar azimuth N/S'])):
    if azimuth == '?':
        azimuth_count['indeterminate'] += 1
        match_status[i] = 0
    elif azimuth in ('north', 'south'):
        # Reject unknown hemispheres for BOTH azimuths; previously only the
        # 'north' branch raised, so a bad hemisphere paired with 'south' was
        # silently added to 'total' without being classified.
        if hemisphere not in expected_azimuth:
            raise ValueError(hemisphere)
        if azimuth == expected_azimuth[hemisphere]:
            azimuth_count['match'] += 1
            match_status[i] = 1
        else:
            azimuth_count['mismatch'] += 1
            match_status[i] = -1
    else:
        raise ValueError(azimuth)
    azimuth_count['total'] += 1

In [None]:
# Show the raw match / mismatch / indeterminate / total tallies.
azimuth_count

In [None]:
# Convert each outcome tally into a fraction of all counted rows.
azimuth_percent_dict = {
    outcome: azimuth_count[outcome]/azimuth_count['total']
    for outcome in ('match', 'mismatch', 'indeterminate')
}
azimuth_percent_vals = np.array([fraction for fraction in azimuth_percent_dict.values()])

In [None]:
# Show the outcome fractions in match / mismatch / indeterminate order.
azimuth_percent_vals

In [None]:
# Horizontal bar chart of the match / mismatch / indeterminate percentages,
# with the value printed at the end of each bar and no x-axis decoration.
fig, ax = plt.subplots(figsize=(6.4*0.7, 4.8*0.7), constrained_layout=True)
outcome_names = list(azimuth_percent_dict.keys())
outcome_colors = ['tab:green', 'tab:orange', 'gray']
bars = ax.barh(y=outcome_names, width=100*azimuth_percent_vals, color=outcome_colors)
ax.bar_label(bars, fmt='%.0f%%', label_type='edge', color='black', fontsize=14)
fig.suptitle("Using sun to guess hemisphere", fontsize=20)
ax.set_title("For {} rounds of A Community World".format(n_rounds))
plt.setp(ax.get_yticklabels(), fontsize=14)
ax.set_xticklabels([])
ax.set_xticks([])
ax.set_frame_on(False);

In [None]:
# Explicitly render the current figure on its canvas.
fig.canvas.draw();

In [None]:
# Write the sun/hemisphere bar chart to disk as both PNG and PDF.
for output_path in (
    "sun_hemisphere_barchart.png",
    "sun_hemisphere_barchart.pdf",
):
    fig.savefig(
        output_path,
        bbox_inches='tight',
        dpi=200,
        facecolor="w",  # white background
    )

In [None]:
# Close the figure and remove the notebook-level `fig`/`ax` names.
plt.close(fig); del fig, ax;

In [None]:
# Histogram of the parsed latitudes over the full -90..90 degree range.
fig, ax = plt.subplots(constrained_layout=True)
ax.hist(latitude)
ax.set_xlim(-90, 90)
# Label the axes so the figure is readable on its own.
ax.set_xlabel("Latitude (degrees)")
ax.set_ylabel("Count")
ax.set_title("Distribution of latitudes");

In [None]:
# Explicitly render the current figure on its canvas.
fig.canvas.draw();

In [None]:
# Close the figure and remove the notebook-level `fig`/`ax` names.
plt.close(fig); del fig, ax;

In [None]:
# Histogram of the parsed longitudes over the full -180..180 degree range.
fig, ax = plt.subplots(constrained_layout=True)
ax.hist(longitude)
ax.set_xlim(-180, 180)
# Label the axes so the figure is readable on its own.
ax.set_xlabel("Longitude (degrees)")
ax.set_ylabel("Count")
ax.set_title("Distribution of longitudes");

In [None]:
# Explicitly render the current figure on its canvas.
fig.canvas.draw();

In [None]:
# Close the figure and remove the notebook-level `fig`/`ax` names.
plt.close(fig); del fig, ax;

https://matplotlib.org/basemap/stable/users/mapsetup.html

https://matplotlib.org/basemap/stable/api/basemap_api.html

In [None]:
%%time
# World map in the Robinson projection with one marker per parsed coordinate.
# No `ax=` is passed to Basemap, so the drawing calls below rely on the axes
# created here being the current axes.
fig, ax = plt.subplots(figsize=(6.4*2, 4.8*2), dpi=200)
m = Basemap(
    projection='robin',
    lon_0=0, # central longitude
    resolution='c', # crude resolution coastlines
#     resolution='l', # low resolution coastlines
#     resolution='i', # intermediate resolution coastlines, slower
)
m.drawcoastlines(color='gray')
m.drawcountries(color='lightgray')
# Calling the Basemap instance converts lon/lat into map x/y coordinates.
x, y = m(longitude, latitude)
# NOTE(review): zorder=10 is presumably there to keep the markers above the
# continent fill drawn below -- confirm against the rendered figure.
m.scatter(x, y, marker='.', color='black', edgecolors='red', zorder=10)
ax.set_title("Robinson Projection, {} rounds of A Community World".format(n_rounds))
m.fillcontinents(color='white',lake_color='paleturquoise')
# draw parallels and meridians.
# m.drawparallels(np.arange(-90.,91.,30.))
# m.drawmeridians(np.arange(-180.,181.,60.))
m.drawmapboundary(fill_color='paleturquoise');

In [None]:
# Explicitly render the current figure on its canvas.
fig.canvas.draw();

In [None]:
# Write the Robinson map to disk as both PNG and PDF.
for output_path in (
    "map_robinson_with_locations.png",
    "map_robinson_with_locations.pdf",
):
    fig.savefig(output_path, bbox_inches='tight')

In [None]:
# Remove the Basemap instance and the projected coordinates from the namespace.
del m, x, y

In [None]:
%%time
# plt.figure(figsize=(6.4*2, 4.8*2), dpi=200)
# World map in the Mercator projection, limited to latitudes -80..80, with one
# marker per parsed coordinate. No `ax=` is passed to Basemap, so the drawing
# calls below rely on the axes created here being the current axes.
fig, ax = plt.subplots(figsize=(6.4*2, 4.8*2), dpi=200)
m = Basemap(projection='merc',
    llcrnrlat=-80,  # lower left corner latitude
    llcrnrlon=-180, # lower left corner longitude
    urcrnrlat=80,  # upper right corner latitude
    urcrnrlon=180, # upper right corner longitude
    lat_ts=20, # latitude of true scale
#     resolution='c', # crude resolution coastlines
    resolution='l', # low resolution coastlines
#     resolution='i', # intermediate resolution coastlines, slower
)
m.drawcoastlines(color='gray')
m.drawcountries(color='lightgray')
# Calling the Basemap instance converts lon/lat into map x/y coordinates.
x, y = m(longitude, latitude)
# NOTE(review): zorder=10 is presumably there to keep the markers above the
# continent fill drawn below -- confirm against the rendered figure.
m.scatter(x, y, marker='.', color='black', edgecolors='red', zorder=10)
ax.set_title("Mercator Projection, {} rounds of A Community World".format(n_rounds));
m.fillcontinents(color='white',lake_color='paleturquoise')
# draw parallels and meridians.
m.drawparallels(np.arange(-90.,91.,30.))
m.drawmeridians(np.arange(-180.,181.,60.))
m.drawmapboundary(fill_color='paleturquoise');

In [None]:
# Write the Mercator map to disk as both PNG and PDF.
for output_path in (
    "map_mercator_with_locations.png",
    "map_mercator_with_locations.pdf",
):
    fig.savefig(output_path, bbox_inches='tight')

In [None]:
# Remove the Basemap instance and the projected coordinates from the namespace.
del m, x, y

In [None]:
# Plain longitude/latitude scatter (no map projection) over the whole globe.
fig, ax = plt.subplots(constrained_layout=True)
ax.scatter(longitude, latitude)
ax.set_xlim(-180, 180)
ax.set_ylim(-90, 90)
# Label the axes so the figure is readable on its own.
ax.set_xlabel("Longitude (degrees)")
ax.set_ylabel("Latitude (degrees)")
ax.set_title("Locations (unprojected)");

In [None]:
# Explicitly render the current figure on its canvas.
fig.canvas.draw();

In [None]:
# Close the figure and remove the notebook-level `fig`/`ax` names.
plt.close(fig); del fig, ax;

In [None]:
# Sun/hemisphere outcome of each row plotted against its latitude, with the
# equator marked by a vertical line.
fig, ax = plt.subplots(constrained_layout=True)
ax.scatter(latitude, match_status)
ax.set_xlim(-90, 90)
ax.axvline(0)
# Replace the bare numeric status codes with their meaning
# (-1 = mismatch, 0 = indeterminate, 1 = match).
ax.set_yticks([-1, 0, 1])
ax.set_yticklabels(['mismatch', 'indeterminate', 'match'])
ax.set_xlabel("Latitude (degrees)")
ax.set_title("Sun/hemisphere match status by latitude");

In [None]:
# Close the figure and remove the notebook-level `fig`/`ax` names.
plt.close(fig); del fig, ax;

In [None]:
# Capture dates as datetimes, plus the first and last day of each row's
# copyright year (used as the two ends of a vertical segment when plotted).
image_capture_datetime = list(map(datetime.fromisoformat, df['Image capture date']))
copyright_year_datetime1 = []
copyright_year_datetime2 = []
for raw_year in df['Copyright year']:
    year_text = str(int(raw_year))
    copyright_year_datetime1.append(datetime.fromisoformat(year_text+'-01-01'))
    copyright_year_datetime2.append(datetime.fromisoformat(year_text+'-12-31'))

In [None]:
# Image capture date (x) against copyright year (y). Each row is drawn as a
# vertical segment from Jan 1 to Dec 31 of its copyright year; the gray
# 'unity' line marks where both dates would coincide.
fig, ax = plt.subplots(figsize=(1.5*6.4, 1.5*6.4))
ax.plot(
    [image_capture_datetime, image_capture_datetime],
    [copyright_year_datetime1, copyright_year_datetime2],
    '.-',
    color='black',
)
ax.plot(
    image_capture_datetime,
    image_capture_datetime,
    '-',
    color='gray',
    zorder=0,
    label='unity',
)
ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%Y-%m'))
fig.autofmt_xdate()
ax.set_aspect('equal')
ax.set_xlabel('Image capture date')
ax.set_ylabel('Copyright year')
ax.set_title("{} rounds of A Community World".format(n_rounds))
ax.legend();

In [None]:
# Explicitly render the current figure on its canvas.
fig.canvas.draw();

In [None]:
# Write the capture-date/copyright-year figure to disk as both PNG and PDF.
for output_path in (
    "capture_date_vs_copyright_year.png",
    "capture_date_vs_copyright_year.pdf",
):
    fig.savefig(
        output_path,
        bbox_inches='tight',
        dpi=200,
        facecolor="w",  # white background
    )

In [None]:
# Close the figure and remove the notebook-level `fig`/`ax` names.
plt.close(fig); del fig, ax;

In [None]:
# TODO: pavement materials
# TODO: confidence intervals