In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

from scipy.stats import lognorm, uniform

import steme.dataset as dataset
import steme.loader as loader

In [None]:
import matplotlib
# Global font sizes applied to every figure drawn after this cell:
# tick labels and axis labels at 18, legend entries and figure titles at 16.
for rc_group, rc_settings in (
    ("xtick", {"labelsize": 18}),
    ("ytick", {"labelsize": 18}),
    ("axes", {"labelsize": 18}),
    ("legend", {"fontsize": 16}),
    ("figure", {"titlesize": 16}),
):
    matplotlib.rc(rc_group, **rc_settings)

In [None]:
def gtzan_data():
    """Load the GTZAN genre dataset through mirdata and collect its tempi.

    The dataset is initialised from ``../../datasets/gtzan_genre`` (relative to
    the notebook's working directory). Track ``reggae.00086`` is left out of
    the result.
    NOTE(review): presumably that track has no usable tempo annotation —
    confirm the reason for the exclusion.

    Returns:
        tuple: ``(gtzan, tracks, tempi)`` where ``gtzan`` is the mirdata
        dataset object, ``tracks`` is the list of kept track ids and ``tempi``
        holds ``track.tempo`` for each kept track, in the same order.
    """
    # Local import: only this loader needs mirdata.
    import mirdata

    gtzan = mirdata.initialize(
        "gtzan_genre",
        data_home="../../datasets/gtzan_genre",
        version="default",
    )

    # Filter into a new list instead of calling `.remove()` on
    # `gtzan.track_ids`: that would mutate the dataset's own list and raise
    # ValueError whenever the id is not present (e.g. on a repeated call).
    excluded = {"reggae.00086"}
    tracks = [track_id for track_id in gtzan.track_ids if track_id not in excluded]
    tempi = [gtzan.track(track_id).tempo for track_id in tracks]

    return gtzan, tracks, tempi

# Synthetic tempo distributions, 1000 samples each, all drawn with seed 42 so
# the figures are identical after a kernel restart.
#
# scipy's lognorm(s, loc, scale) has its median at loc + scale, i.e. 80, 120
# and 170 for the three draws below. They share one seed, so they are the same
# sample shifted by `loc`.
# NOTE(review): the plots label dist_low as "lognorm @ 70" although its median
# is 80 — confirm whether loc=20 or a different label was intended.
dist_low = lognorm.rvs(0.25, loc=30, scale=50, size=1000, random_state=42)
dist_medium = lognorm.rvs(0.25, loc=70, scale=50, size=1000, random_state=42)
dist_high = lognorm.rvs(0.25, loc=120, scale=50, size=1000, random_state=42)

# Uniform on [30, 240] (loc=30, scale=210).
dist_uniform = uniform.rvs(30, scale=210, size=1000, random_state=42)

# Log-uniform on [30, 240): 30 * exp(u * ln(240 / 30)) with u ~ U[0, 1).
# Drawn from a seeded generator; the previous np.random.rand call used the
# unseeded global state, which made this series differ on every run.
dist_log_uniform = 30 * np.exp(
    np.random.default_rng(42).random(1000) * np.log(240 / 30)
)
# Only the tempo list (third element of the loader's return tuple) is needed;
# keep it as a NumPy array so it can be masked and histogrammed like the
# synthetic samples.
dist_gtzan = np.array(gtzan_data()[2])

In [None]:
from collections import Counter

In [None]:
# Non-linearly spaced values from the project helper.
# NOTE(review): the arguments (25, 40, 190) are passed through unchanged;
# their meaning is defined in steme.dataset — confirm there.
theta = dataset.variables_non_linear(25, 40, 190)

# Histogram edges: every second theta value strictly between 30 and 370.
inside_range = (theta > 30) & (theta < 370)
bins = theta[inside_range][::2]

In [None]:
def init(x):
    """Return 25 * 2 ** (x / 40): the value doubles every 40 steps of ``x``."""
    return 25 * 2.0 ** (x / 40)


def end(y):
    """Return 25 * 2 ** ((128 + y - 1) / 40), i.e. the same curve 127 steps after ``y``."""
    return 25 * 2.0 ** ((128+y-1) / 40)


# Cell output: both values for index 11.
init(11), end(11)

In [None]:
# test = dist_gtzan[(dist_gtzan > 90) & (dist_gtzan < 240)]

# theta[(theta > 30)]

In [None]:
# Call the default property cycler to get an iterator over its style entries.
# NOTE(review): `colors` is not referenced by any cell visible here.
colors = plt.rcParams["axes.prop_cycle"]()

In [None]:
# Qualitative 10-colour palette; the plotting cell indexes `cmap.colors`.
# `plt.get_cmap` is used because `matplotlib.cm.get_cmap` was deprecated in
# Matplotlib 3.7 and removed in 3.9.
cmap = plt.get_cmap('tab10')

In [None]:
# Bare expression: the notebook renders the colormap object as this cell's output.
cmap

In [None]:
# Two stacked histograms of the synthetic tempo distributions: the top panel
# uses 50 equal-width bins on a linear axis, the bottom panel uses the
# non-linear `bins` edges on a logarithmic axis. The result is written to
# "distributions.svg".
fig, ax = plt.subplots(2, 1, figsize=(15, 8))
linear_ax, log_ax = ax

# Appearance shared by every histogram in this figure.
kwargs = dict(alpha=0.7, histtype="stepfilled")

# (samples, legend label, face colour) — drawn in this order on both panels.
# (dist_gtzan is not drawn in this figure.)
synthetic_series = (
    (dist_low, "lognorm @ 70", cmap.colors[0]),
    (dist_medium, "lognorm @ 120", cmap.colors[2]),
    (dist_high, "lognorm @ 170", cmap.colors[4]),
    (dist_log_uniform, "log uniform", cmap.colors[3]),
)

for target, target_bins in ((linear_ax, 50), (log_ax, bins)):
    for samples, series_label, face in synthetic_series:
        target.hist(
            samples,
            bins=target_bins,
            label=series_label,
            edgecolor="black",
            color=face,
            **kwargs,
        )

for target, heading in ((linear_ax, "Linear axis"), (log_ax, "Logarithmic axis")):
    target.title.set_text(heading)
    target.title.set_fontsize(20)
    target.grid(True, axis="x", alpha=0.7)

# Top panel: a tick every 20 units for the vertical grid lines, with the tick
# labels hidden.
linear_ax.set_xticks(np.arange(30, 340, 20))
linear_ax.set_xlim(20, 340)
linear_ax.set_xticklabels([])

# Bottom panel: switch to a log scale after the histograms are drawn.
log_ax.set_xscale('log')

# From here on `ax` refers to the bottom (log) panel only.
ax = log_ax
handles, labels = ax.get_legend_handles_labels()
# One figure-level legend built from the bottom panel's entries (both panels
# carry the same labels).
fig.legend(
    handles,
    labels,
    loc="upper right",
    bbox_to_anchor=(0.4955, 0.465, 0.5, 0.5),
    framealpha=1,
)

# Replace the default log ticks with every fourth bin edge, rounded and
# printed as plain numbers rather than powers of ten.
ax.set_xticks([], [])
ax.set_xticks(np.round(bins[::4]))
ax.xaxis.set_major_formatter(ticker.ScalarFormatter())
ax.set_xlabel("BPM")
ax.set_xlim(28, 360)

plt.set_cmap("Accent")
plt.tight_layout()
plt.savefig(
    "distributions.svg",
    dpi='figure',
    format="svg",
    metadata=None,
    bbox_inches=None,
    pad_inches=0.1,
    facecolor='auto',
    edgecolor='auto',
    backend=None,
)

In [None]:
# Equal-width histogram edges: 20, 30, ..., 350 (np.arange excludes the stop value).
linear_bins = np.arange(20, 360, 10)
# Each loader returns three values.
# NOTE(review): the names suggest (dataset object, track ids, tempi) — confirm against steme.dataset.
ballroom, b_tracks, b_tempi = dataset.ballroom_data()
giant_steps, gs_tracks, gs_tempi = dataset.giant_steps_data()

In [None]:
# Side-by-side tempo histograms of the three datasets, each overlaid with the
# synthetic `dist_low` sample for reference.
fig, ax = plt.subplots(1, 3, figsize=(15, 8))

# (tempi, legend label, bar colour, name shown in the panel title)
dataset_panels = (
    (dist_gtzan, "gtzan", "red", "GTZAN"),
    (b_tempi, "ballroom", "blue", "Ballroom"),
    # This panel was previously labelled "ballroom" in its legend.
    (gs_tempi, "giant steps", "green", "Giant Steps"),
)

for panel_ax, (panel_tempi, panel_label, panel_color, panel_name) in zip(ax, dataset_panels):
    panel_ax.hist(panel_tempi, linear_bins, label=panel_label, color=panel_color)
    panel_ax.hist(dist_low, linear_bins, label="lognorm @ 70", color="orange", alpha=0.6)
    # Track count comes from the data so the title cannot drift from what is
    # plotted (e.g. when a track has been excluded by the loader).
    panel_ax.title.set_text(f"{panel_name} ({len(panel_tempi)} tracks)")
    panel_ax.legend()
# plt.tight_layout()

In [None]:
# Pool the tempi of all three datasets into one flat list, in the order
# GTZAN, Ballroom, Giant Steps.
combined_hist = [*dist_gtzan, *b_tempi, *gs_tempi]

In [None]:
# Pooled dataset tempi against the synthetic `dist_low` sample, sharing the
# same linear bin edges.
for hist_values, hist_options in (
    (combined_hist, {"label": "combined_datasets"}),
    (dist_low, {"alpha": 0.6, "label": "lognormal @ 70"}),
):
    plt.hist(hist_values, linear_bins, **hist_options)

plt.title("Combined datasets")
plt.legend()